[PATCH v3 13/18] KVM: arm64: nVHE: Handle hyp panics

Marc Zyngier maz at kernel.org
Mon Sep 7 09:24:19 EDT 2020


On Thu, 03 Sep 2020 14:53:02 +0100,
Andrew Scull <ascull at google.com> wrote:
> 
> Restore the host context when panicking from hyp to give the best chance
> of the panic being clean.
> 
> The host requires that registers be preserved such as x18 for the shadow
> callstack. If the panic is caused by an exception from EL1, the host
> context is still valid so the panic can return straight back to the
> host. If the panic comes from EL2 then it's most likely that the hyp
> context is active and the host context needs to be restored.
> 
> There are windows, before the host context is saved and after it is
> restored, in which restoration would be attempted incorrectly and the
> panic won't be clean.
> 
> Signed-off-by: Andrew Scull <ascull at google.com>
> ---
>  arch/arm64/include/asm/kvm_hyp.h |  2 +-
>  arch/arm64/kvm/hyp/nvhe/host.S   | 79 +++++++++++++++++++++++---------
>  arch/arm64/kvm/hyp/nvhe/switch.c | 18 ++------
>  3 files changed, 63 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index 0b525e05e5bf..6b664de5ec1f 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -94,7 +94,7 @@ u64 __guest_enter(struct kvm_vcpu *vcpu);
>  
>  void __noreturn hyp_panic(void);
>  #ifdef __KVM_NVHE_HYPERVISOR__
> -void __noreturn __hyp_do_panic(unsigned long, ...);
> +void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
>  #endif
>  
>  #endif /* __ARM64_KVM_HYP_H__ */
> diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
> index 1062547853db..40620c1c87b8 100644
> --- a/arch/arm64/kvm/hyp/nvhe/host.S
> +++ b/arch/arm64/kvm/hyp/nvhe/host.S
> @@ -47,6 +47,7 @@ SYM_FUNC_START(__host_exit)
>  	ldp	x2, x3,   [x29, #CPU_XREG_OFFSET(2)]
>  	ldp	x4, x5,   [x29, #CPU_XREG_OFFSET(4)]
>  	ldp	x6, x7,   [x29, #CPU_XREG_OFFSET(6)]
> +__host_enter_for_panic:

This definitely deserves a comment as to *why* we need to skip the
first 8 registers.

>  	ldp	x8, x9,   [x29, #CPU_XREG_OFFSET(8)]
>  	ldp	x10, x11, [x29, #CPU_XREG_OFFSET(10)]
>  	ldp	x12, x13, [x29, #CPU_XREG_OFFSET(12)]
> @@ -57,30 +58,49 @@ SYM_FUNC_START(__host_exit)
>  	restore_callee_saved_regs x29
>  
>  	/* Do not touch any register after this! */
> +__host_enter_without_restoring:
>  	eret
>  	sb
>  SYM_FUNC_END(__host_exit)
>  
> +/*
> + * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
> + */
>  SYM_FUNC_START(__hyp_do_panic)
> +	/* Load the format arguments into x1-7 */
> +	mov	x6, x3
> +	get_vcpu_ptr x7, x3
> +	mov	x7, xzr

Is that the vcpu pointer you are zeroing, right after obtaining it?

> +
> +	mrs	x3, esr_el2
> +	mrs	x4, far_el2
> +	mrs	x5, hpfar_el2
> +
> +	/* Prepare and exit to the host's panic function. */
>  	mov	lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
>  		      PSR_MODE_EL1h)
>  	msr	spsr_el2, lr
>  	ldr	lr, =panic
>  	msr	elr_el2, lr
> -	eret
> -	sb
> +
> +	/*
> +	 * Set the panic format string and enter the host, conditionally
> +	 * restoring the host context.
> +	 */
> +	cmp	x0, xzr
> +	ldr	x0, =__hyp_panic_string
> +	b.eq	__host_enter_without_restoring
> +	b	__host_enter_for_panic
>  SYM_FUNC_END(__hyp_do_panic)
>  
>  .macro valid_host_el1_sync_vect
>  	.align 7
>  	stp	x0, x1, [sp, #-16]!
> -
>  	mrs	x0, esr_el2
>  	lsr	x0, x0, #ESR_ELx_EC_SHIFT
>  	cmp	x0, #ESR_ELx_EC_HVC64
> -	b.ne	hyp_panic
> -
>  	ldp	x0, x1, [sp], #16
> +	b.ne	__host_exit
>  
>  	/* Check for a stub HVC call */
>  	cmp	x0, #HVC_STUB_HCALL_NR
> @@ -102,16 +122,31 @@ SYM_FUNC_END(__hyp_do_panic)
>  	br	x5
>  .endm
>  
> -.macro invalid_host_vect
> +.macro invalid_host_el2_vect
>  	.align 7
>  	/* If a guest is loaded, panic out of it. */
>  	stp	x0, x1, [sp, #-16]!
>  	get_loaded_vcpu x0, x1
>  	cbnz	x0, __guest_exit_panic
>  	add	sp, sp, #16
> +
> +	/*
> +	 * The panic may not be clean if the exception is taken before the host
> +	 * context has been saved by __host_exit or after the hyp context has
> +	 * been partially clobbered by __host_enter.
> +	 */
>  	b	hyp_panic
>  .endm
>  
> +.macro invalid_host_el1_vect
> +	.align 7
> +	mov	x0, xzr		/* restore_host = false */
> +	mrs	x1, spsr_el2
> +	mrs	x2, elr_el2
> +	mrs	x3, par_el1
> +	b	__hyp_do_panic
> +.endm
> +
>  /*
>   * The host vector does not use an ESB instruction in order to avoid consuming
>   * SErrors that should only be consumed by the host. Guest entry is deferred by
> @@ -123,23 +158,23 @@ SYM_FUNC_END(__hyp_do_panic)
>   */
>  	.align 11
>  SYM_CODE_START(__kvm_hyp_host_vector)
> -	invalid_host_vect			// Synchronous EL2t
> -	invalid_host_vect			// IRQ EL2t
> -	invalid_host_vect			// FIQ EL2t
> -	invalid_host_vect			// Error EL2t
> +	invalid_host_el2_vect			// Synchronous EL2t
> +	invalid_host_el2_vect			// IRQ EL2t
> +	invalid_host_el2_vect			// FIQ EL2t
> +	invalid_host_el2_vect			// Error EL2t
>  
> -	invalid_host_vect			// Synchronous EL2h
> -	invalid_host_vect			// IRQ EL2h
> -	invalid_host_vect			// FIQ EL2h
> -	invalid_host_vect			// Error EL2h
> +	invalid_host_el2_vect			// Synchronous EL2h
> +	invalid_host_el2_vect			// IRQ EL2h
> +	invalid_host_el2_vect			// FIQ EL2h
> +	invalid_host_el2_vect			// Error EL2h
>  
>  	valid_host_el1_sync_vect		// Synchronous 64-bit EL1
> -	invalid_host_vect			// IRQ 64-bit EL1
> -	invalid_host_vect			// FIQ 64-bit EL1
> -	invalid_host_vect			// Error 64-bit EL1
> -
> -	invalid_host_vect			// Synchronous 32-bit EL1
> -	invalid_host_vect			// IRQ 32-bit EL1
> -	invalid_host_vect			// FIQ 32-bit EL1
> -	invalid_host_vect			// Error 32-bit EL1
> +	invalid_host_el1_vect			// IRQ 64-bit EL1
> +	invalid_host_el1_vect			// FIQ 64-bit EL1
> +	invalid_host_el1_vect			// Error 64-bit EL1
> +
> +	invalid_host_el1_vect			// Synchronous 32-bit EL1
> +	invalid_host_el1_vect			// IRQ 32-bit EL1
> +	invalid_host_el1_vect			// FIQ 32-bit EL1
> +	invalid_host_el1_vect			// Error 32-bit EL1
>  SYM_CODE_END(__kvm_hyp_host_vector)
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index 72d3e0119299..b4f6ae1d579a 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -242,6 +242,8 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>  	if (system_uses_irq_prio_masking())
>  		gic_write_pmr(GIC_PRIO_IRQOFF);
>  
> +	host_ctxt->__hyp_running_vcpu = NULL;
> +
>  	return exit_code;
>  }
>  
> @@ -253,26 +255,16 @@ void __noreturn hyp_panic(void)
>  	struct kvm_cpu_context *host_ctxt =
>  		&__hyp_this_cpu_ptr(kvm_host_data)->host_ctxt;
>  	struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
> -	unsigned long str_va;
> +	bool restore_host = true;
>  
> -	if (read_sysreg(vttbr_el2)) {
> +	if (vcpu) {
>  		__timer_disable_traps(vcpu);
>  		__deactivate_traps(vcpu);
>  		__load_host_stage2();
>  		__sysreg_restore_state_nvhe(host_ctxt);
>  	}
>  
> -	/*
> -	 * Force the panic string to be loaded from the literal pool,
> -	 * making sure it is a kernel address and not a PC-relative
> -	 * reference.
> -	 */
> -	asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
> -
> -	__hyp_do_panic(str_va,
> -		       spsr, elr,
> -		       read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
> -		       read_sysreg(hpfar_el2), par, vcpu);
> +	__hyp_do_panic(restore_host, spsr, elr, par);
>  	unreachable();
>  }
>  
> -- 
> 2.28.0.402.g5ffc5be6b7-goog
> 
> 

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list