[PATCH v5 03/40] KVM: arm64: Avoid storing the vcpu pointer on the stack

Julien Grall julien.grall at arm.com
Mon Mar 5 03:08:31 PST 2018


Hi Christoffer,

On 27/02/18 11:33, Christoffer Dall wrote:
> From: Christoffer Dall <christoffer.dall at linaro.org>
> 
> We already have the percpu area for the host cpu state, which points to
> the VCPU, so there's no need to store the VCPU pointer on the stack on
> every context switch.  We can be a little more clever and just use
> tpidr_el2 for the percpu offset and load the VCPU pointer from the host
> context.
> 
> This has the benefit of being able to retrieve the host context even
> when our stack is corrupted, and it has a potential performance benefit
> because we trade a store plus a load for an mrs and a load on a round
> trip to the guest.
> 
> This does require us to calculate the percpu offset without including
> the offset from the kernel mapping of the percpu array to the linear
> mapping of the array (which is what we store in tpidr_el1), because a
> PC-relative generated address in EL2 already gives us the hyp alias
> of the linear mapping of a kernel address.  We do this in
> __cpu_init_hyp_mode() by using kvm_ksym_ref().
> 
> The code that accesses ESR_EL2 was previously using an alternative to
> use the _EL1 accessor on VHE systems, but this was actually unnecessary
> as the _EL1 accessor aliases the ESR_EL2 register on VHE, and the _EL2
> accessor does the same thing on both systems.
> 
> Cc: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> Reviewed-by: Marc Zyngier <marc.zyngier at arm.com>
> Reviewed-by: Andrew Jones <drjones at redhat.com>
> Signed-off-by: Christoffer Dall <christoffer.dall at linaro.org>

Reviewed-by: Julien Grall <julien.grall at arm.com>

Cheers,

> ---
> 
> Notes:
>      Changes since v4:
>       - Clarified rationale in commit message.
>       - Called get_host_ctxt from get_vcpu and renamed get_vcpu to
>         get_vcpu_ptr.
>      
>      Changes since v3:
>       - Reworked the assembly part of the patch after rebasing on v4.16-rc1
>         which created a conflict with the variant 2 mitigations.
>       - Removed Marc's reviewed-by due to the rework.
>       - Removed unneeded extern keyword in declaration in header file
>      
>      Changes since v1:
>       - Use PC-relative addressing to access per-cpu variables instead of
>         using a load from the literal pool.
>       - Remove stale comments as pointed out by Marc
>       - Reworded the commit message as suggested by Drew
> 
>   arch/arm64/include/asm/kvm_asm.h  | 15 +++++++++++++++
>   arch/arm64/include/asm/kvm_host.h | 15 +++++++++++++++
>   arch/arm64/kernel/asm-offsets.c   |  1 +
>   arch/arm64/kvm/hyp/entry.S        |  6 +-----
>   arch/arm64/kvm/hyp/hyp-entry.S    | 28 ++++++++++------------------
>   arch/arm64/kvm/hyp/switch.c       |  5 +----
>   arch/arm64/kvm/hyp/sysreg-sr.c    |  5 +++++
>   7 files changed, 48 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 24961b732e65..7149f1520382 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -33,6 +33,7 @@
>   #define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
>   #define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
>   
> +/* Translate a kernel address of @sym into its equivalent linear mapping */
>   #define kvm_ksym_ref(sym)						\
>   	({								\
>   		void *val = &sym;					\
> @@ -70,6 +71,20 @@ extern u32 __init_stage2_translation(void);
>   
>   extern void __qcom_hyp_sanitize_btac_predictors(void);
>   
> +#else /* __ASSEMBLY__ */
> +
> +.macro get_host_ctxt reg, tmp
> +	adr_l	\reg, kvm_host_cpu_state
> +	mrs	\tmp, tpidr_el2
> +	add	\reg, \reg, \tmp
> +.endm
> +
> +.macro get_vcpu_ptr vcpu, ctxt
> +	get_host_ctxt \ctxt, \vcpu
> +	ldr	\vcpu, [\ctxt, #HOST_CONTEXT_VCPU]
> +	kern_hyp_va	\vcpu
> +.endm
> +
>   #endif
>   
>   #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 596f8e414a4c..618cfee7206a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -358,10 +358,15 @@ int kvm_perf_teardown(void);
>   
>   struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
>   
> +void __kvm_set_tpidr_el2(u64 tpidr_el2);
> +DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
> +
>   static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
>   				       unsigned long hyp_stack_ptr,
>   				       unsigned long vector_ptr)
>   {
> +	u64 tpidr_el2;
> +
>   	/*
>   	 * Call initialization code, and switch to the full blown HYP code.
>   	 * If the cpucaps haven't been finalized yet, something has gone very
> @@ -370,6 +375,16 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
>   	 */
>   	BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
>   	__kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
> +
> +	/*
> +	 * Calculate the raw per-cpu offset without a translation from the
> +	 * kernel's mapping to the linear mapping, and store it in tpidr_el2
> +	 * so that we can use adr_l to access per-cpu variables in EL2.
> +	 */
> +	tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
> +		- (u64)kvm_ksym_ref(kvm_host_cpu_state);
> +
> +	kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
>   }
>   
>   static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 1303e04110cd..78e1b0a70aaf 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -138,6 +138,7 @@ int main(void)
>     DEFINE(CPU_FP_REGS,		offsetof(struct kvm_regs, fp_regs));
>     DEFINE(VCPU_FPEXC32_EL2,	offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2]));
>     DEFINE(VCPU_HOST_CONTEXT,	offsetof(struct kvm_vcpu, arch.host_cpu_context));
> +  DEFINE(HOST_CONTEXT_VCPU,	offsetof(struct kvm_cpu_context, __hyp_running_vcpu));
>   #endif
>   #ifdef CONFIG_CPU_PM
>     DEFINE(CPU_SUSPEND_SZ,	sizeof(struct cpu_suspend_ctx));
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index fdd1068ee3a5..1f458f7c3b44 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -62,9 +62,6 @@ ENTRY(__guest_enter)
>   	// Store the host regs
>   	save_callee_saved_regs x1
>   
> -	// Store host_ctxt and vcpu for use at exit time
> -	stp	x1, x0, [sp, #-16]!
> -
>   	add	x18, x0, #VCPU_CONTEXT
>   
>   	// Restore guest regs x0-x17
> @@ -118,8 +115,7 @@ ENTRY(__guest_exit)
>   	// Store the guest regs x19-x29, lr
>   	save_callee_saved_regs x1
>   
> -	// Restore the host_ctxt from the stack
> -	ldr	x2, [sp], #16
> +	get_host_ctxt	x2, x3
>   
>   	// Now restore the host regs
>   	restore_callee_saved_regs x2
> diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
> index f36464bd57c5..82fbc368f738 100644
> --- a/arch/arm64/kvm/hyp/hyp-entry.S
> +++ b/arch/arm64/kvm/hyp/hyp-entry.S
> @@ -57,13 +57,8 @@ ENDPROC(__vhe_hyp_call)
>   el1_sync:				// Guest trapped into EL2
>   	stp	x0, x1, [sp, #-16]!
>   
> -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
> -	mrs	x1, esr_el2
> -alternative_else
> -	mrs	x1, esr_el1
> -alternative_endif
> -	lsr	x0, x1, #ESR_ELx_EC_SHIFT
> -
> +	mrs	x0, esr_el2
> +	lsr	x0, x0, #ESR_ELx_EC_SHIFT
>   	cmp	x0, #ESR_ELx_EC_HVC64
>   	ccmp	x0, #ESR_ELx_EC_HVC32, #4, ne
>   	b.ne	el1_trap
> @@ -117,10 +112,14 @@ el1_hvc_guest:
>   	eret
>   
>   el1_trap:
> +	get_vcpu_ptr	x1, x0
> +
> +	mrs		x0, esr_el2
> +	lsr		x0, x0, #ESR_ELx_EC_SHIFT
>   	/*
>   	 * x0: ESR_EC
> +	 * x1: vcpu pointer
>   	 */
> -	ldr	x1, [sp, #16 + 8]	// vcpu stored by __guest_enter
>   
>   	/*
>   	 * We trap the first access to the FP/SIMD to save the host context
> @@ -138,13 +137,13 @@ alternative_else_nop_endif
>   
>   el1_irq:
>   	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_vcpu_ptr	x1, x0
>   	mov	x0, #ARM_EXCEPTION_IRQ
>   	b	__guest_exit
>   
>   el1_error:
>   	stp     x0, x1, [sp, #-16]!
> -	ldr	x1, [sp, #16 + 8]
> +	get_vcpu_ptr	x1, x0
>   	mov	x0, #ARM_EXCEPTION_EL1_SERROR
>   	b	__guest_exit
>   
> @@ -180,14 +179,7 @@ ENTRY(__hyp_do_panic)
>   ENDPROC(__hyp_do_panic)
>   
>   ENTRY(__hyp_panic)
> -	/*
> -	 * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
> -	 * not be accessible by this address from EL2, hyp_panic() converts
> -	 * it with kern_hyp_va() before use.
> -	 */
> -	ldr	x0, =kvm_host_cpu_state
> -	mrs	x1, tpidr_el2
> -	add	x0, x0, x1
> +	get_host_ctxt x0, x1
>   	b	hyp_panic
>   ENDPROC(__hyp_panic)
>   
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 24f52fedfb9e..46717da75643 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -469,7 +469,7 @@ static hyp_alternate_select(__hyp_call_panic,
>   			    __hyp_call_panic_nvhe, __hyp_call_panic_vhe,
>   			    ARM64_HAS_VIRT_HOST_EXTN);
>   
> -void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
> +void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
>   {
>   	struct kvm_vcpu *vcpu = NULL;
>   
> @@ -478,9 +478,6 @@ void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
>   	u64 par = read_sysreg(par_el1);
>   
>   	if (read_sysreg(vttbr_el2)) {
> -		struct kvm_cpu_context *host_ctxt;
> -
> -		host_ctxt = kern_hyp_va(__host_ctxt);
>   		vcpu = host_ctxt->__hyp_running_vcpu;
>   		__timer_disable_traps(vcpu);
>   		__deactivate_traps(vcpu);
> diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
> index 2c17afd2be96..43b7dd65e3e6 100644
> --- a/arch/arm64/kvm/hyp/sysreg-sr.c
> +++ b/arch/arm64/kvm/hyp/sysreg-sr.c
> @@ -189,3 +189,8 @@ void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu)
>   	if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
>   		write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2);
>   }
> +
> +void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
> +{
> +	asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
> +}
> 

-- 
Julien Grall



More information about the linux-arm-kernel mailing list