[PATCH v3 1/1] kvm: arm64: Add SVE support for nVHE.

Tue Mar 9 10:16:04 GMT 2021

Hi Daniel,.

On Tue, 02 Mar 2021 16:48:50 +0000,
Daniel Kiss <daniel.kiss at arm.com> wrote:
> 
> CPUs that support SVE are architecturally required to support the
> Virtualization Host Extensions (VHE), so far the kernel supported
> SVE alongside KVM with VHE enabled. In same cases it is desired to
> run nVHE config even when VHE is available.
> This patch add support for SVE for nVHE configuration too.
> 
> Tested on FVP with a Linux guest VM that run with a different VL than
> the host system.
> 
> Signed-off-by: Daniel Kiss <daniel.kiss at arm.com>
> ---
>  arch/arm64/Kconfig                      |  7 -----
>  arch/arm64/include/asm/el2_setup.h      |  2 +-
>  arch/arm64/include/asm/fpsimd.h         |  6 ++++
>  arch/arm64/include/asm/fpsimdmacros.h   | 24 ++++++++++++++--
>  arch/arm64/include/asm/kvm_arm.h        |  6 ++++
>  arch/arm64/include/asm/kvm_host.h       | 17 +++--------
>  arch/arm64/kernel/entry-fpsimd.S        |  5 ----
>  arch/arm64/kvm/arm.c                    |  5 ----
>  arch/arm64/kvm/fpsimd.c                 | 38 ++++++++++++++++++++-----
>  arch/arm64/kvm/hyp/fpsimd.S             | 15 ++++++++++
>  arch/arm64/kvm/hyp/include/hyp/switch.h | 34 +++++++++++-----------
>  arch/arm64/kvm/hyp/nvhe/switch.c        | 24 ++++++++++++++++
>  arch/arm64/kvm/reset.c                  |  4 ---
>  13 files changed, 127 insertions(+), 60 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f39568b28ec1..049428f1bf27 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1676,7 +1676,6 @@ endmenu
>  config ARM64_SVE
>  	bool "ARM Scalable Vector Extension support"
>  	default y
> -	depends on !KVM || ARM64_VHE
>  	help
>  	  The Scalable Vector Extension (SVE) is an extension to the AArch64
>  	  execution state which complements and extends the SIMD functionality
> @@ -1705,12 +1704,6 @@ config ARM64_SVE
>  	  booting the kernel.  If unsure and you are not observing these
>  	  symptoms, you should assume that it is safe to say Y.
>  
> -	  CPUs that support SVE are architecturally required to support the
> -	  Virtualization Host Extensions (VHE), so the kernel makes no
> -	  provision for supporting SVE alongside KVM without VHE enabled.
> -	  Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
> -	  KVM in the same kernel image.
> -
>  config ARM64_MODULE_PLTS
>  	bool "Use PLTs to allow module memory to spill over into vmalloc area"
>  	depends on MODULES
> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
> index a7f5a1bbc8ac..0207393e67c3 100644
> --- a/arch/arm64/include/asm/el2_setup.h
> +++ b/arch/arm64/include/asm/el2_setup.h
> @@ -133,7 +133,7 @@
>  	bic	x0, x0, #CPTR_EL2_TZ		// Also disable SVE traps
>  	msr	cptr_el2, x0			// Disable copro. traps to EL2
>  	isb
> -	mov	x1, #ZCR_ELx_LEN_MASK		// SVE: Enable full vector
> +	mov	x1, #ZCR_EL2_LEN_HOST		// SVE: Enable full vector
>  	msr_s	SYS_ZCR_EL2, x1			// length for EL1.
>  1:
>  .endm
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index bec5f14b622a..526d69f3eeb3 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -69,6 +69,12 @@ static inline void *sve_pffr(struct thread_struct *thread)
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
> +/*
> + * sve_load_state_nvhe function for the hyp code where the SVE registers are
> + * handled from the EL2, vector length is governed by ZCR_EL2.
> + */
> +extern void sve_load_state_nvhe(void const *state, u32 const *pfpsr,
> +			   unsigned long vq_minus_1);
>  extern void sve_flush_live(void);
>  extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
>  				       unsigned long vq_minus_1);
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index af43367534c7..d309c6071bce 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -205,6 +205,17 @@
>  921:
>  .endm
>  
> +/* Update ZCR_EL2.LEN with the new VQ */
> +.macro sve_load_vq_nvhe xvqminus1, xtmp, xtmp2
> +		mrs_s		\xtmp, SYS_ZCR_EL2
> +		bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
> +		orr		\xtmp2, \xtmp2, \xvqminus1
> +		cmp		\xtmp2, \xtmp
> +		b.eq		922f
> +		msr_s		SYS_ZCR_EL2, \xtmp2	//self-synchronising
> +922:
> +.endm

No, please. There is strictly no need for a new macro that only
differs by the EL1 vs EL2 register. That's why we have alternatives
for. And actually, you should be able to use the ZCR_EL2 register at
all times, including on VHE.

Please get rid of this.

> +
>  /* Preserve the first 128-bits of Znz and zero the rest. */
>  .macro _sve_flush_z nz
>  	_sve_check_zreg \nz
> @@ -230,8 +241,7 @@
>  		str		w\nxtmp, [\xpfpsr, #4]
>  .endm
>  
> -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> -		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
> +.macro _sve_load nxbase, xpfpsr, nxtmp
>   _for n, 0, 31,	_sve_ldr_v	\n, \nxbase, \n - 34
>  		_sve_ldr_p	0, \nxbase
>  		_sve_wrffr	0
> @@ -242,3 +252,13 @@
>  		ldr		w\nxtmp, [\xpfpsr, #4]
>  		msr		fpcr, x\nxtmp
>  .endm
> +
> +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> +		sve_load_vq	\xvqminus1, x\nxtmp, \xtmp2
> +		_sve_load	\nxbase, \xpfpsr, \nxtmp
> +.endm
> +
> +.macro sve_load_nvhe nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> +		sve_load_vq_nvhe	\xvqminus1, x\nxtmp, \xtmp2
> +		_sve_load	 \nxbase, \xpfpsr, \nxtmp
> +.endm
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 4e90c2debf70..24d02365cc24 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -332,4 +332,10 @@
>  #define CPACR_EL1_TTA		(1 << 28)
>  #define CPACR_EL1_DEFAULT	(CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
>  
> +/*
> + * Always the maximum vector length is set for the host, because
> + * it need to access the whole SVE register.
> + */
> +#define ZCR_EL2_LEN_HOST ZCR_ELx_LEN_MASK
> +
>  #endif /* __ARM64_KVM_ARM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8fcfab0c2567..11a058c81c1d 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -376,6 +376,10 @@ struct kvm_vcpu_arch {
>  #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>  				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>  
> +#define vcpu_sve_pffr_hyp(vcpu) ((void *)((char *) \
> +			(kern_hyp_va((vcpu)->arch.sve_state)) + \
> +			sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
>  #define vcpu_sve_state_size(vcpu) ({					\
>  	size_t __size_ret;						\
>  	unsigned int __vcpu_vq;						\
> @@ -693,19 +697,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
>  	ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
>  }
>  
> -static inline bool kvm_arch_requires_vhe(void)
> -{
> -	/*
> -	 * The Arm architecture specifies that implementation of SVE
> -	 * requires VHE also to be implemented.  The KVM code for arm64
> -	 * relies on this when SVE is present:
> -	 */
> -	if (system_supports_sve())
> -		return true;
> -
> -	return false;
> -}
> -
>  void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
>  
>  static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 2ca395c25448..e444b753c518 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -33,11 +33,6 @@ SYM_FUNC_END(fpsimd_load_state)
>  
>  #ifdef CONFIG_ARM64_SVE
>  
> -SYM_FUNC_START(sve_save_state)
> -	sve_save 0, x1, 2
> -	ret
> -SYM_FUNC_END(sve_save_state)
> -
>  SYM_FUNC_START(sve_load_state)
>  	sve_load 0, x1, x2, 3, x4
>  	ret
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index fe60d25c000e..6284563b352a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1890,11 +1890,6 @@ int kvm_arch_init(void *opaque)
>  
>  	in_hyp_mode = is_kernel_in_hyp_mode();
>  
> -	if (!in_hyp_mode && kvm_arch_requires_vhe()) {
> -		kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
> -		return -ENODEV;
> -	}
> -
>  	if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
>  	    cpus_have_final_cap(ARM64_WORKAROUND_1508412))
>  		kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 3e081d556e81..f5f648e78578 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -42,6 +42,16 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
>  	if (ret)
>  		goto error;
>  
> +	if (!has_vhe() && vcpu->arch.sve_state) {

Why the !has_vhe() predicate? create_hyp_mappings() is equipped to
deal with VHE vs nVHE.

> +		void *sve_state_end = vcpu->arch.sve_state +
> +					    SVE_SIG_REGS_SIZE(
> +						sve_vq_from_vl(vcpu->arch.sve_max_vl));
> +		ret = create_hyp_mappings(vcpu->arch.sve_state,
> +					  sve_state_end,
> +					  PAGE_HYP);
> +		if (ret)
> +			goto error;
> +	}
>  	vcpu->arch.host_thread_info = kern_hyp_va(ti);
>  	vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
>  error:
> @@ -109,10 +119,21 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
>  	local_irq_save(flags);
>  
>  	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> +		if (guest_has_sve) {
> +			if (has_vhe())
> +				__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
> +			else {
> +				__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL1);

The two cases can be made common by using the correct accessor:

	__vpcu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);

> +				/*
> +				 * vcpu could set ZCR_EL1 to a shorter VL than the max VL but
> +				 * the context may be still valid there so the host should save
> +				 * the whole context. In nVHE case the ZCR_EL1 need to be reset.
> +				 */

Please keep comments within 80 columns.

> +				write_sysreg_s(sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1,
> +					       SYS_ZCR_EL1);

Why is this a nVHE-only requirement?

> +			}
> +		}
>  		fpsimd_save_and_flush_cpu_state();
> -
> -		if (guest_has_sve)
> -			__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
>  	} else if (host_has_sve) {
>  		/*
>  		 * The FPSIMD/SVE state in the CPU has not been touched, and we
> @@ -120,11 +141,14 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
>  		 * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
>  		 * for EL0.  To avoid spurious traps, restore the trap state
>  		 * seen by kvm_arch_vcpu_load_fp():
> +		 * nVHE case the CPACR_EL1 is context switched.

But if CPACR_EL1 is context switched earlier than ZCR_EL1, what makes
it legal to postpone context-switching *some* of the state to a later
point.

I think that's the main issue I have with this patch: it tries to
shove SVE on nVHE in the VHE model of using load/put rather than
save/restore (which is in general the rule for nVHE) without any
indication of *why* it can be done like this.

>  		 */
> -		if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
> -			sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
> -		else
> -			sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
> +		if (has_vhe()) {
> +			if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
> +				sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
> +			else
> +				sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
> +		}
>  	}
>  
>  	update_thread_flag(TIF_SVE,
> diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
> index 01f114aa47b0..da824b46b81b 100644
> --- a/arch/arm64/kvm/hyp/fpsimd.S
> +++ b/arch/arm64/kvm/hyp/fpsimd.S
> @@ -6,6 +6,7 @@
>  
>  #include <linux/linkage.h>
>  
> +#include <asm/assembler.h>
>  #include <asm/fpsimdmacros.h>
>  
>  	.text
> @@ -19,3 +20,17 @@ SYM_FUNC_START(__fpsimd_restore_state)
>  	fpsimd_restore	x0, 1
>  	ret
>  SYM_FUNC_END(__fpsimd_restore_state)
> +
> +#ifdef CONFIG_ARM64_SVE
> +
> +SYM_FUNC_START(sve_save_state)
> +	sve_save 0, x1, 2
> +	ret
> +SYM_FUNC_END(sve_save_state)
> +
> +SYM_FUNC_START(sve_load_state_nvhe)
> +	sve_load_nvhe 0, x1, x2, 3, x4
> +	ret
> +SYM_FUNC_END(sve_load_state_nvhe)
> +
> +#endif
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 84473574c2e7..99e7f0d5bb64 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -205,19 +205,13 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>  	if (!system_supports_fpsimd())
>  		return false;
>  
> -	/*
> -	 * Currently system_supports_sve() currently implies has_vhe(),
> -	 * so the check is redundant. However, has_vhe() can be determined
> -	 * statically and helps the compiler remove dead code.
> -	 */
> -	if (has_vhe() && system_supports_sve()) {
> +	vhe = has_vhe();
> +	if (system_supports_sve()) {
>  		sve_guest = vcpu_has_sve(vcpu);
>  		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
> -		vhe = true;
>  	} else {
>  		sve_guest = false;
>  		sve_host = false;
> -		vhe = has_vhe();
>  	}
>  
>  	esr_ec = kvm_vcpu_trap_get_class(vcpu);
> @@ -240,17 +234,17 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>  
>  		write_sysreg(reg, cpacr_el1);
>  	} else {
> -		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
> -			     cptr_el2);
> +		u64 reg = read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP;
> +
> +		if (sve_guest)
> +			reg &= ~(u64)CPTR_EL2_TZ;
> +
> +		write_sysreg(reg, cptr_el2);
>  	}
>  
>  	isb();
>  
>  	if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
> -		/*
> -		 * In the SVE case, VHE is assumed: it is enforced by
> -		 * Kconfig and kvm_arch_init().
> -		 */
>  		if (sve_host) {
>  			struct thread_struct *thread = container_of(
>  				vcpu->arch.host_fpsimd_state,
> @@ -266,10 +260,18 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (sve_guest) {
> -		sve_load_state(vcpu_sve_pffr(vcpu),
> +		if (vhe) {
> +			sve_load_state(vcpu_sve_pffr(vcpu),
> +			       &vcpu->arch.ctxt.fp_regs.fpsr,
> +			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
> +			write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
> +		} else {
> +			sve_load_state_nvhe(vcpu_sve_pffr_hyp(vcpu),
>  			       &vcpu->arch.ctxt.fp_regs.fpsr,
>  			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
> -		write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
> +			write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL1);
> +
> +		}
>  	} else {
>  		__fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
>  	}
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index f3d0e9eca56c..b0ded27918ce 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -45,6 +45,18 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
>  	if (!update_fp_enabled(vcpu)) {
>  		val |= CPTR_EL2_TFP;
>  		__activate_traps_fpsimd32(vcpu);
> +	} else {
> +		if (vcpu_has_sve(vcpu)) {
> +			/*
> +			 * The register access will not be trapped so restore
> +			 * ZCR_EL1/ZCR_EL2 because those were set for the host.
> +			 */
> +			write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL1);
> +			write_sysreg_s(
> +				sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1,
> +				SYS_ZCR_EL2);
> +			val &= ~CPTR_EL2_TZ;
> +		}
>  	}
>  
>  	write_sysreg(val, cptr_el2);
> @@ -110,6 +122,17 @@ static void __load_host_stage2(void)
>  	write_sysreg(0, vttbr_el2);
>  }
>  
> +static void __restore_host_sve_state(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * If the guest uses SVE, the ZCR_EL2 was configured for the guest.
> +	 * Host saves the context in EL1. That requires the ZCR_EL2 be set
> +	 * to the default of the host.
> +	 */
> +	if (vcpu_has_sve(vcpu) && (vcpu->arch.flags |= KVM_ARM64_FP_ENABLED))

Did you actually intend to write *that*?

> +		write_sysreg_s(ZCR_EL2_LEN_HOST, SYS_ZCR_EL2);

Apart from what I believe to be an obvious typo, you really need to
explain what is your context switching strategy. You seem to have
things both in save/restore and load/put. I can't really tell which
one is correct in your case because you don't explain anything, but
having *both* for a single mode of operation is definitely completely
wrong.

> +}
> +
>  /* Save VGICv3 state on non-VHE systems */
>  static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
>  {
> @@ -229,6 +252,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
>  	__deactivate_traps(vcpu);
>  	__load_host_stage2();
>  
> +	__restore_host_sve_state(vcpu);
>  	__sysreg_restore_state_nvhe(host_ctxt);
>  
>  	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 47f3f035f3ea..f08b1e7ebf68 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -74,10 +74,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
>  	if (!system_supports_sve())
>  		return -EINVAL;
>  
> -	/* Verify that KVM startup enforced this when SVE was detected: */
> -	if (WARN_ON(!has_vhe()))
> -		return -EINVAL;
> -
>  	vcpu->arch.sve_max_vl = kvm_sve_max_vl;
>  
>  	/*

I expect that the main part of the next version of this patch will be
a detailed explanation of the SVE nVHE context-switching strategy. At
it stands, I cannot tell what this patch is trying to do.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.