[PATCH v3 1/1] kvm: arm64: Add SVE support for nVHE.
Marc Zyngier
maz at kernel.org
Tue Mar 9 10:16:04 GMT 2021
Hi Daniel,.
On Tue, 02 Mar 2021 16:48:50 +0000,
Daniel Kiss <daniel.kiss at arm.com> wrote:
>
> CPUs that support SVE are architecturally required to support the
> Virtualization Host Extensions (VHE), so far the kernel supported
> SVE alongside KVM with VHE enabled. In same cases it is desired to
> run nVHE config even when VHE is available.
> This patch add support for SVE for nVHE configuration too.
>
> Tested on FVP with a Linux guest VM that run with a different VL than
> the host system.
>
> Signed-off-by: Daniel Kiss <daniel.kiss at arm.com>
> ---
> arch/arm64/Kconfig | 7 -----
> arch/arm64/include/asm/el2_setup.h | 2 +-
> arch/arm64/include/asm/fpsimd.h | 6 ++++
> arch/arm64/include/asm/fpsimdmacros.h | 24 ++++++++++++++--
> arch/arm64/include/asm/kvm_arm.h | 6 ++++
> arch/arm64/include/asm/kvm_host.h | 17 +++--------
> arch/arm64/kernel/entry-fpsimd.S | 5 ----
> arch/arm64/kvm/arm.c | 5 ----
> arch/arm64/kvm/fpsimd.c | 38 ++++++++++++++++++++-----
> arch/arm64/kvm/hyp/fpsimd.S | 15 ++++++++++
> arch/arm64/kvm/hyp/include/hyp/switch.h | 34 +++++++++++-----------
> arch/arm64/kvm/hyp/nvhe/switch.c | 24 ++++++++++++++++
> arch/arm64/kvm/reset.c | 4 ---
> 13 files changed, 127 insertions(+), 60 deletions(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index f39568b28ec1..049428f1bf27 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -1676,7 +1676,6 @@ endmenu
> config ARM64_SVE
> bool "ARM Scalable Vector Extension support"
> default y
> - depends on !KVM || ARM64_VHE
> help
> The Scalable Vector Extension (SVE) is an extension to the AArch64
> execution state which complements and extends the SIMD functionality
> @@ -1705,12 +1704,6 @@ config ARM64_SVE
> booting the kernel. If unsure and you are not observing these
> symptoms, you should assume that it is safe to say Y.
>
> - CPUs that support SVE are architecturally required to support the
> - Virtualization Host Extensions (VHE), so the kernel makes no
> - provision for supporting SVE alongside KVM without VHE enabled.
> - Thus, you will need to enable CONFIG_ARM64_VHE if you want to support
> - KVM in the same kernel image.
> -
> config ARM64_MODULE_PLTS
> bool "Use PLTs to allow module memory to spill over into vmalloc area"
> depends on MODULES
> diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
> index a7f5a1bbc8ac..0207393e67c3 100644
> --- a/arch/arm64/include/asm/el2_setup.h
> +++ b/arch/arm64/include/asm/el2_setup.h
> @@ -133,7 +133,7 @@
> bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
> msr cptr_el2, x0 // Disable copro. traps to EL2
> isb
> - mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
> + mov x1, #ZCR_EL2_LEN_HOST // SVE: Enable full vector
> msr_s SYS_ZCR_EL2, x1 // length for EL1.
> 1:
> .endm
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index bec5f14b622a..526d69f3eeb3 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -69,6 +69,12 @@ static inline void *sve_pffr(struct thread_struct *thread)
> extern void sve_save_state(void *state, u32 *pfpsr);
> extern void sve_load_state(void const *state, u32 const *pfpsr,
> unsigned long vq_minus_1);
> +/*
> + * sve_load_state_nvhe function for the hyp code where the SVE registers are
> + * handled from the EL2, vector length is governed by ZCR_EL2.
> + */
> +extern void sve_load_state_nvhe(void const *state, u32 const *pfpsr,
> + unsigned long vq_minus_1);
> extern void sve_flush_live(void);
> extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
> unsigned long vq_minus_1);
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index af43367534c7..d309c6071bce 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -205,6 +205,17 @@
> 921:
> .endm
>
> +/* Update ZCR_EL2.LEN with the new VQ */
> +.macro sve_load_vq_nvhe xvqminus1, xtmp, xtmp2
> + mrs_s \xtmp, SYS_ZCR_EL2
> + bic \xtmp2, \xtmp, ZCR_ELx_LEN_MASK
> + orr \xtmp2, \xtmp2, \xvqminus1
> + cmp \xtmp2, \xtmp
> + b.eq 922f
> + msr_s SYS_ZCR_EL2, \xtmp2 //self-synchronising
> +922:
> +.endm
No, please. There is strictly no need for a new macro that only
differs by the EL1 vs EL2 register. That's why we have alternatives
for. And actually, you should be able to use the ZCR_EL2 register at
all times, including on VHE.
Please get rid of this.
> +
> /* Preserve the first 128-bits of Znz and zero the rest. */
> .macro _sve_flush_z nz
> _sve_check_zreg \nz
> @@ -230,8 +241,7 @@
> str w\nxtmp, [\xpfpsr, #4]
> .endm
>
> -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> - sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
> +.macro _sve_load nxbase, xpfpsr, nxtmp
> _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
> _sve_ldr_p 0, \nxbase
> _sve_wrffr 0
> @@ -242,3 +252,13 @@
> ldr w\nxtmp, [\xpfpsr, #4]
> msr fpcr, x\nxtmp
> .endm
> +
> +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> + sve_load_vq \xvqminus1, x\nxtmp, \xtmp2
> + _sve_load \nxbase, \xpfpsr, \nxtmp
> +.endm
> +
> +.macro sve_load_nvhe nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2
> + sve_load_vq_nvhe \xvqminus1, x\nxtmp, \xtmp2
> + _sve_load \nxbase, \xpfpsr, \nxtmp
> +.endm
> diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
> index 4e90c2debf70..24d02365cc24 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -332,4 +332,10 @@
> #define CPACR_EL1_TTA (1 << 28)
> #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN)
>
> +/*
> + * Always the maximum vector length is set for the host, because
> + * it need to access the whole SVE register.
> + */
> +#define ZCR_EL2_LEN_HOST ZCR_ELx_LEN_MASK
> +
> #endif /* __ARM64_KVM_ARM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8fcfab0c2567..11a058c81c1d 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -376,6 +376,10 @@ struct kvm_vcpu_arch {
> #define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>
> +#define vcpu_sve_pffr_hyp(vcpu) ((void *)((char *) \
> + (kern_hyp_va((vcpu)->arch.sve_state)) + \
> + sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
> #define vcpu_sve_state_size(vcpu) ({ \
> size_t __size_ret; \
> unsigned int __vcpu_vq; \
> @@ -693,19 +697,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
> ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr();
> }
>
> -static inline bool kvm_arch_requires_vhe(void)
> -{
> - /*
> - * The Arm architecture specifies that implementation of SVE
> - * requires VHE also to be implemented. The KVM code for arm64
> - * relies on this when SVE is present:
> - */
> - if (system_supports_sve())
> - return true;
> -
> - return false;
> -}
> -
> void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
>
> static inline void kvm_arch_hardware_unsetup(void) {}
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 2ca395c25448..e444b753c518 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -33,11 +33,6 @@ SYM_FUNC_END(fpsimd_load_state)
>
> #ifdef CONFIG_ARM64_SVE
>
> -SYM_FUNC_START(sve_save_state)
> - sve_save 0, x1, 2
> - ret
> -SYM_FUNC_END(sve_save_state)
> -
> SYM_FUNC_START(sve_load_state)
> sve_load 0, x1, x2, 3, x4
> ret
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index fe60d25c000e..6284563b352a 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1890,11 +1890,6 @@ int kvm_arch_init(void *opaque)
>
> in_hyp_mode = is_kernel_in_hyp_mode();
>
> - if (!in_hyp_mode && kvm_arch_requires_vhe()) {
> - kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n");
> - return -ENODEV;
> - }
> -
> if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) ||
> cpus_have_final_cap(ARM64_WORKAROUND_1508412))
> kvm_info("Guests without required CPU erratum workarounds can deadlock system!\n" \
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 3e081d556e81..f5f648e78578 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -42,6 +42,16 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
> if (ret)
> goto error;
>
> + if (!has_vhe() && vcpu->arch.sve_state) {
Why the !has_vhe() predicate? create_hyp_mappings() is equipped to
deal with VHE vs nVHE.
> + void *sve_state_end = vcpu->arch.sve_state +
> + SVE_SIG_REGS_SIZE(
> + sve_vq_from_vl(vcpu->arch.sve_max_vl));
> + ret = create_hyp_mappings(vcpu->arch.sve_state,
> + sve_state_end,
> + PAGE_HYP);
> + if (ret)
> + goto error;
> + }
> vcpu->arch.host_thread_info = kern_hyp_va(ti);
> vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
> error:
> @@ -109,10 +119,21 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
> local_irq_save(flags);
>
> if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
> + if (guest_has_sve) {
> + if (has_vhe())
> + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
> + else {
> + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL1);
The two cases can be made common by using the correct accessor:
__vpcu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> + /*
> + * vcpu could set ZCR_EL1 to a shorter VL than the max VL but
> + * the context may be still valid there so the host should save
> + * the whole context. In nVHE case the ZCR_EL1 need to be reset.
> + */
Please keep comments within 80 columns.
> + write_sysreg_s(sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1,
> + SYS_ZCR_EL1);
Why is this a nVHE-only requirement?
> + }
> + }
> fpsimd_save_and_flush_cpu_state();
> -
> - if (guest_has_sve)
> - __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_s(SYS_ZCR_EL12);
> } else if (host_has_sve) {
> /*
> * The FPSIMD/SVE state in the CPU has not been touched, and we
> @@ -120,11 +141,14 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
> * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
> * for EL0. To avoid spurious traps, restore the trap state
> * seen by kvm_arch_vcpu_load_fp():
> + * nVHE case the CPACR_EL1 is context switched.
But if CPACR_EL1 is context switched earlier than ZCR_EL1, what makes
it legal to postpone context-switching *some* of the state to a later
point.
I think that's the main issue I have with this patch: it tries to
shove SVE on nVHE in the VHE model of using load/put rather than
save/restore (which is in general the rule for nVHE) without any
indication of *why* it can be done like this.
> */
> - if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
> - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
> - else
> - sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
> + if (has_vhe()) {
> + if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
> + sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
> + else
> + sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
> + }
> }
>
> update_thread_flag(TIF_SVE,
> diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
> index 01f114aa47b0..da824b46b81b 100644
> --- a/arch/arm64/kvm/hyp/fpsimd.S
> +++ b/arch/arm64/kvm/hyp/fpsimd.S
> @@ -6,6 +6,7 @@
>
> #include <linux/linkage.h>
>
> +#include <asm/assembler.h>
> #include <asm/fpsimdmacros.h>
>
> .text
> @@ -19,3 +20,17 @@ SYM_FUNC_START(__fpsimd_restore_state)
> fpsimd_restore x0, 1
> ret
> SYM_FUNC_END(__fpsimd_restore_state)
> +
> +#ifdef CONFIG_ARM64_SVE
> +
> +SYM_FUNC_START(sve_save_state)
> + sve_save 0, x1, 2
> + ret
> +SYM_FUNC_END(sve_save_state)
> +
> +SYM_FUNC_START(sve_load_state_nvhe)
> + sve_load_nvhe 0, x1, x2, 3, x4
> + ret
> +SYM_FUNC_END(sve_load_state_nvhe)
> +
> +#endif
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 84473574c2e7..99e7f0d5bb64 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -205,19 +205,13 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
> if (!system_supports_fpsimd())
> return false;
>
> - /*
> - * Currently system_supports_sve() currently implies has_vhe(),
> - * so the check is redundant. However, has_vhe() can be determined
> - * statically and helps the compiler remove dead code.
> - */
> - if (has_vhe() && system_supports_sve()) {
> + vhe = has_vhe();
> + if (system_supports_sve()) {
> sve_guest = vcpu_has_sve(vcpu);
> sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
> - vhe = true;
> } else {
> sve_guest = false;
> sve_host = false;
> - vhe = has_vhe();
> }
>
> esr_ec = kvm_vcpu_trap_get_class(vcpu);
> @@ -240,17 +234,17 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
>
> write_sysreg(reg, cpacr_el1);
> } else {
> - write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
> - cptr_el2);
> + u64 reg = read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP;
> +
> + if (sve_guest)
> + reg &= ~(u64)CPTR_EL2_TZ;
> +
> + write_sysreg(reg, cptr_el2);
> }
>
> isb();
>
> if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
> - /*
> - * In the SVE case, VHE is assumed: it is enforced by
> - * Kconfig and kvm_arch_init().
> - */
> if (sve_host) {
> struct thread_struct *thread = container_of(
> vcpu->arch.host_fpsimd_state,
> @@ -266,10 +260,18 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
> }
>
> if (sve_guest) {
> - sve_load_state(vcpu_sve_pffr(vcpu),
> + if (vhe) {
> + sve_load_state(vcpu_sve_pffr(vcpu),
> + &vcpu->arch.ctxt.fp_regs.fpsr,
> + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
> + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
> + } else {
> + sve_load_state_nvhe(vcpu_sve_pffr_hyp(vcpu),
> &vcpu->arch.ctxt.fp_regs.fpsr,
> sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
> - write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL12);
> + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL1);
> +
> + }
> } else {
> __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs);
> }
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index f3d0e9eca56c..b0ded27918ce 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -45,6 +45,18 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
> if (!update_fp_enabled(vcpu)) {
> val |= CPTR_EL2_TFP;
> __activate_traps_fpsimd32(vcpu);
> + } else {
> + if (vcpu_has_sve(vcpu)) {
> + /*
> + * The register access will not be trapped so restore
> + * ZCR_EL1/ZCR_EL2 because those were set for the host.
> + */
> + write_sysreg_s(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR_EL1);
> + write_sysreg_s(
> + sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1,
> + SYS_ZCR_EL2);
> + val &= ~CPTR_EL2_TZ;
> + }
> }
>
> write_sysreg(val, cptr_el2);
> @@ -110,6 +122,17 @@ static void __load_host_stage2(void)
> write_sysreg(0, vttbr_el2);
> }
>
> +static void __restore_host_sve_state(struct kvm_vcpu *vcpu)
> +{
> + /*
> + * If the guest uses SVE, the ZCR_EL2 was configured for the guest.
> + * Host saves the context in EL1. That requires the ZCR_EL2 be set
> + * to the default of the host.
> + */
> + if (vcpu_has_sve(vcpu) && (vcpu->arch.flags |= KVM_ARM64_FP_ENABLED))
Did you actually intend to write *that*?
> + write_sysreg_s(ZCR_EL2_LEN_HOST, SYS_ZCR_EL2);
Apart from what I believe to be an obvious typo, you really need to
explain what is your context switching strategy. You seem to have
things both in save/restore and load/put. I can't really tell which
one is correct in your case because you don't explain anything, but
having *both* for a single mode of operation is definitely completely
wrong.
> +}
> +
> /* Save VGICv3 state on non-VHE systems */
> static void __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
> {
> @@ -229,6 +252,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
> __deactivate_traps(vcpu);
> __load_host_stage2();
>
> + __restore_host_sve_state(vcpu);
> __sysreg_restore_state_nvhe(host_ctxt);
>
> if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 47f3f035f3ea..f08b1e7ebf68 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -74,10 +74,6 @@ static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
> if (!system_supports_sve())
> return -EINVAL;
>
> - /* Verify that KVM startup enforced this when SVE was detected: */
> - if (WARN_ON(!has_vhe()))
> - return -EINVAL;
> -
> vcpu->arch.sve_max_vl = kvm_sve_max_vl;
>
> /*
I expect that the main part of the next version of this patch will be
a detailed explanation of the SVE nVHE context-switching strategy. At
it stands, I cannot tell what this patch is trying to do.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list