[PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM

Marc Zyngier <maz@kernel.org>
Tue May 21 14:44:52 PDT 2024


On Tue, 21 May 2024 17:37:18 +0100,
Fuad Tabba <tabba@google.com> wrote:
> 
> Protected mode needs to maintain (save/restore) the host's sve
> state, rather than relying on the host kernel to do that. This is
> to avoid leaking information to the host about guests and the
> type of operations they are performing.
> 
> As a first step towards that, allocate memory at hyp, per cpu, to
> hold the host sve data. The following patch will use this memory
> to save/restore the host state.

What I read in the code contradicts this statement. The memory is
definitely allocated on the *host*.
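
To spell out what the patch actually does (a sketch of the flow as I
read it; create_hyp_mappings() is used purely for illustration here --
in protected mode the EL2 mapping is established by the setup.c side
instead):

	static int pkvm_alloc_host_sve_state(void)
	{
		size_t size = pkvm_host_sve_state_size();
		struct page *page;
		void *sve_state;

		/* EL1, host buddy allocator: this is a *host* allocation */
		page = alloc_pages(GFP_KERNEL, get_order(size));
		if (!page)
			return -ENOMEM;

		sve_state = page_address(page);

		/*
		 * Only the mapping ends up at EL2; the pages themselves
		 * stay owned (and are eventually freed) by the host.
		 */
		return create_hyp_mappings(sve_state, sve_state + size,
					   PAGE_HYP);
	}

So "allocate memory at hyp" really means "allocate host memory *for*
hyp".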

> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> Note that the last patch in this series will consolidate the
> setup of the host's fpsimd and sve states, which currently take
> place in two different locations. Moreover, that last patch will
> also place the host fpsimd and sve_state pointers in a union.
> ---
>  arch/arm64/include/asm/kvm_host.h    |  2 +
>  arch/arm64/include/asm/kvm_pkvm.h    |  9 ++++
>  arch/arm64/include/uapi/asm/ptrace.h | 14 ++++++
>  arch/arm64/kvm/arm.c                 | 68 ++++++++++++++++++++++++++++
>  arch/arm64/kvm/hyp/nvhe/setup.c      | 24 ++++++++++
>  5 files changed, 117 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 0a5fceb20f3a..7b3745ef1d73 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -535,7 +535,9 @@ struct kvm_cpu_context {
>   */
>  struct kvm_host_data {
>  	struct kvm_cpu_context host_ctxt;
> +
>  	struct user_fpsimd_state *fpsimd_state;	/* hyp VA */
> +	struct user_sve_state *sve_state;	/* hyp VA */
>  
>  	/* Ownership of the FP regs */
>  	enum {
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index ad9cfb5c1ff4..b9d12e123efb 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
>  	return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
>  }
>  
> +static inline size_t pkvm_host_sve_state_size(void)
> +{
> +	if (!system_supports_sve())
> +		return 0;
> +
> +	return size_add(sizeof(struct user_sve_state),
> +			SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
> +}
> +
>  #endif	/* __ARM64_KVM_PKVM_H__ */
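
For scale (back-of-the-envelope arithmetic, not something the patch
spells out): SVE_SIG_REGS_SIZE() works out to 546 bytes per quadword
of vector length (32 Z regs at 16 bytes/VQ, 16 P regs plus FFR at
2 bytes/VQ each), so at the architectural maximum VL of 2048 bits
(VQ=16) that's 8736 bytes plus the small header -- an order-2, i.e.
4-page, per-CPU allocation with 4K pages.
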
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 7fa2f7036aa7..77aabf964071 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -120,6 +120,20 @@ struct user_sve_header {
>  	__u16 __reserved;
>  };
>  
> +struct user_sve_state {
> +	__u64 zcr_el1;
> +
> +	/*
> +	 * Ordering is important since __sve_save_state/__sve_restore_state
> +	 * relies on it.
> +	 */
> +	__u32 fpsr;
> +	__u32 fpcr;
> +
> +	/* Must be SVE_VQ_BYTES (128 bit) aligned. */
> +	__u8 sve_regs[];
> +};
> +

Huh, why is this in uapi? Why should userspace even care about this at
all? From what I can tell, this is purely internal to the kernel, and
in any case KVM isn't tied to that format as long as it doesn't dump
state directly into the userspace thread context. Given the number of
bugs this format has generated, I really wouldn't mind moving away
from it.
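
If this structure needs to exist at all, it belongs in a kernel-private
header. Something along these lines would do (a sketch only -- the
name and location are mine, not the patch's):

	/* arch/arm64/include/asm/kvm_host.h -- not uapi */
	struct cpu_sve_state {
		u64 zcr_el1;

		/*
		 * Ordering is important since __sve_save_state/
		 * __sve_restore_state relies on it.
		 */
		u32 fpsr;
		u32 fpcr;

		/* Must be SVE_VQ_BYTES (128 bit) aligned. */
		u8 sve_regs[];
	};

Same layout, just without the uapi types and the uapi exposure.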

>  /* Definitions for user_sve_header.flags: */
>  #define SVE_PT_REGS_MASK		(1 << 0)
>  
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9e565ea3d645..a9b1b0e9c319 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
>  	return size ? get_order(size) : 0;
>  }
>  
> +static size_t pkvm_host_sve_state_order(void)
> +{
> +	return get_order(pkvm_host_sve_state_size());
> +}
> +
>  /* A lookup table holding the hypervisor VA for each vector slot */
>  static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
>  
> @@ -2316,7 +2321,15 @@ static void __init teardown_hyp_mode(void)
>  	for_each_possible_cpu(cpu) {
>  		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
>  		free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
> +
> +		if (system_supports_sve() && is_protected_kvm_enabled()) {
> +			struct user_sve_state *sve_state;
> +
> +			sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> +			free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
> +		}
>  	}
> +
>  }
>  
>  static int __init do_pkvm_init(u32 hyp_va_bits)
> @@ -2399,6 +2412,50 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
>  	return 0;
>  }
>  
> +static int init_pkvm_host_sve_state(void)
> +{
> +	int cpu;
> +
> +	if (!system_supports_sve())
> +		return 0;
> +
> +	/* Allocate pages for host sve state in protected mode. */
> +	for_each_possible_cpu(cpu) {
> +		struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
> +

See? That's definitely not a HYP allocation.
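
If what is meant is that the buffer ends up *mapped* at EL2, then the
commit message should say exactly that. For reference, I'd expect the
(unquoted) setup.c hunk to reduce to something like this sketch,
assuming the usual nVHE pkvm_create_mappings() helper:

	/* arch/arm64/kvm/hyp/nvhe/setup.c -- sketch, not the patch */
	static int pkvm_map_host_sve_state(void *host_va, size_t size)
	{
		void *start = kern_hyp_va(host_va);

		/* EL2 only maps the pages; ownership stays with the host */
		return pkvm_create_mappings(start, start + size, PAGE_HYP);
	}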

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
