[PATCH v2 2/7] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE

Marc Zyngier maz at kernel.org
Mon Jul 11 02:40:50 PDT 2022


On Mon, 20 Jun 2022 13:41:53 +0100,
Mark Brown <broonie at kernel.org> wrote:
> 
> When we save the state for the floating point registers this can be done
> in the form visible through either the FPSIMD V registers or the SVE Z and
> P registers. At present we track which format is currently used based on
> TIF_SVE and the SME streaming mode state but particularly in the SVE case
> this limits our options for optimising things, especially around syscalls.
> Introduce a new enum in thread_struct which explicitly states which format
> is active and keep it up to date when we change it.
> 
> At present we do not use this state except to verify that it has the
> expected value when loading the state, future patches will introduce
> functional changes.
> 
> Signed-off-by: Mark Brown <broonie at kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h    |  2 +-
>  arch/arm64/include/asm/kvm_host.h  |  1 +
>  arch/arm64/include/asm/processor.h |  6 ++++
>  arch/arm64/kernel/fpsimd.c         | 57 ++++++++++++++++++++++--------
>  arch/arm64/kernel/process.c        |  2 ++
>  arch/arm64/kernel/ptrace.c         |  6 ++--
>  arch/arm64/kernel/signal.c         |  3 ++
>  arch/arm64/kvm/fpsimd.c            |  3 +-
>  8 files changed, 61 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index 847fd119cdb8..5762419fdcc0 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
>  extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
>  				     void *sve_state, unsigned int sve_vl,
>  				     void *za_state, unsigned int sme_vl,
> -				     u64 *svcr);
> +				     u64 *svcr, enum fp_state *type);
>  
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>  extern void fpsimd_save_and_flush_cpu_state(void);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index de32152cea04..250e23f221c4 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -310,6 +310,7 @@ struct kvm_vcpu_arch {
>  	void *sve_state;
>  	unsigned int sve_max_vl;
>  	u64 svcr;
> +	enum fp_state fp_type;
>  
>  	/* Stage 2 paging state used by the hardware on next switch */
>  	struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 9e58749db21d..192986509a8e 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -122,6 +122,11 @@ enum vec_type {
>  	ARM64_VEC_MAX,
>  };
>  
> +enum fp_state {
> +	FP_STATE_FPSIMD,
> +	FP_STATE_SVE,
> +};
> +
>  struct cpu_context {
>  	unsigned long x19;
>  	unsigned long x20;
> @@ -152,6 +157,7 @@ struct thread_struct {
>  		struct user_fpsimd_state fpsimd_state;
>  	} uw;
>  
> +	enum fp_state		fp_type;	/* registers FPSIMD or SVE? */
>  	unsigned int		fpsimd_cpu;
>  	void			*sve_state;	/* SVE registers, if any */
>  	void			*za_state;	/* ZA register, if any */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index d67e658f1e24..fdb2925becdf 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -125,6 +125,7 @@ struct fpsimd_last_state_struct {
>  	u64 *svcr;
>  	unsigned int sve_vl;
>  	unsigned int sme_vl;
> +	enum fp_state *type;

For consistency: s/type/fp_type/ ?

>  };
>  
>  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
> @@ -330,15 +331,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    The task can execute SVE instructions while in userspace without
>   *    trapping to the kernel.
>   *
> - *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
> - *    corresponding Zn), P0-P15 and FFR are encoded in
> - *    task->thread.sve_state, formatted appropriately for vector
> - *    length task->thread.sve_vl or, if SVCR.SM is set,
> - *    task->thread.sme_vl.
> - *
> - *    task->thread.sve_state must point to a valid buffer at least
> - *    sve_state_size(task) bytes in size.
> - *
>   *    During any syscall, the kernel may optionally clear TIF_SVE and
>   *    discard the vector state except for the FPSIMD subset.
>   *
> @@ -348,7 +340,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    do_sve_acc() to be called, which does some preparation and then
>   *    sets TIF_SVE.
>   *
> - *    When stored, FPSIMD registers V0-V31 are encoded in
> + * During any syscall, the kernel may optionally clear TIF_SVE and
> + * discard the vector state except for the FPSIMD subset.
> + *
> + * The data will be stored in one of two formats:
> + *
> + *  * FPSIMD only - FP_STATE_FPSIMD:
> + *
> + *    When the FPSIMD only state stored task->thread.fp_type is set to
> + *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
>   *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
>   *    logically zero but not stored anywhere; P0-P15 and FFR are not
>   *    stored and have unspecified values from userspace's point of
> @@ -358,6 +358,18 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    task->thread.sve_state does not need to be non-NULL, valid or any
>   *    particular size: it must not be dereferenced.
>   *
> + *  * SVE state - FP_STATE_SVE:
> + *
> + *    When the full SVE state is stored task->thread.fp_type is set to
> + *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
> + *    corresponding Zn), P0-P15 and FFR are encoded in in
> + *    task->thread.sve_state, formatted appropriately for vector
> + *    length task->thread.sve_vl or, if SVCR.SM is set,
> + *    task->thread.sme_vl.

Can you add a commend about the fate of the Vn data in fpsimd_state?
My understanding is that they are stale data as soon as FP_STATE_SVE
is set, but I'd really like this being clarified.

> + *
> + *    task->thread.sve_state must point to a valid buffer at least
> + *    sve_state_size(task) bytes in size.
> + *
>   *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
>   *    irrespective of whether TIF_SVE is clear or set, since these are
>   *    not vector length dependent.
> @@ -404,12 +416,15 @@ static void task_fpsimd_load(void)
>  		}
>  	}
>  
> -	if (restore_sve_regs)
> +	if (restore_sve_regs) {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
>  		sve_load_state(sve_pffr(&current->thread),
>  			       &current->thread.uw.fpsimd_state.fpsr,
>  			       restore_ffr);
> -	else
> +	} else {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
>  		fpsimd_load_state(&current->thread.uw.fpsimd_state);
> +	}
>  }
>  
>  /*
> @@ -475,8 +490,10 @@ static void fpsimd_save(void)
>  		sve_save_state((char *)last->sve_state +
>  					sve_ffr_offset(vl),
>  			       &last->st->fpsr, save_ffr);
> +		*last->type = FP_STATE_SVE;
>  	} else {
>  		fpsimd_save_state(last->st);
> +		*last->type = FP_STATE_FPSIMD;
>  	}
>  }
>  
> @@ -847,8 +864,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
>  
>  	fpsimd_flush_task_state(task);
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
> -	    thread_sm_enabled(&task->thread))
> +	    thread_sm_enabled(&task->thread)) {
>  		sve_to_fpsimd(task);
> +		task->thread.fp_type = FP_STATE_FPSIMD;

Can you move this assignment into the sve_to_fpsimd() helper?

> +	}
>  
>  	if (system_supports_sme() && type == ARM64_VEC_SME) {
>  		task->thread.svcr &= ~(SVCR_SM_MASK |
> @@ -1367,6 +1386,7 @@ static void sve_init_regs(void)
>  		fpsimd_bind_task_to_cpu();
>  	} else {
>  		fpsimd_to_sve(current);
> +		current->thread.fp_type = FP_STATE_SVE;

Same thing here.

>  	}
>  }
>  
> @@ -1606,6 +1626,8 @@ void fpsimd_flush_thread(void)
>  		current->thread.svcr = 0;
>  	}
>  
> +	current->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	put_cpu_fpsimd_context();
>  	kfree(sve_state);
>  	kfree(za_state);
> @@ -1654,8 +1676,10 @@ void fpsimd_kvm_prepare(void)
>  	 */
>  	get_cpu_fpsimd_context();
>  
> -	if (test_and_clear_thread_flag(TIF_SVE))
> +	if (test_and_clear_thread_flag(TIF_SVE)) {
>  		sve_to_fpsimd(current);
> +		current->thread.fp_type = FP_STATE_FPSIMD;
> +	}
>  
>  	put_cpu_fpsimd_context();
>  }
> @@ -1677,6 +1701,7 @@ static void fpsimd_bind_task_to_cpu(void)
>  	last->sve_vl = task_get_sve_vl(current);
>  	last->sme_vl = task_get_sme_vl(current);
>  	last->svcr = &current->thread.svcr;
> +	last->type = &current->thread.fp_type;
>  	current->thread.fpsimd_cpu = smp_processor_id();
>  
>  	/*
> @@ -1700,7 +1725,8 @@ static void fpsimd_bind_task_to_cpu(void)
>  
>  void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  			      unsigned int sve_vl, void *za_state,
> -			      unsigned int sme_vl, u64 *svcr)
> +			      unsigned int sme_vl, u64 *svcr,
> +			      enum fp_state *type)
>  {
>  	struct fpsimd_last_state_struct *last =
>  		this_cpu_ptr(&fpsimd_last_state);
> @@ -1714,6 +1740,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  	last->za_state = za_state;
>  	last->sve_vl = sve_vl;
>  	last->sme_vl = sme_vl;
> +	last->type = type;
>  }
>  
>  /*
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 92bcc1768f0b..944d782d581b 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -335,6 +335,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  		clear_tsk_thread_flag(dst, TIF_SME);
>  	}
>  
> +	dst->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	/* clear any pending asynchronous tag fault raised by the parent */
>  	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
>  
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 21da83187a60..e0233f322af3 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -892,8 +892,7 @@ static int sve_set_common(struct task_struct *target,
>  		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
>  				SVE_PT_FPSIMD_OFFSET);
>  		clear_tsk_thread_flag(target, TIF_SVE);
> -		if (type == ARM64_VEC_SME)
> -			fpsimd_force_sync_to_sve(target);

I don't get this particular change. Can you please clarify?

> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -916,6 +915,7 @@ static int sve_set_common(struct task_struct *target,
>  	if (!target->thread.sve_state) {
>  		ret = -ENOMEM;
>  		clear_tsk_thread_flag(target, TIF_SVE);
> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -954,6 +954,8 @@ static int sve_set_common(struct task_struct *target,
>  				 &target->thread.uw.fpsimd_state.fpsr,
>  				 start, end);
>  
> +	target->thread.fp_type = FP_STATE_SVE;
> +
>  out:
>  	fpsimd_flush_task_state(target);
>  	return ret;
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index b0980fbb6bc7..23dd4c3bcfed 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
>  	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
>  
>  	clear_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_FPSIMD;
>  
>  	/* load the hardware registers from the fpsimd_state structure */
>  	if (!err)
> @@ -289,6 +290,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  	if (sve.head.size <= sizeof(*user->sve)) {
>  		clear_thread_flag(TIF_SVE);
>  		current->thread.svcr &= ~SVCR_SM_MASK;
> +		current->thread.fp_type = FP_STATE_FPSIMD;
>  		goto fpsimd_only;
>  	}
>  
> @@ -324,6 +326,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  		current->thread.svcr |= SVCR_SM_MASK;
>  	else
>  		set_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_SVE;
>  
>  fpsimd_only:
>  	/* copy the FP and status/control registers */
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index a433ee8da232..be3ddb214ab1 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -139,7 +139,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
>  					 vcpu->arch.sve_state,
>  					 vcpu->arch.sve_max_vl,
> -					 NULL, 0, &vcpu->arch.svcr);
> +					 NULL, 0, &vcpu->arch.svcr,
> +					 &vcpu->arch.fp_type);
>  
>  		clear_thread_flag(TIF_FOREIGN_FPSTATE);
>  		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.



More information about the linux-arm-kernel mailing list