[PATCH v3 2/7] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
Marc Zyngier
maz at kernel.org
Tue Sep 20 10:14:13 PDT 2022
On Mon, 15 Aug 2022 23:55:24 +0100,
Mark Brown <broonie at kernel.org> wrote:
>
> When we save the state for the floating point registers this can be done
> in the form visible through either the FPSIMD V registers or the SVE Z and
> P registers. At present we track which format is currently used based on
> TIF_SVE and the SME streaming mode state but particularly in the SVE case
> this limits our options for optimising things, especially around syscalls.
> Introduce a new enum in thread_struct which explicitly states which format
> is active and keep it up to date when we change it.
>
> At present we do not use this state except to verify that it has the
> expected value when loading the state, future patches will introduce
> functional changes.
>
> Signed-off-by: Mark Brown <broonie at kernel.org>
> ---
> arch/arm64/include/asm/fpsimd.h | 2 +-
> arch/arm64/include/asm/kvm_host.h | 1 +
> arch/arm64/include/asm/processor.h | 6 ++++
> arch/arm64/kernel/fpsimd.c | 58 ++++++++++++++++++++++--------
> arch/arm64/kernel/process.c | 2 ++
> arch/arm64/kernel/ptrace.c | 3 ++
> arch/arm64/kernel/signal.c | 7 +++-
> arch/arm64/kvm/fpsimd.c | 3 +-
> 8 files changed, 64 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index c07e4abaca3d..b74103a79052 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
> extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
> void *sve_state, unsigned int sve_vl,
> void *za_state, unsigned int sme_vl,
> - u64 *svcr);
> + u64 *svcr, enum fp_state *type);
>
> extern void fpsimd_flush_task_state(struct task_struct *target);
> extern void fpsimd_save_and_flush_cpu_state(void);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index f38ef299f13b..ebd37f97aeb4 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -310,6 +310,7 @@ struct kvm_vcpu_arch {
> void *sve_state;
> unsigned int sve_max_vl;
> u64 svcr;
> + enum fp_state fp_type;
Is it a state or a type? Some consistency would help. Also, what does
this represent? Your commit message keeps talking about the FP/SVE
state for the host, but this is obviously a guest-related structure.
How do the two relate?
Finally, before this patch, pahole shows this:
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt; /* 0 1824 */
/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
void * sve_state; /* 1824 8 */
unsigned int sve_max_vl; /* 1832 4 */
/* XXX 4 bytes hole, try to pack */
u64 svcr; /* 1840 8 */
struct kvm_s2_mmu * hw_mmu; /* 1848 8 */
[...]
};
After it, we gain an extra hole:
struct kvm_vcpu_arch {
struct kvm_cpu_context ctxt; /* 0 1824 */
/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
void * sve_state; /* 1824 8 */
unsigned int sve_max_vl; /* 1832 4 */
/* XXX 4 bytes hole, try to pack */
u64 svcr; /* 1840 8 */
enum fp_state fp_type; /* 1848 4 */
/* XXX 4 bytes hole, try to pack */
/* --- cacheline 29 boundary (1856 bytes) --- */
struct kvm_s2_mmu * hw_mmu; /* 1856 8 */
[...]
};
Packing things wouldn't hurt.
>
> /* Stage 2 paging state used by the hardware on next switch */
> struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 86eb0bfe3b38..4818a6b77f39 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -122,6 +122,11 @@ enum vec_type {
> ARM64_VEC_MAX,
> };
>
> +enum fp_state {
> + FP_STATE_FPSIMD,
> + FP_STATE_SVE,
> +};
> +
> struct cpu_context {
> unsigned long x19;
> unsigned long x20;
> @@ -152,6 +157,7 @@ struct thread_struct {
> struct user_fpsimd_state fpsimd_state;
> } uw;
>
> + enum fp_state fp_type; /* registers FPSIMD or SVE? */
Same comment about the state vs type.
> unsigned int fpsimd_cpu;
> void *sve_state; /* SVE registers, if any */
> void *za_state; /* ZA register, if any */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 549e11645e0f..6544ae00230f 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -125,6 +125,7 @@ struct fpsimd_last_state_struct {
> u64 *svcr;
> unsigned int sve_vl;
> unsigned int sme_vl;
> + enum fp_state *fp_type;
Same thing. Grouping the pointer together would probably help
readability as well.
> };
>
> static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
> @@ -330,15 +331,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
> * The task can execute SVE instructions while in userspace without
> * trapping to the kernel.
> *
> - * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
> - * corresponding Zn), P0-P15 and FFR are encoded in
> - * task->thread.sve_state, formatted appropriately for vector
> - * length task->thread.sve_vl or, if SVCR.SM is set,
> - * task->thread.sme_vl.
> - *
> - * task->thread.sve_state must point to a valid buffer at least
> - * sve_state_size(task) bytes in size.
> - *
> * During any syscall, the kernel may optionally clear TIF_SVE and
> * discard the vector state except for the FPSIMD subset.
> *
> @@ -348,7 +340,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
> * do_sve_acc() to be called, which does some preparation and then
> * sets TIF_SVE.
> *
> - * When stored, FPSIMD registers V0-V31 are encoded in
> + * During any syscall, the kernel may optionally clear TIF_SVE and
> + * discard the vector state except for the FPSIMD subset.
> + *
> + * The data will be stored in one of two formats:
> + *
> + * * FPSIMD only - FP_STATE_FPSIMD:
> + *
> + * When the FPSIMD only state stored task->thread.fp_type is set to
> + * FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
> * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
> * logically zero but not stored anywhere; P0-P15 and FFR are not
> * stored and have unspecified values from userspace's point of
> @@ -358,6 +358,19 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
> * task->thread.sve_state does not need to be non-NULL, valid or any
> * particular size: it must not be dereferenced.
> *
> + * * SVE state - FP_STATE_SVE:
> + *
> + * When the full SVE state is stored task->thread.fp_type is set to
> + * FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
> + * corresponding Zn), P0-P15 and FFR are encoded in in
> + * task->thread.sve_state, formatted appropriately for vector
> + * length task->thread.sve_vl or, if SVCR.SM is set,
> + * task->thread.sme_vl. The storage for the vector registers in
> + * task->thread.uw.fpsimd_state should be ignored.
> + *
> + * task->thread.sve_state must point to a valid buffer at least
> + * sve_state_size(task) bytes in size.
> + *
> * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
> * irrespective of whether TIF_SVE is clear or set, since these are
> * not vector length dependent.
> @@ -404,12 +417,15 @@ static void task_fpsimd_load(void)
> }
> }
>
> - if (restore_sve_regs)
> + if (restore_sve_regs) {
> + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
> sve_load_state(sve_pffr(¤t->thread),
> ¤t->thread.uw.fpsimd_state.fpsr,
> restore_ffr);
> - else
> + } else {
> + WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
> fpsimd_load_state(¤t->thread.uw.fpsimd_state);
> + }
> }
>
> /*
> @@ -474,8 +490,10 @@ static void fpsimd_save(void)
> sve_save_state((char *)last->sve_state +
> sve_ffr_offset(vl),
> &last->st->fpsr, save_ffr);
> + *last->fp_type = FP_STATE_SVE;
> } else {
> fpsimd_save_state(last->st);
> + *last->fp_type = FP_STATE_FPSIMD;
> }
> }
>
> @@ -848,8 +866,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
>
> fpsimd_flush_task_state(task);
> if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
> - thread_sm_enabled(&task->thread))
> + thread_sm_enabled(&task->thread)) {
> sve_to_fpsimd(task);
> + task->thread.fp_type = FP_STATE_FPSIMD;
> + }
>
> if (system_supports_sme() && type == ARM64_VEC_SME) {
> task->thread.svcr &= ~(SVCR_SM_MASK |
> @@ -1368,6 +1388,7 @@ static void sve_init_regs(void)
> fpsimd_bind_task_to_cpu();
> } else {
> fpsimd_to_sve(current);
> + current->thread.fp_type = FP_STATE_SVE;
> }
> }
>
> @@ -1596,6 +1617,8 @@ void fpsimd_flush_thread(void)
> current->thread.svcr = 0;
> }
>
> + current->thread.fp_type = FP_STATE_FPSIMD;
> +
> put_cpu_fpsimd_context();
> kfree(sve_state);
> kfree(za_state);
> @@ -1644,8 +1667,10 @@ void fpsimd_kvm_prepare(void)
> */
> get_cpu_fpsimd_context();
>
> - if (test_and_clear_thread_flag(TIF_SVE))
> + if (test_and_clear_thread_flag(TIF_SVE)) {
> sve_to_fpsimd(current);
> + current->thread.fp_type = FP_STATE_FPSIMD;
> + }
>
> put_cpu_fpsimd_context();
> }
> @@ -1667,6 +1692,7 @@ static void fpsimd_bind_task_to_cpu(void)
> last->sve_vl = task_get_sve_vl(current);
> last->sme_vl = task_get_sme_vl(current);
> last->svcr = ¤t->thread.svcr;
> + last->fp_type = ¤t->thread.fp_type;
> current->thread.fpsimd_cpu = smp_processor_id();
>
> /*
> @@ -1690,7 +1716,8 @@ static void fpsimd_bind_task_to_cpu(void)
>
> void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
> unsigned int sve_vl, void *za_state,
> - unsigned int sme_vl, u64 *svcr)
> + unsigned int sme_vl, u64 *svcr,
> + enum fp_state *type)
> {
> struct fpsimd_last_state_struct *last =
> this_cpu_ptr(&fpsimd_last_state);
> @@ -1704,6 +1731,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
> last->za_state = za_state;
> last->sve_vl = sve_vl;
> last->sme_vl = sme_vl;
> + last->fp_type = type;
> }
>
> /*
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 92bcc1768f0b..944d782d581b 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -335,6 +335,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
> clear_tsk_thread_flag(dst, TIF_SME);
> }
>
> + dst->thread.fp_type = FP_STATE_FPSIMD;
> +
> /* clear any pending asynchronous tag fault raised by the parent */
> clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
>
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index eb7c08dfb834..fb6189bc45c9 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -894,6 +894,7 @@ static int sve_set_common(struct task_struct *target,
> clear_tsk_thread_flag(target, TIF_SVE);
> if (type == ARM64_VEC_SME)
> fpsimd_force_sync_to_sve(target);
> + target->thread.fp_type = FP_STATE_FPSIMD;
> goto out;
> }
>
> @@ -916,6 +917,7 @@ static int sve_set_common(struct task_struct *target,
> if (!target->thread.sve_state) {
> ret = -ENOMEM;
> clear_tsk_thread_flag(target, TIF_SVE);
> + target->thread.fp_type = FP_STATE_FPSIMD;
> goto out;
> }
>
> @@ -927,6 +929,7 @@ static int sve_set_common(struct task_struct *target,
> */
> fpsimd_sync_to_sve(target);
> set_tsk_thread_flag(target, TIF_SVE);
> + target->thread.fp_type = FP_STATE_SVE;
>
> BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
> start = SVE_PT_SVE_OFFSET;
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index f00e8b33170a..804cc00befc3 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
> __get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
>
> clear_thread_flag(TIF_SVE);
> + current->thread.fp_type = FP_STATE_FPSIMD;
>
> /* load the hardware registers from the fpsimd_state structure */
> if (!err)
> @@ -292,6 +293,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
> if (sve.head.size <= sizeof(*user->sve)) {
> clear_thread_flag(TIF_SVE);
> current->thread.svcr &= ~SVCR_SM_MASK;
> + current->thread.fp_type = FP_STATE_FPSIMD;
> goto fpsimd_only;
> }
>
> @@ -327,6 +329,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
> current->thread.svcr |= SVCR_SM_MASK;
> else
> set_thread_flag(TIF_SVE);
> + current->thread.fp_type = FP_STATE_SVE;
>
> fpsimd_only:
> /* copy the FP and status/control registers */
> @@ -932,9 +935,11 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
> * FPSIMD register state - flush the saved FPSIMD
> * register state in case it gets loaded.
> */
> - if (current->thread.svcr & SVCR_SM_MASK)
> + if (current->thread.svcr & SVCR_SM_MASK) {
> memset(¤t->thread.uw.fpsimd_state, 0,
> sizeof(current->thread.uw.fpsimd_state));
> + current->thread.fp_type = FP_STATE_FPSIMD;
> + }
>
> current->thread.svcr &= ~(SVCR_ZA_MASK |
> SVCR_SM_MASK);
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 1c1b309ef420..a92977759f8d 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -140,7 +140,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
> fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
> vcpu->arch.sve_state,
> vcpu->arch.sve_max_vl,
> - NULL, 0, &vcpu->arch.svcr);
> + NULL, 0, &vcpu->arch.svcr,
> + &vcpu->arch.fp_type);
>
> clear_thread_flag(TIF_FOREIGN_FPSTATE);
> update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list