[PATCH v3 7/8] arm64/sve: Don't disable SVE on syscalls return

Dave Martin Dave.Martin at arm.com
Wed Jul 15 12:52:31 EDT 2020


On Mon, Jun 29, 2020 at 02:35:55PM +0100, Mark Brown wrote:
> From: Julien Grall <julien.grall at arm.com>
> 
> Per the syscall ABI, the state of the SVE registers is unknown after a
> syscall. In practice the kernel will disable SVE and zero all but the
> first 128 bits of each vector register on the next SVE instruction. In
> workloads mixing SVE and syscalls this results in at least one extra
> entry/exit to the kernel per syscall when the SVE registers are
> accessed for the first time after the syscall.
> 
> To avoid the second entry/exit a new flag TIF_SVE_NEEDS_FLUSH is
> introduced to mark a task that needs to flush the SVE context on
> return to userspace.
> 
> On entry to a syscall the flag TIF_SVE will still be cleared; it will
> be restored on return to userspace once the SVE state has been flushed.
> This means that if a task needs to synchronize the FP state during a
> syscall (e.g. context switch, signal), only the FPSIMD registers will
> be saved. When the task is rescheduled the SVE state will be loaded
> from the FPSIMD state.
> 
> We could instead handle flushing the SVE state in do_el0_svc(); however,
> doing this would reduce the potential for further optimisations, such as
> initializing the SVE registers directly from the FPSIMD state when
> taking an SVE access trap, and it has some potential edge cases if we
> schedule before returning to userspace after do_el0_svc().

Looks reasonable overall.

> Signed-off-by: Julien Grall <julien.grall at arm.com>
> Signed-off-by: Mark Brown <broonie at kernel.org>
> ---
>  arch/arm64/include/asm/thread_info.h |  5 ++++-
>  arch/arm64/kernel/fpsimd.c           | 32 ++++++++++++++++++++++++++++
>  arch/arm64/kernel/process.c          |  1 +
>  arch/arm64/kernel/ptrace.c           |  7 ++++++
>  arch/arm64/kernel/signal.c           | 14 +++++++++++-
>  arch/arm64/kernel/syscall.c          | 13 +++++------
>  6 files changed, 62 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
> index 6ea8b6a26ae9..5d5c01a576d0 100644
> --- a/arch/arm64/include/asm/thread_info.h
> +++ b/arch/arm64/include/asm/thread_info.h
> @@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk);
>  #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
>  #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
>  #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
> +#define TIF_SVE_NEEDS_FLUSH	6	/* Flush SVE registers on return */
>  #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
>  #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
>  #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
> @@ -95,10 +96,12 @@ void arch_release_task_struct(struct task_struct *tsk);
>  #define _TIF_FSCHECK		(1 << TIF_FSCHECK)
>  #define _TIF_32BIT		(1 << TIF_32BIT)
>  #define _TIF_SVE		(1 << TIF_SVE)
> +#define _TIF_SVE_NEEDS_FLUSH	(1 << TIF_SVE_NEEDS_FLUSH)
>  
>  #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
>  				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
> -				 _TIF_UPROBE | _TIF_FSCHECK)
> +				 _TIF_UPROBE | _TIF_FSCHECK | \
> +				 _TIF_SVE_NEEDS_FLUSH)
>  
>  #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
>  				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 1c6a82083d5c..ccbc38b71069 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -213,6 +213,8 @@ static bool have_cpu_fpsimd_context(void)
>   */
>  static void __sve_free(struct task_struct *task)
>  {
> +	/* SVE context will be zeroed when allocated. */
> +	clear_tsk_thread_flag(task, TIF_SVE_NEEDS_FLUSH);
>  	kfree(task->thread.sve_state);
>  	task->thread.sve_state = NULL;
>  }
> @@ -269,6 +271,11 @@ static void sve_free(struct task_struct *task)
>   *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
>   *    irrespective of whether TIF_SVE is clear or set, since these are
>   *    not vector length dependent.
> + *
> + *  * When TIF_SVE_NEEDS_FLUSH is set, all the SVE registers but the first
> + *    128-bits of the Z-registers are logically zero but not stored anywhere.
> + *    Saving logically zero bits across context switches is therefore
> + *    pointless, although they must be zeroed before re-entering userspace.
>   */
>  
>  /*
> @@ -277,6 +284,14 @@ static void sve_free(struct task_struct *task)
>   * This function should be called only when the FPSIMD/SVE state in
>   * thread_struct is known to be up to date, when preparing to enter
>   * userspace.
> + *
> + * When TIF_SVE_NEEDS_FLUSH is set, the SVE state will be restored from the
> + * FPSIMD state.
> + *
> + * TIF_SVE_NEEDS_FLUSH and TIF_SVE set at the same time should never happen.
> + * In the unlikely case it happens, the code is able to cope with it. It will
> + * first restore the SVE registers and then flush them in
> + * fpsimd_restore_current_state.
>   */
>  static void task_fpsimd_load(void)
>  {
> @@ -287,6 +302,12 @@ static void task_fpsimd_load(void)
>  		sve_load_state(sve_pffr(&current->thread),
>  			       &current->thread.uw.fpsimd_state.fpsr,
>  			       sve_vq_from_vl(current->thread.sve_vl) - 1);

If we think that TIF_SVE_NEEDS_FLUSH and TIF_SVE should never be set at
the same time (have you satisfied yourself of this?), then I wonder if
it's worth having a WARN_ON(test_thread_flag(TIF_SVE_NEEDS_FLUSH)) here.

This may at least flag up if something suspicious is happening.
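
Concretely, in the TIF_SVE branch quoted above, something like (untested):

	if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
		/* TIF_SVE and TIF_SVE_NEEDS_FLUSH should never both be set */
		WARN_ON(test_thread_flag(TIF_SVE_NEEDS_FLUSH));
		sve_load_state(sve_pffr(&current->thread),
			       &current->thread.uw.fpsimd_state.fpsr,
			       sve_vq_from_vl(current->thread.sve_vl) - 1);
	}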

> +	else if (system_supports_sve() &&
> +		 test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
> +		sve_load_from_fpsimd_state(&current->thread.uw.fpsimd_state,
> +					   sve_vq_from_vl(current->thread.sve_vl) - 1);
> +		set_thread_flag(TIF_SVE);
> +	}
>  	else

Nit: merge } and else onto one line.  (Possibly I mangled the patch
while editing this reply).

>  		fpsimd_load_state(&current->thread.uw.fpsimd_state);

It may still be preferable to merge the two system_supports_sve() calls,
since the compiler doesn't understand the constish-ness of this function.
I don't have a strong view on this though, or a good feel for the size of
the expected cost saving.

Unless you can think of something nicer, we could do something like:

	if (system_supports_sve()) {
		if (test_thread_flag(TIF_SVE)) {
			/* ... */
			return;
		} else if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
			/* ... */
			return;
		}
	}

	/* otherwise */
	fpsimd_load_state(...);


>  }
> @@ -1163,6 +1184,17 @@ void fpsimd_restore_current_state(void)
>  		fpsimd_bind_task_to_cpu();
>  	}
>  
> +	if (system_supports_sve() &&
> +	    test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {

This if() and the previous one should be mutually exclusive, right?

It looks a bit weird to load the regs and then flush, but this won't
happen if TIF_FOREIGN_FPSTATE and TIF_SVE_NEEDS_FLUSH are never set at
the same time.

It would be worth checking that every bit of code that sets
TIF_FOREIGN_FPSTATE also clears TIF_SVE_NEEDS_FLUSH, or defining a
helper that ensures both are done together.
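
For instance (hypothetical name, untested):

	/* Hypothetical helper: keep the two flags consistent */
	static void fpsimd_flag_foreign_fpstate(struct task_struct *tsk)
	{
		/* A pending SVE flush is meaningless once the live regs are stale */
		clear_tsk_thread_flag(tsk, TIF_SVE_NEEDS_FLUSH);
		set_tsk_thread_flag(tsk, TIF_FOREIGN_FPSTATE);
	}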

Similarly, it would be good to check that whenever TIF_SVE_NEEDS_FLUSH
gets set, TIF_FOREIGN_FPSTATE is clear.  This _should_ follow if
TIF_SVE_NEEDS_FLUSH is only set via el0_svc(), before reenabling
interrupts.  I believe that this is the case, but I haven't gone
digging.


We could perhaps make this an else if, if we're sufficiently
confident, but if so the comments at fpsimd_save() would need editing,
and we probably would want a strategic WARN() somewhere just in case
the two flags get erroneously set together.
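
Roughly (untested, and assuming the two flags really are mutually
exclusive):

	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		/* Catch the two flags being set together by mistake */
		WARN_ON(test_thread_flag(TIF_SVE_NEEDS_FLUSH));
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	} else if (system_supports_sve() &&
		   test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
		sve_flush_live();
		sve_user_enable();
		set_thread_flag(TIF_SVE);
	}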

> +		/*
> +		 * The userspace had SVE enabled on entry to the kernel
> +		 * and requires the state to be flushed.
> +		 */
> +		sve_flush_live();
> +		sve_user_enable();

Can we actually get here with SVE still disabled via CPACR_EL1 for EL0?

This might become true if future patches allow for a dynamic decision
about whether to disable SVE or flush the regs in place, but for now I
wonder whether it's possible: this patch removes the sve_user_disable()
call from sve_user_discard() after all.

Could be worth a comment, if nothing else.
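
Say, something along these lines (wording just a sketch):

	/*
	 * SVE should still be enabled for EL0 here: sve_user_discard() no
	 * longer disables it on syscall entry, so sve_user_enable() only
	 * matters if that ever changes.
	 */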

> +		set_thread_flag(TIF_SVE);
> +	}
> +
>  	put_cpu_fpsimd_context();
>  }
>  
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 6089638c7d43..1352490a97bf 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -369,6 +369,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  	 */
>  	dst->thread.sve_state = NULL;
>  	clear_tsk_thread_flag(dst, TIF_SVE);
> +	clear_tsk_thread_flag(dst, TIF_SVE_NEEDS_FLUSH);
>  
>  	return 0;
>  }
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 68b7f34a08f5..aed7230b1fb8 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -900,6 +900,11 @@ static int sve_set(struct task_struct *target,
>  		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
>  				SVE_PT_FPSIMD_OFFSET);
>  		clear_tsk_thread_flag(target, TIF_SVE);
> +		/*
> +		 * If ptrace requested to use FPSIMD, then don't try to
> +		 * re-enable SVE when the task is running again.
> +		 */
> +		clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);

I wonder whether we also want to do this when loading the SVE regs.

What happens if the task is at the syscall exit trap (so
TIF_SVE_NEEDS_FLUSH is set), and we try to update the SVE regs?  Do they
get flushed again before target re-enters userspace?

Similarly, what should we do if the tracer wants to read the regs and
TIF_SVE_NEEDS_FLUSH is set?  Should we give the tracer flushed versions
of the regs?


Or is TIF_SVE_NEEDS_FLUSH always clear, due to fpsimd_save() being called
when the target is stopped?  In that case, we might reduce this to

	WARN_ON(test_and_clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH));

or similar.

>  		goto out;
>  	}
>  
> @@ -924,6 +929,8 @@ static int sve_set(struct task_struct *target,
>  	 */
>  	fpsimd_sync_to_sve(target);
>  	set_tsk_thread_flag(target, TIF_SVE);
> +	/* Don't flush SVE registers on return as ptrace will update them. */
> +	clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);

likewise

>  
>  	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
>  	start = SVE_PT_SVE_OFFSET;
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index 5e9df0224609..5d80cdbbb603 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -521,6 +521,17 @@ static int restore_sigframe(struct pt_regs *regs,
>  		} else {
>  			err = restore_fpsimd_context(user.fpsimd);
>  		}
> +
> +		/*
> +		 * When successfully restoring the:
> +		 *	- FPSIMD context, we don't want to re-enable SVE
> +		 *	- SVE context, we don't want to override what was
> +		 *	restored
> +		 */
> +		if (err == 0)
> +			clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
> +
> +

I'm glad this is here -- if missing, we'd have a lot of fun debugging
weird sigreturn problems...

Nit: extra blank lines.

>  	}
>  
>  	return err;
> @@ -949,7 +960,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
>  				rseq_handle_notify_resume(NULL, regs);
>  			}
>  
> -			if (thread_flags & _TIF_FOREIGN_FPSTATE)
> +			if (thread_flags & (_TIF_FOREIGN_FPSTATE |
> +					    _TIF_SVE_NEEDS_FLUSH))
>  				fpsimd_restore_current_state();
>  		}
>  
> diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
> index 5f5b868292f5..78cccdecf818 100644
> --- a/arch/arm64/kernel/syscall.c
> +++ b/arch/arm64/kernel/syscall.c
> @@ -160,16 +160,13 @@ static inline void sve_user_discard(void)
>  	if (!system_supports_sve())
>  		return;
>  
> -	clear_thread_flag(TIF_SVE);
> -
>  	/*
> -	 * task_fpsimd_load() won't be called to update CPACR_EL1 in
> -	 * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
> -	 * happens if a context switch or kernel_neon_begin() or context
> -	 * modification (sigreturn, ptrace) intervenes.
> -	 * So, ensure that CPACR_EL1 is already correct for the fast-path case.
> +	 * TIF_SVE is cleared to save the FPSIMD state rather than the SVE
> +	 * state on context switch. The bit will be set again while
> +	 * restoring/zeroing the registers.
>  	 */
> -	sve_user_disable();
> +	if (test_and_clear_thread_flag(TIF_SVE))
> +		set_thread_flag(TIF_SVE_NEEDS_FLUSH);
>  }

Cheers
---Dave


