[PATCH v4 7/8] arm64/sve: Don't disable SVE on syscalls return

Mark Brown broonie at kernel.org
Fri Aug 28 14:11:54 EDT 2020


From: Julien Grall <julien.grall at arm.com>

Per the syscalls ABI the state of the SVE registers is unknown after a
syscall. In practice the kernel will disable SVE and zero all the
registers but the first 128-bits of the vector on the next SVE
instruction. In workloads mixing SVE and syscalls this will result in at
least one extra entry/exit to the kernel per syscall when the SVE
registers are accessed for the first time after the syscall.

To avoid the second entry/exit a new flag TIF_SVE_NEEDS_FLUSH is
introduced to mark a task that needs to flush the SVE context on
return to userspace.

On entry to a syscall the flag TIF_SVE will still be cleared, it will
be restored on return to userspace once the SVE state has been flushed.
This means that if a task requires to synchronize the FP state during a
syscall (e.g context switch, signal) only the FPSIMD registers will be
saved. When the task is rescheduled the SVE state will be loaded from
FPSIMD state.

We could instead handle flushing the SVE state in do_el0_svc() however
doing this reduces the potential for further optimisations such as
initializing the SVE registers directly from the FPSIMD state when
taking a SVE access trap and has some potential edge cases if we
schedule before we return to userspace after do_el0_svc().

Signed-off-by: Julien Grall <julien.grall at arm.com>
Signed-off-by: Mark Brown <broonie at kernel.org>
---
 arch/arm64/include/asm/thread_info.h |  6 ++-
 arch/arm64/kernel/fpsimd.c           | 61 +++++++++++++++++++++++++---
 arch/arm64/kernel/process.c          |  1 +
 arch/arm64/kernel/ptrace.c           | 11 +++++
 arch/arm64/kernel/signal.c           | 16 +++++++-
 arch/arm64/kernel/syscall.c          | 13 +++---
 6 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 5e784e16ee89..dfaf872c0a07 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -67,6 +67,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_FOREIGN_FPSTATE	3	/* CPU's FP state is not current's */
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
+#define TIF_SVE_NEEDS_FLUSH	6	/* Flush SVE registers on return */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -97,9 +98,12 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_32BIT		(1 << TIF_32BIT)
 #define _TIF_SVE		(1 << TIF_SVE)
 
+#define _TIF_SVE_NEEDS_FLUSH	(1 << TIF_SVE_NEEDS_FLUSH)
+
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK)
+				 _TIF_UPROBE | _TIF_FSCHECK | \
+				 _TIF_SVE_NEEDS_FLUSH)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 1c6a82083d5c..b0fc8823d731 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -213,6 +213,8 @@ static bool have_cpu_fpsimd_context(void)
  */
 static void __sve_free(struct task_struct *task)
 {
+	/* SVE context will be zeroed when allocated. */
+	clear_tsk_thread_flag(task, TIF_SVE_NEEDS_FLUSH);
 	kfree(task->thread.sve_state);
 	task->thread.sve_state = NULL;
 }
@@ -269,6 +271,14 @@ static void sve_free(struct task_struct *task)
  *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
  *    irrespective of whether TIF_SVE is clear or set, since these are
  *    not vector length dependent.
+ *
+ *  * When TIF_SVE_NEEDS_FLUSH is set, all the SVE registers but the first
+ *    128-bits of the Z-registers are logically zero but not stored anywhere.
+ *    Saving logically zero bits across context switches is therefore
+ *    pointless, although they must be zeroed before re-entering userspace.
+ *    This can be set at the same time as TIF_FPSIMD_FOREIGN_STATE, when it
+ *    is then the first 128 bits of the SVE registers will be restored from
+ *    the FPSIMD state.
  */
 
 /*
@@ -277,18 +287,38 @@ static void sve_free(struct task_struct *task)
  * This function should be called only when the FPSIMD/SVE state in
  * thread_struct is known to be up to date, when preparing to enter
  * userspace.
+ *
+ * When TIF_SVE_NEEDS_FLUSH is set, the SVE state will be restored from the
+ * FPSIMD state.
+ *
+ * TIF_SVE_NEEDS_FLUSH and TIF_SVE set at the same time should never happen.
+ * In the unlikely case it happens, the code is able to cope with it. It will
+ * first restore the SVE registers and then flush them in
+ * fpsimd_restore_current_state.
  */
 static void task_fpsimd_load(void)
 {
 	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!have_cpu_fpsimd_context());
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE))
-		sve_load_state(sve_pffr(&current->thread),
-			       &current->thread.uw.fpsimd_state.fpsr,
-			       sve_vq_from_vl(current->thread.sve_vl) - 1);
-	else
-		fpsimd_load_state(&current->thread.uw.fpsimd_state);
+	/* Ensure that we only evaluate system_supports_sve() once. */
+	if (system_supports_sve()) {
+		if (test_thread_flag(TIF_SVE)) {
+			WARN_ON_ONCE(test_thread_flag(TIF_SVE_NEEDS_FLUSH));
+			sve_load_state(sve_pffr(&current->thread),
+				       &current->thread.uw.fpsimd_state.fpsr,
+				       sve_vq_from_vl(current->thread.sve_vl) - 1);
+			return;
+		} else if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+			WARN_ON_ONCE(test_thread_flag(TIF_SVE));
+			sve_load_from_fpsimd_state(&current->thread.uw.fpsimd_state,
+				   sve_vq_from_vl(current->thread.sve_vl) - 1);
+			set_thread_flag(TIF_SVE);
+			return;
+		}
+	}
+
+	fpsimd_load_state(&current->thread.uw.fpsimd_state);
 }
 
 /*
@@ -1159,10 +1189,29 @@ void fpsimd_restore_current_state(void)
 	get_cpu_fpsimd_context();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		/*
+		 * If TIF_SVE_NEEDS_FLUSH is set this takes care of
+		 * restoring the SVE state that is preserved over
+		 * syscalls should we have context switched.
+		 */
 		task_fpsimd_load();
 		fpsimd_bind_task_to_cpu();
 	}
 
+	if (system_supports_sve() &&
+	    test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+		/*
+		 * The userspace had SVE enabled on entry to the kernel
+		 * and requires the state to be flushed.
+		 *
+		 * We rely on the vector length to be set correctly beforehand
+		 * when converting a loaded FPSIMD state to SVE state.
+		 */
+		sve_flush_live();
+		sve_user_enable();
+		set_thread_flag(TIF_SVE);
+	}
+
 	put_cpu_fpsimd_context();
 }
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b63ce4c54cfe..db951c63fc6a 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -369,6 +369,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	 */
 	dst->thread.sve_state = NULL;
 	clear_tsk_thread_flag(dst, TIF_SVE);
+	clear_tsk_thread_flag(dst, TIF_SVE_NEEDS_FLUSH);
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d8ebfd813e28..2ab7102f5fd7 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -768,6 +768,10 @@ static int sve_get(struct task_struct *target,
 
 	/* Otherwise: full SVE case */
 
+	/* The flush should have happened when the thread was stopped */
+	if (test_and_clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH))
+		WARN(1, "TIF_SVE_NEEDS_FLUSH was set");
+
 	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
 	start = SVE_PT_SVE_OFFSET;
 	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
@@ -830,6 +834,11 @@ static int sve_set(struct task_struct *target,
 		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
 				SVE_PT_FPSIMD_OFFSET);
 		clear_tsk_thread_flag(target, TIF_SVE);
+		/*
+		 * If ptrace requested to use FPSIMD, then don't try to
+		 * re-enable SVE when the task is running again.
+		 */
+		clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 		goto out;
 	}
 
@@ -854,6 +863,8 @@ static int sve_set(struct task_struct *target,
 	 */
 	fpsimd_sync_to_sve(target);
 	set_tsk_thread_flag(target, TIF_SVE);
+	/* Don't flush SVE registers on return as ptrace will update them. */
+	clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 
 	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
 	start = SVE_PT_SVE_OFFSET;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index f3af68dc1cf8..0dec8e19edb8 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -307,8 +307,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
 	__get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
 
 	/* load the hardware registers from the fpsimd_state structure */
-	if (!err)
+	if (!err) {
+		clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
 		fpsimd_update_current_state(&fpsimd);
+	}
 
 	return err ? -EFAULT : 0;
 }
@@ -521,6 +523,15 @@ static int restore_sigframe(struct pt_regs *regs,
 		} else {
 			err = restore_fpsimd_context(user.fpsimd);
 		}
+
+		/*
+		 * When successfully restoring the:
+		 *	- FPSIMD context, we don't want to re-enable SVE
+		 *	- SVE context, we don't want to override what was
+		 *	restored
+		 */
+		if (err == 0)
+			clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
 	}
 
 	return err;
@@ -942,7 +953,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 				rseq_handle_notify_resume(NULL, regs);
 			}
 
-			if (thread_flags & _TIF_FOREIGN_FPSTATE)
+			if (thread_flags & (_TIF_FOREIGN_FPSTATE |
+					    _TIF_SVE_NEEDS_FLUSH))
 				fpsimd_restore_current_state();
 		}
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 5f0c04863d2c..5b652b33498a 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -177,16 +177,13 @@ static inline void sve_user_discard(void)
 	if (!system_supports_sve())
 		return;
 
-	clear_thread_flag(TIF_SVE);
-
 	/*
-	 * task_fpsimd_load() won't be called to update CPACR_EL1 in
-	 * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
-	 * happens if a context switch or kernel_neon_begin() or context
-	 * modification (sigreturn, ptrace) intervenes.
-	 * So, ensure that CPACR_EL1 is already correct for the fast-path case.
+	 * TIF_SVE is cleared to save the FPSIMD state rather than the SVE
+	 * state on context switch. The bit will be set again while
+	 * restoring/zeroing the registers.
 	 */
-	sve_user_disable();
+	if (test_and_clear_thread_flag(TIF_SVE))
+		set_thread_flag(TIF_SVE_NEEDS_FLUSH);
 }
 
 void do_el0_svc(struct pt_regs *regs)
-- 
2.20.1




More information about the linux-arm-kernel mailing list