[PATCH v5 1/2] arm64/sve: Don't disable SVE on syscall return

Mark Brown broonie at kernel.org
Fri Nov 6 14:35:52 EST 2020


From: Julien Grall <julien.grall at arm.com>

Per the syscall ABI, the state of the SVE registers is unknown after a
syscall. In practice the kernel will disable SVE and zero all but the
first 128 bits of each vector register on the next SVE instruction. For
workloads that mix SVE and syscalls this means at least one extra kernel
entry/exit per syscall, taken when the SVE registers are first accessed
after the syscall.
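
As an illustration only (not part of the patch), here is a minimal
user-space sketch of the kind of workload this targets, assuming a
compiler with ACLE SVE support (e.g. -march=armv8-a+sve); the function
name and file descriptor are made up for the example:

#include <arm_sve.h>
#include <unistd.h>

/*
 * Scale a buffer with SVE while logging one byte per iteration.  Each
 * write() lets the kernel discard the SVE-only register state, so the
 * first SVE instruction of the next iteration previously trapped.
 */
void scale_and_log(double *dst, const double *src, long n, int fd)
{
	for (long i = 0; i < n; i += svcntd()) {
		svbool_t pg = svwhilelt_b64(i, n);
		svfloat64_t v = svld1(pg, &src[i]);

		svst1(pg, &dst[i], svmul_x(pg, v, 2.0));
		write(fd, ".", 1);
	}
}

With this change the flush is folded into the existing return-to-user
path and SVE stays enabled, so the loop no longer takes an access trap
after every write().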

To avoid this second entry/exit a new flag, TIF_SVE_NEEDS_FLUSH, is
introduced to mark a task whose SVE context needs to be flushed on
return to userspace.

On entry to a syscall TIF_SVE will still be cleared; it is set again on
return to userspace once the SVE state has been flushed. This means that
if a task needs to synchronize the FP state during a syscall (e.g.
context switch, signal) only the FPSIMD registers will be saved. When
the task is rescheduled the SVE state will be reconstructed from the
FPSIMD state.
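
As a rough, self-contained sketch only (not the patch itself), the flag
transitions around a syscall can be modelled like this; the flag word,
the zreg[] array and both function names are stand-ins for the real
thread-flag and fpsimd helpers changed in the diff below:

#include <stdio.h>
#include <string.h>

#define TIF_SVE			(1u << 0)
#define TIF_SVE_NEEDS_FLUSH	(1u << 1)

static unsigned int flags = TIF_SVE;
static unsigned char zreg[32];	/* [0..15] ~ FPSIMD subset, rest ~ SVE-only */

static void syscall_entry(void)	/* models sve_user_discard() */
{
	if (flags & TIF_SVE) {
		flags &= ~TIF_SVE;
		flags |= TIF_SVE_NEEDS_FLUSH;
	}
}

static void return_to_user(void)	/* models the flush on the exit path */
{
	if (flags & TIF_SVE_NEEDS_FLUSH) {
		memset(zreg + 16, 0, sizeof(zreg) - 16);	/* keep FPSIMD subset */
		flags &= ~TIF_SVE_NEEDS_FLUSH;
		flags |= TIF_SVE;
	}
}

int main(void)
{
	memset(zreg, 0xff, sizeof(zreg));
	syscall_entry();
	return_to_user();
	printf("flags=%#x zreg[0]=%#x zreg[16]=%#x\n", flags, zreg[0], zreg[16]);
	return 0;
}

The real work is done below in sve_user_discard(), task_fpsimd_load()
and fpsimd_restore_current_state().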

We could instead flush the SVE state in do_el0_svc(), but doing so
reduces the potential for further optimisations (such as initializing
the SVE registers directly from the FPSIMD state when taking an SVE
access trap) and has some potential edge cases if we schedule before
returning to userspace after do_el0_svc().

Signed-off-by: Julien Grall <julien.grall at arm.com>
[Revisions in response to review comments, mainly clarifications around
documentation -- broonie]
Signed-off-by: Mark Brown <broonie at kernel.org>
---
 arch/arm64/include/asm/thread_info.h |   6 +-
 arch/arm64/kernel/fpsimd.c           | 123 +++++++++++++++++++++------
 arch/arm64/kernel/process.c          |   1 +
 arch/arm64/kernel/ptrace.c           |  11 +++
 arch/arm64/kernel/signal.c           |  16 +++-
 arch/arm64/kernel/syscall.c          |  13 ++-
 6 files changed, 131 insertions(+), 39 deletions(-)

diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 1fbab854a51b..cabca12f1dc0 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -68,6 +68,7 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define TIF_UPROBE		4	/* uprobe breakpoint or singlestep */
 #define TIF_FSCHECK		5	/* Check FS is USER_DS on return */
 #define TIF_MTE_ASYNC_FAULT	6	/* MTE Asynchronous Tag Check Fault */
+#define TIF_SVE_NEEDS_FLUSH	7	/* Flush SVE registers on return */
 #define TIF_SYSCALL_TRACE	8	/* syscall trace active */
 #define TIF_SYSCALL_AUDIT	9	/* syscall auditing */
 #define TIF_SYSCALL_TRACEPOINT	10	/* syscall tracepoint for ftrace */
@@ -99,9 +100,12 @@ void arch_release_task_struct(struct task_struct *tsk);
 #define _TIF_SVE		(1 << TIF_SVE)
 #define _TIF_MTE_ASYNC_FAULT	(1 << TIF_MTE_ASYNC_FAULT)
 
+#define _TIF_SVE_NEEDS_FLUSH	(1 << TIF_SVE_NEEDS_FLUSH)
+
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
-				 _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT)
+				 _TIF_UPROBE | _TIF_FSCHECK | _TIF_MTE_ASYNC_FAULT | \
+				 _TIF_SVE_NEEDS_FLUSH)
 
 #define _TIF_SYSCALL_WORK	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
 				 _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 062b21f30f94..e2dbc23ecf19 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -215,6 +215,8 @@ static bool have_cpu_fpsimd_context(void)
  */
 static void __sve_free(struct task_struct *task)
 {
+	/* SVE context will be zeroed when allocated. */
+	clear_tsk_thread_flag(task, TIF_SVE_NEEDS_FLUSH);
 	kfree(task->thread.sve_state);
 	task->thread.sve_state = NULL;
 }
@@ -227,14 +229,45 @@ static void sve_free(struct task_struct *task)
 }
 
 /*
+ * In order to avoid the expense of storing the SVE registers when not
+ * in active use by tasks we keep track of the task's SVE usage and
+ * only allocate space for SVE registers for tasks that need it.
+ *
  * TIF_SVE controls whether a task can use SVE without trapping while
  * in userspace, and also the way a task's FPSIMD/SVE state is stored
- * in thread_struct.
+ * in thread_struct. The kernel uses this flag to track whether a
+ * user task has active SVE state, and therefore whether full SVE
+ * register state needs to be tracked.  If not, the cheaper FPSIMD
+ * context handling code can be used instead of the more costly SVE
+ * equivalents.
+ *
+ * Since syscalls cause all the SVE register state other than that
+ * shared with FPSIMD to be discarded we also optimize the case where
+ * only the subset of SVE registers shared with FPSIMD needs to be
+ * stored, using an additional flag TIF_SVE_NEEDS_FLUSH to indicate
+ * this case.
+ *
+ *
+ *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
+ *    irrespective of any flags, since these are not vector length
+ *    dependent.
+ *
+ *  * TIF_SVE and TIF_SVE_NEEDS_FLUSH both clear:
+ *
+ *    An attempt by the user task to execute an SVE instruction causes
+ *    do_sve_acc() to be called, which does some preparation and sets
+ *    TIF_SVE_NEEDS_FLUSH which will then in turn set TIF_SVE on
+ *    return to userspace.
+ *
+ *    When stored, FPSIMD registers V0-V31 are encoded in
+ *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
+ *    logically zero but not stored anywhere; P0-P15 and FFR are not
+ *    stored and have unspecified values from userspace's point of
+ *    view.  For hygiene purposes, the kernel zeroes them on next use,
+ *    but userspace is discouraged from relying on this.
  *
- * The kernel uses this flag to track whether a user task is actively
- * using SVE, and therefore whether full SVE register state needs to
- * be tracked.  If not, the cheaper FPSIMD context handling code can
- * be used instead of the more costly SVE equivalents.
+ *    task->thread.sve_state does not need to be non-NULL, valid or any
+ *    particular size: it must not be dereferenced.
  *
  *  * TIF_SVE set:
  *
@@ -249,28 +282,25 @@ static void sve_free(struct task_struct *task)
  *    task->thread.sve_state must point to a valid buffer at least
  *    sve_state_size(task) bytes in size.
  *
- *    During any syscall, the kernel may optionally clear TIF_SVE and
- *    discard the vector state except for the FPSIMD subset.
+ *    During any syscall the ABI allows the kernel to discard the
+ *    vector state other than the FPSIMD subset. When this is done
+ *    TIF_SVE_NEEDS_FLUSH will be set and TIF_SVE will be cleared.
  *
- *  * TIF_SVE clear:
+ *  * TIF_SVE_NEEDS_FLUSH set:
  *
- *    An attempt by the user task to execute an SVE instruction causes
- *    do_sve_acc() to be called, which does some preparation and then
- *    sets TIF_SVE.
+ *    The task can execute SVE instructions while in userspace without
+ *    trapping to the kernel.
  *
- *    When stored, FPSIMD registers V0-V31 are encoded in
- *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
- *    logically zero but not stored anywhere; P0-P15 and FFR are not
- *    stored and have unspecified values from userspace's point of
- *    view.  For hygiene purposes, the kernel zeroes them on next use,
- *    but userspace is discouraged from relying on this.
+ *    The SVE register state is stored as for the case with both
+ *    TIF_SVE and TIF_SVE_NEEDS_FLUSH clear. On return to userspace
+ *    the SVE registers will be flushed and TIF_SVE will be set.
  *
- *    task->thread.sve_state does not need to be non-NULL, valid or any
- *    particular size: it must not be dereferenced.
+ *  * TIF_SVE and TIF_SVE_NEEDS_FLUSH both set:
+ *
+ *    If both TIF_SVE and TIF_SVE_NEEDS_FLUSH are set then
+ *    TIF_SVE_NEEDS_FLUSH takes precedence: setting it is treated as
+ *    invalidating the SVE state.
  *
- *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
- *    irrespective of whether TIF_SVE is clear or set, since these are
- *    not vector length dependent.
  */
 
 /*
@@ -279,18 +309,37 @@ static void sve_free(struct task_struct *task)
  * This function should be called only when the FPSIMD/SVE state in
  * thread_struct is known to be up to date, when preparing to enter
  * userspace.
+ *
+ * When TIF_SVE_NEEDS_FLUSH is set, the SVE state will be restored from the
+ * FPSIMD state.
+ *
+ * TIF_SVE_NEEDS_FLUSH and TIF_SVE should never be set at the same time.
+ * In the unlikely case that they are, the code can cope with it: the SVE
+ * registers will be restored first and then flushed in
+ * fpsimd_restore_current_state().
  */
 static void task_fpsimd_load(void)
 {
 	WARN_ON(!system_supports_fpsimd());
 	WARN_ON(!have_cpu_fpsimd_context());
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE))
-		sve_load_state(sve_pffr(&current->thread),
-			       &current->thread.uw.fpsimd_state.fpsr,
-			       sve_vq_from_vl(current->thread.sve_vl) - 1);
-	else
-		fpsimd_load_state(&current->thread.uw.fpsimd_state);
+	/* Ensure that we only evaluate system_supports_sve() once. */
+	if (system_supports_sve()) {
+		if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+			sve_load_from_fpsimd_state(&current->thread.uw.fpsimd_state,
+				   sve_vq_from_vl(current->thread.sve_vl) - 1);
+			set_thread_flag(TIF_SVE);
+			return;
+
+		} else if (test_thread_flag(TIF_SVE)) {
+			sve_load_state(sve_pffr(&current->thread),
+				       &current->thread.uw.fpsimd_state.fpsr,
+				       sve_vq_from_vl(current->thread.sve_vl) - 1);
+			return;
+		}
+	}
+
+	fpsimd_load_state(&current->thread.uw.fpsimd_state);
 }
 
 /*
@@ -1161,10 +1210,28 @@ void fpsimd_restore_current_state(void)
 	get_cpu_fpsimd_context();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
+		/*
+		 * If TIF_SVE_NEEDS_FLUSH is set this takes care of
+		 * restoring the SVE state that is preserved over
+		 * syscalls should we have context switched.
+		 */
 		task_fpsimd_load();
 		fpsimd_bind_task_to_cpu();
 	}
 
+	if (test_and_clear_thread_flag(TIF_SVE_NEEDS_FLUSH)) {
+		/*
+		 * Userspace had SVE enabled on entry to the kernel
+		 * and requires the state to be flushed.
+		 *
+		 * We rely on the vector length to be set correctly beforehand
+		 * when converting a loaded FPSIMD state to SVE state.
+		 */
+		sve_flush_live();
+		sve_user_enable();
+		set_thread_flag(TIF_SVE);
+	}
+
 	put_cpu_fpsimd_context();
 }
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4784011cecac..197ecf885ace 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -371,6 +371,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	 */
 	dst->thread.sve_state = NULL;
 	clear_tsk_thread_flag(dst, TIF_SVE);
+	clear_tsk_thread_flag(dst, TIF_SVE_NEEDS_FLUSH);
 
 	/* clear any pending asynchronous tag fault raised by the parent */
 	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index f49b349e16a3..bf0434d61d68 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -769,6 +769,10 @@ static int sve_get(struct task_struct *target,
 
 	/* Otherwise: full SVE case */
 
+	/* The flush should have happened when the thread was stopped */
+	if (test_and_clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH))
+		WARN(1, "TIF_SVE_NEEDS_FLUSH was set");
+
 	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
 	start = SVE_PT_SVE_OFFSET;
 	end = SVE_PT_SVE_FFR_OFFSET(vq) + SVE_PT_SVE_FFR_SIZE(vq);
@@ -831,6 +835,11 @@ static int sve_set(struct task_struct *target,
 		ret = __fpr_set(target, regset, pos, count, kbuf, ubuf,
 				SVE_PT_FPSIMD_OFFSET);
 		clear_tsk_thread_flag(target, TIF_SVE);
+		/*
+		 * If ptrace requested the FPSIMD-only register layout, don't
+		 * try to re-enable SVE when the task is running again.
+		 */
+		clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 		goto out;
 	}
 
@@ -855,6 +864,8 @@ static int sve_set(struct task_struct *target,
 	 */
 	fpsimd_sync_to_sve(target);
 	set_tsk_thread_flag(target, TIF_SVE);
+	/* Don't flush SVE registers on return as ptrace will update them. */
+	clear_tsk_thread_flag(target, TIF_SVE_NEEDS_FLUSH);
 
 	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
 	start = SVE_PT_SVE_OFFSET;
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index a8184cad8890..53fba6a1a575 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -307,8 +307,10 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
 	__get_user_error(fpsimd.fpcr, &user->fpsimd->fpcr, err);
 
 	/* load the hardware registers from the fpsimd_state structure */
-	if (!err)
+	if (!err) {
+		clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
 		fpsimd_update_current_state(&fpsimd);
+	}
 
 	return err ? -EFAULT : 0;
 }
@@ -521,6 +523,15 @@ static int restore_sigframe(struct pt_regs *regs,
 		} else {
 			err = restore_fpsimd_context(user.fpsimd);
 		}
+
+		/*
+		 * When the restore succeeds:
+		 *	- for an FPSIMD context, don't re-enable SVE
+		 *	- for an SVE context, don't overwrite what was
+		 *	  just restored
+		 */
+		if (err == 0)
+			clear_thread_flag(TIF_SVE_NEEDS_FLUSH);
 	}
 
 	return err;
@@ -950,7 +961,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 				rseq_handle_notify_resume(NULL, regs);
 			}
 
-			if (thread_flags & _TIF_FOREIGN_FPSTATE)
+			if (thread_flags & (_TIF_FOREIGN_FPSTATE |
+					    _TIF_SVE_NEEDS_FLUSH))
 				fpsimd_restore_current_state();
 		}
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index e4c0dadf0d92..6e5857045474 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -187,16 +187,13 @@ static inline void sve_user_discard(void)
 	if (!system_supports_sve())
 		return;
 
-	clear_thread_flag(TIF_SVE);
-
 	/*
-	 * task_fpsimd_load() won't be called to update CPACR_EL1 in
-	 * ret_to_user unless TIF_FOREIGN_FPSTATE is still set, which only
-	 * happens if a context switch or kernel_neon_begin() or context
-	 * modification (sigreturn, ptrace) intervenes.
-	 * So, ensure that CPACR_EL1 is already correct for the fast-path case.
+	 * TIF_SVE is cleared so that only the FPSIMD state, not the full SVE
+	 * state, is saved on context switch. The flag is set again once the
+	 * registers have been restored/zeroed on the return to userspace.
 	 */
-	sve_user_disable();
+	if (test_and_clear_thread_flag(TIF_SVE))
+		set_thread_flag(TIF_SVE_NEEDS_FLUSH);
 }
 
 void do_el0_svc(struct pt_regs *regs)
-- 
2.20.1



