[v7, 10/10] riscv: vector: allow kernel-mode Vector with preemption

Andy Chiu andy.chiu at sifive.com
Thu Dec 21 05:43:17 PST 2023


Add kernel_vstate to keep track of kernel-mode Vector registers when
trap introduced context switch happens. Also, provide riscv_v_flags to
let context save/restore routine track context status. Context tracking
happens whenever the core starts its in-kernel Vector executions. An
active (dirty) kernel task's V contexts will be saved to memory whenever
a trap-introduced context switch happens. Or, when a softirq, which
happens to nest on top of it, uses Vector. Context retoring happens when
the execution transfer back to the original Kernel context where it
first enable preempt_v.

Also, provide a config CONFIG_RISCV_ISA_V_PREEMPTIVE to give users an
option to disable preemptible kernel-mode Vector at build time. Users
with constraint memory may want to disable this config as preemptible
kernel-mode Vector needs extra space for tracking of per thread's
kernel-mode V context. Or, users might as well want to disable it if all
kernel-mode Vector code is time sensitive and cannot tolerate context
switch overhead.

Signed-off-by: Andy Chiu <andy.chiu at sifive.com>
---
Changelog v6:
 - re-write patch to handle context nesting for softirqs
 - drop thread flag and track context instead in riscv_v_flags
 - refine some asm code and constraint it into C functions
 - preallocate v context for preempt_v
 - Return non-zero in riscv_v_start_kernel_context with non-preemptible
   kernel-mode Vector
Changelog v4:
 - dropped from v4
Changelog v3:
 - Guard vstate_save with {get,set}_cpu_vector_context
 - Add comments on preventions of nesting V contexts
 - remove warnings in context switch when trap's reg is not pressent (Conor)
 - refactor code (Björn)
Changelog v2:
 - fix build fail when compiling without RISCV_ISA_V (Conor)
 - 's/TIF_RISCV_V_KMV/TIF_RISCV_V_KERNEL_MODE' and add comment (Conor)
 - merge Kconfig patch into this oine (Conor).
 - 's/CONFIG_RISCV_ISA_V_PREEMPTIVE_KMV/CONFIG_RISCV_ISA_V_PREEMPTIVE/'
   (Conor)
 - fix some typos (Conor)
 - enclose assembly with RISCV_ISA_V_PREEMPTIVE.
 - change riscv_v_vstate_ctrl_config_kmv() to
   kernel_vector_allow_preemption() for better understanding. (Conor)
 - 's/riscv_v_kmv_preempitble/kernel_vector_preemptible/'
---
 arch/riscv/Kconfig                     |  14 +++
 arch/riscv/include/asm/processor.h     |  26 +++++-
 arch/riscv/include/asm/simd.h          |  26 +++++-
 arch/riscv/include/asm/vector.h        |  57 +++++++++++-
 arch/riscv/kernel/entry.S              |   8 ++
 arch/riscv/kernel/kernel_mode_vector.c | 121 ++++++++++++++++++++++++-
 arch/riscv/kernel/process.c            |   3 +
 arch/riscv/kernel/vector.c             |  31 +++++--
 8 files changed, 265 insertions(+), 21 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index cba53dcc2ae0..70603c486593 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -557,6 +557,20 @@ config RISCV_ISA_V_MEMMOVE_THRESHOLD
 	  Prefer using vectorized memmove() when the workload size exceeds this
 	  value.
 
+config RISCV_ISA_V_PREEMPTIVE
+	bool "Run kernel-mode Vector with kernel preemption"
+	depends on PREEMPTION
+	depends on RISCV_ISA_V
+	default y
+	help
+	  Usually, in-kernel SIMD routines are run with preemption disabled.
+	  Functions which envoke long running SIMD thus must yield core's
+	  vector unit to prevent blocking other tasks for too long.
+
+	  This config allows kernel to run SIMD without explicitly disable
+	  preemption. Enabling this config will result in higher memory
+	  consumption due to the allocation of per-task's kernel Vector context.
+
 config TOOLCHAIN_HAS_ZBB
 	bool
 	default y
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 15781e2232e0..4de9124bcf4f 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -81,11 +81,32 @@ struct pt_regs;
  *    activation of this state disables the preemption. On a non-RT kernel, it
  *    also disable bh. Currently only 0 and 1 are valid value for this field.
  *    Other values are reserved for future uses.
+ *  - bits 8-15 are used for tracking preemptible kernel-mode Vector, when
+ *    RISCV_ISA_V_PREEMPTIVE is set. Calling kernel_vector_begin() does not
+ *    disable the preemption if the thread's kernel_vstate.datap is allocated.
+ *    Instead, the kernel adds 1 into this field. Then the trap entry/exit code
+ *    knows if we are entering/exiting the context that owns preempt_v.
+ *     - 0: the task is not using preempt_v
+ *     - 1: the task is actively using, and owns preempt_v
+ *     - >1: the task was using preempt_v, but then took a trap within. Thus,
+ *       the task does not own preempt_v. Any use of Vector will have to save
+ *       preempt_v, if dirty, and fallback to non-preemptible kernel-mode
+ *       Vector.
+ *   - bit 30: The in-kernel preempt_v context is saved, and requries to be
+ *     restored when returning to the context that owns the preempt_v.
+ *   - bit 31: The in-kernel preempt_v context is dirty, as signaled by the
+ *     trap entry code. Any context switches out-of current task need to save
+ *     it to the task's in-kernel V context. Also, any traps nesting on-top-of
+ *     preempt_v requesting to use V needs a save.
  */
 
-#define RISCV_KERNEL_MODE_V_MASK	0xff
+#define RISCV_KERNEL_MODE_V_MASK	0x000000ff
+#define RISCV_PREEMPT_V_MASK		0x0000ff00
 
-#define RISCV_KERNEL_MODE_V	0x1
+#define RISCV_KERNEL_MODE_V		0x00000001
+#define RISCV_PREEMPT_V			0x00000100
+#define RISCV_PREEMPT_V_DIRTY		0x80000000
+#define RISCV_PREEMPT_V_NEED_RESTORE	0x40000000
 
 /* CPU-specific state of a task */
 struct thread_struct {
@@ -99,6 +120,7 @@ struct thread_struct {
 	u32 vstate_ctrl;
 	struct __riscv_v_ext_state vstate;
 	unsigned long align_ctl;
+	struct __riscv_v_ext_state kernel_vstate;
 };
 
 /* Whitelist the fstate from the task_struct for hardened usercopy */
diff --git a/arch/riscv/include/asm/simd.h b/arch/riscv/include/asm/simd.h
index 7df5a976a80a..3fe6f201ee6e 100644
--- a/arch/riscv/include/asm/simd.h
+++ b/arch/riscv/include/asm/simd.h
@@ -12,6 +12,7 @@
 #include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/types.h>
+#include <linux/thread_info.h>
 
 #include <asm/vector.h>
 
@@ -28,12 +29,27 @@ static __must_check inline bool may_use_simd(void)
 	/*
 	 * RISCV_KERNEL_MODE_V is only set while preemption is disabled,
 	 * and is clear whenever preemption is enabled.
-	 *
-	 * Kernel-mode Vector temperarily disables bh. So we must not return
-	 * true on irq_disabled(). Otherwise we would fail the lockdep check
-	 * calling local_bh_enable()
 	 */
-	return !in_hardirq() && !in_nmi() && !irqs_disabled() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
+	if (in_hardirq() || in_nmi())
+		return false;
+
+	/*
+	 * Nesting is acheived in preempt_v by spreading the control for
+	 * preemptible and non-preemptible kernel-mode Vector into two fields.
+	 * Always try to match with prempt_v if kernel V-context exists. Then,
+	 * fallback to check non preempt_v if nesting happens, or if the config
+	 * is not set.
+	 */
+	if (IS_ENABLED(CONFIG_RISCV_ISA_V_PREEMPTIVE) && current->thread.kernel_vstate.datap) {
+		if (!riscv_preempt_v_started(current))
+			return true;
+	}
+	/*
+	 * Non-preemptible kernel-mode Vector temperarily disables bh. So we
+	 * must not return true on irq_disabled(). Otherwise we would fail the
+	 * lockdep check calling local_bh_enable()
+	 */
+	return !irqs_disabled() && !(riscv_v_ctx_cnt() & RISCV_KERNEL_MODE_V_MASK);
 }
 
 #else /* ! CONFIG_RISCV_ISA_V */
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 0e6741dd9ef3..542eaf9227c3 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -28,6 +28,7 @@ void get_cpu_vector_context(void);
 void put_cpu_vector_context(void);
 void riscv_v_thread_free(struct task_struct *tsk);
 void __init riscv_v_setup_ctx_cache(void);
+void riscv_v_thread_alloc(struct task_struct *tsk);
 
 static inline void riscv_v_ctx_cnt_add(u32 offset)
 {
@@ -212,14 +213,63 @@ static inline void riscv_v_vstate_set_restore(struct task_struct *task,
 	}
 }
 
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline bool riscv_preempt_v_dirty(struct task_struct *task)
+{
+	u32 val = READ_ONCE(task->thread.riscv_v_flags);
+
+	return !!(val & RISCV_PREEMPT_V_DIRTY);
+}
+
+static inline bool riscv_preempt_v_restore(struct task_struct *task)
+{
+	u32 val = READ_ONCE(task->thread.riscv_v_flags);
+
+	return !!(val & RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_clear_dirty(struct task_struct *task)
+{
+	barrier();
+	task->thread.riscv_v_flags &= ~RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_set_restore(struct task_struct *task)
+{
+	barrier();
+	task->thread.riscv_v_flags |= RISCV_PREEMPT_V_NEED_RESTORE;
+}
+
+static inline bool riscv_preempt_v_started(struct task_struct *task)
+{
+	return !!(READ_ONCE(task->thread.riscv_v_flags) & RISCV_PREEMPT_V_MASK);
+}
+#else /* !CONFIG_RISCV_ISA_V_PREEMPTIVE */
+static inline bool riscv_preempt_v_dirty(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_restore(struct task_struct *task) { return false; }
+static inline bool riscv_preempt_v_started(struct task_struct *task) { return false; }
+#define riscv_preempt_v_clear_dirty(tsk)	do {} while (0)
+#define riscv_preempt_v_set_restore(tsk)	do {} while (0)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
 static inline void __switch_to_vector(struct task_struct *prev,
 				      struct task_struct *next)
 {
 	struct pt_regs *regs;
 
-	regs = task_pt_regs(prev);
-	riscv_v_vstate_save(&prev->thread.vstate, regs);
-	riscv_v_vstate_set_restore(next, task_pt_regs(next));
+	if (riscv_preempt_v_dirty(prev)) {
+		__riscv_v_vstate_save(&prev->thread.kernel_vstate,
+				      prev->thread.kernel_vstate.datap);
+		riscv_preempt_v_clear_dirty(prev);
+	} else {
+		regs = task_pt_regs(prev);
+		riscv_v_vstate_save(&prev->thread.vstate, regs);
+	}
+
+	if (riscv_preempt_v_started(next))
+		riscv_preempt_v_set_restore(next);
+	else
+		riscv_v_vstate_set_restore(next, task_pt_regs(next));
 }
 
 void riscv_v_vstate_ctrl_init(struct task_struct *tsk);
@@ -243,6 +293,7 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }
 #define riscv_v_vstate_on(regs)			do {} while (0)
 #define riscv_v_thread_free(tsk)		do {} while (0)
 #define  riscv_v_setup_ctx_cache()		do {} while (0)
+#define riscv_v_thread_alloc(tsk)		do {} while (0)
 
 #endif /* CONFIG_RISCV_ISA_V */
 
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 54ca4564a926..9d1a305d5508 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -83,6 +83,10 @@ SYM_CODE_START(handle_exception)
 	/* Load the kernel shadow call stack pointer if coming from userspace */
 	scs_load_current_if_task_changed s5
 
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	move a0, sp
+	call riscv_v_context_nesting_start
+#endif
 	move a0, sp /* pt_regs */
 	la ra, ret_from_exception
 
@@ -138,6 +142,10 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
 	 */
 	csrw CSR_SCRATCH, tp
 1:
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	move a0, sp
+	call riscv_v_context_nesting_end
+#endif
 	REG_L a0, PT_STATUS(sp)
 	/*
 	 * The current load reservation is effectively part of the processor's
diff --git a/arch/riscv/kernel/kernel_mode_vector.c b/arch/riscv/kernel/kernel_mode_vector.c
index 238154cb4fce..6a7df511ccdc 100644
--- a/arch/riscv/kernel/kernel_mode_vector.c
+++ b/arch/riscv/kernel/kernel_mode_vector.c
@@ -50,6 +50,111 @@ void put_cpu_vector_context(void)
 		preempt_enable();
 }
 
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static inline void riscv_preempt_v_set_dirty(void)
+{
+	current->thread.riscv_v_flags |= RISCV_PREEMPT_V_DIRTY;
+}
+
+static inline void riscv_preempt_v_reset_flags(void)
+{
+	current->thread.riscv_v_flags &= ~(RISCV_PREEMPT_V_DIRTY | RISCV_PREEMPT_V_NEED_RESTORE);
+}
+
+static inline void riscv_preempt_v_depth_inc(void)
+{
+	riscv_v_ctx_cnt_add(RISCV_PREEMPT_V);
+}
+
+static inline void riscv_preempt_v_depth_dec(void)
+{
+	riscv_v_ctx_cnt_sub(RISCV_PREEMPT_V);
+}
+
+static inline u32 riscv_preempt_v_get_depth(void)
+{
+	return riscv_v_ctx_cnt() & RISCV_PREEMPT_V_MASK;
+}
+
+#define PREEMPT_V_FIRST_DEPTH	RISCV_PREEMPT_V
+static int riscv_v_stop_kernel_context(void)
+{
+	if (riscv_preempt_v_get_depth() != PREEMPT_V_FIRST_DEPTH)
+		return 1;
+
+	riscv_preempt_v_depth_dec();
+	return 0;
+}
+
+static int riscv_v_start_kernel_context(bool *is_nested)
+{
+	struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+
+	if (!vstate->datap)
+		return -ENOENT;
+
+	if (riscv_preempt_v_started(current)) {
+		WARN_ON(riscv_preempt_v_get_depth() == PREEMPT_V_FIRST_DEPTH);
+		if (riscv_preempt_v_dirty(current)) {
+			get_cpu_vector_context();
+			__riscv_v_vstate_save(vstate, vstate->datap);
+			riscv_preempt_v_clear_dirty(current);
+			put_cpu_vector_context();
+		}
+		get_cpu_vector_context();
+		riscv_preempt_v_set_restore(current);
+		*is_nested = true;
+		return 0;
+	}
+
+	get_cpu_vector_context();
+	riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+	put_cpu_vector_context();
+
+	riscv_preempt_v_depth_inc();
+	return 0;
+}
+
+/* low-level V context handling code, called with irq disabled */
+asmlinkage void riscv_v_context_nesting_start(struct pt_regs *regs)
+{
+	int depth;
+
+	if (!riscv_preempt_v_started(current))
+		return;
+
+	depth = riscv_preempt_v_get_depth();
+	if (depth == PREEMPT_V_FIRST_DEPTH && (regs->status & SR_VS) == SR_VS_DIRTY)
+		riscv_preempt_v_set_dirty();
+
+	riscv_preempt_v_depth_inc();
+}
+
+asmlinkage void riscv_v_context_nesting_end(struct pt_regs *regs)
+{
+	struct __riscv_v_ext_state *vstate = &current->thread.kernel_vstate;
+	u32 depth;
+
+	lockdep_assert_irqs_disabled();
+
+	if (!riscv_preempt_v_started(current))
+		return;
+
+	riscv_preempt_v_depth_dec();
+	depth = riscv_preempt_v_get_depth();
+	if (depth == PREEMPT_V_FIRST_DEPTH) {
+		if (riscv_preempt_v_restore(current)) {
+			__riscv_v_vstate_restore(vstate, vstate->datap);
+			__riscv_v_vstate_clean(regs);
+		}
+		riscv_preempt_v_reset_flags();
+	}
+}
+#else
+#define riscv_v_start_kernel_context(nested)	(-ENOENT)
+#define riscv_v_stop_kernel_context()		(-ENOENT)
+#endif /* CONFIG_RISCV_ISA_V_PREEMPTIVE */
+
 /*
  * kernel_vector_begin(): obtain the CPU vector registers for use by the calling
  * context
@@ -65,14 +170,20 @@ void put_cpu_vector_context(void)
  */
 void kernel_vector_begin(void)
 {
+	bool nested = false;
+
 	if (WARN_ON(!has_vector()))
 		return;
 
 	BUG_ON(!may_use_simd());
 
-	get_cpu_vector_context();
+	if (riscv_v_start_kernel_context(&nested)) {
+		get_cpu_vector_context();
+		riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+	}
 
-	riscv_v_vstate_save(&current->thread.vstate, task_pt_regs(current));
+	if (!nested)
+		riscv_v_vstate_set_restore(current, task_pt_regs(current));
 
 	riscv_v_enable();
 }
@@ -92,10 +203,10 @@ void kernel_vector_end(void)
 	if (WARN_ON(!has_vector()))
 		return;
 
-	riscv_v_vstate_set_restore(current, task_pt_regs(current));
-
 	riscv_v_disable();
 
-	put_cpu_vector_context();
+	if (riscv_v_stop_kernel_context()) {// we should call this early
+		put_cpu_vector_context();
+	}
 }
 EXPORT_SYMBOL_GPL(kernel_vector_end);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 38bdbcf9b81d..1afdec4aeda6 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -188,6 +188,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	*dst = *src;
 	/* clear entire V context, including datap for a new task */
 	memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
+	memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
 	clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
 	return 0;
 }
@@ -223,6 +224,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		p->thread.s[0] = 0;
 	}
 	p->thread.riscv_v_flags = 0;
+	if (has_vector())
+		riscv_v_thread_alloc(p);
 	p->thread.ra = (unsigned long)ret_from_fork;
 	p->thread.sp = (unsigned long)childregs; /* kernel sp */
 	return 0;
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 1fe140e34557..f9769703fd39 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -22,6 +22,9 @@
 
 static bool riscv_v_implicit_uacc = IS_ENABLED(CONFIG_RISCV_ISA_V_DEFAULT_ENABLE);
 static struct kmem_cache *riscv_v_user_cachep;
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+static struct kmem_cache *riscv_v_kernel_cachep;
+#endif
 
 unsigned long riscv_v_vsize __read_mostly;
 EXPORT_SYMBOL_GPL(riscv_v_vsize);
@@ -53,6 +56,11 @@ void __init riscv_v_setup_ctx_cache(void)
 	riscv_v_user_cachep = kmem_cache_create_usercopy("riscv_vector_ctx",
 							 riscv_v_vsize, 16, SLAB_PANIC,
 							 0, riscv_v_vsize, NULL);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	riscv_v_kernel_cachep = kmem_cache_create("riscv_vector_kctx",
+						  riscv_v_vsize, 16,
+						  SLAB_PANIC, NULL);
+#endif
 }
 
 static bool insn_is_vector(u32 insn_buf)
@@ -88,24 +96,35 @@ static bool insn_is_vector(u32 insn_buf)
 	return false;
 }
 
-static int riscv_v_thread_zalloc(void)
+static int riscv_v_thread_zalloc(struct kmem_cache *cache,
+				 struct __riscv_v_ext_state *ctx)
 {
 	void *datap;
 
-	datap = kmem_cache_zalloc(riscv_v_user_cachep, GFP_KERNEL);
+	datap = kmem_cache_zalloc(cache, GFP_KERNEL);
 	if (!datap)
 		return -ENOMEM;
 
-	current->thread.vstate.datap = datap;
-	memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_ext_state,
-						    datap));
+	ctx->datap = datap;
+	memset(ctx, 0, offsetof(struct __riscv_v_ext_state, datap));
 	return 0;
 }
 
+void riscv_v_thread_alloc(struct task_struct *tsk)
+{
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	riscv_v_thread_zalloc(riscv_v_kernel_cachep, &tsk->thread.kernel_vstate);
+#endif
+}
+
 void riscv_v_thread_free(struct task_struct *tsk)
 {
 	if (tsk->thread.vstate.datap)
 		kmem_cache_free(riscv_v_user_cachep, tsk->thread.vstate.datap);
+#ifdef CONFIG_RISCV_ISA_V_PREEMPTIVE
+	if (tsk->thread.kernel_vstate.datap)
+		kmem_cache_free(riscv_v_kernel_cachep, tsk->thread.kernel_vstate.datap);
+#endif
 }
 
 #define VSTATE_CTRL_GET_CUR(x) ((x) & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)
@@ -177,7 +196,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
 	 * context where VS has been off. So, try to allocate the user's V
 	 * context and resume execution.
 	 */
-	if (riscv_v_thread_zalloc()) {
+	if (riscv_v_thread_zalloc(riscv_v_user_cachep, &current->thread.vstate)) {
 		force_sig(SIGBUS);
 		return true;
 	}
-- 
2.17.1




More information about the linux-riscv mailing list