[PATCH v3] riscv: add irq stack support

Palmer Dabbelt palmer at dabbelt.com
Thu Jun 2 14:44:46 PDT 2022


On Sat, 14 May 2022 22:03:36 PDT (-0700), jszhang at kernel.org wrote:
> Currently, IRQs are still handled on the kernel stack of the current
> task on riscv platforms. If the task has a deep call stack at the time
> of interrupt, and handling the interrupt also requires a deep stack,
> it's possible to see stack overflow.
>
> Before this patch, the stack_max_size of a v5.17-rc1 kernel running on
> a lichee RV board gave:
> ~ # cat /sys/kernel/debug/tracing/stack_max_size
> 3736
>
> After this patch,
> ~ # cat /sys/kernel/debug/tracing/stack_max_size
> 3176
>
> We reduce the max kernel stack usage by 560 bytes!
>
> Signed-off-by: Jisheng Zhang <jszhang at kernel.org>
> ---
> since v2:
>  - rebase on v5.18-rcN
>  - update commit msg, I.E remove the "it's possible to reduce the
> THREAD_SIZE to 8KB for RV64 platforms..."
>
> since v1:
>  - add __ro_after_init to the irq_stack[] array.
>
>  arch/riscv/include/asm/thread_info.h |  1 +
>  arch/riscv/kernel/asm-offsets.c      |  2 ++
>  arch/riscv/kernel/entry.S            | 33 +++++++++++++++++++++++++---
>  arch/riscv/kernel/irq.c              | 16 ++++++++++++++
>  4 files changed, 49 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
> index 74d888c8d631..98ea73721a0b 100644
> --- a/arch/riscv/include/asm/thread_info.h
> +++ b/arch/riscv/include/asm/thread_info.h
> @@ -25,6 +25,7 @@
>  #endif
>  #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
>
> +#define IRQ_STACK_SIZE		THREAD_SIZE
>  /*
>   * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
>   * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
> diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
> index df9444397908..9e32748af0e8 100644
> --- a/arch/riscv/kernel/asm-offsets.c
> +++ b/arch/riscv/kernel/asm-offsets.c
> @@ -37,6 +37,8 @@ void asm_offsets(void)
>  	OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
>  	OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
>  	OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
> +	OFFSET(TASK_TI_CPU, task_struct, thread_info.cpu);
> +	OFFSET(TASK_STACK, task_struct, stack);
>
>  	OFFSET(TASK_THREAD_F0,  task_struct, thread.fstate.f[0]);
>  	OFFSET(TASK_THREAD_F1,  task_struct, thread.fstate.f[1]);
> diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
> index c8b9ce274b9a..e91cae183ef4 100644
> --- a/arch/riscv/kernel/entry.S
> +++ b/arch/riscv/kernel/entry.S
> @@ -126,12 +126,39 @@ skip_context_tracking:
>  	 */
>  	bge s4, zero, 1f
>
> -	la ra, ret_from_exception
> +	/* preserve the sp */
> +	move s0, sp
>
> -	/* Handle interrupts */
>  	move a0, sp /* pt_regs */
> +
> +	/*
> +	 * Compare sp with the base of the task stack.
> +	 * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
> +	 * and should switch to the irq stack.
> +	 */
> +	REG_L t0, TASK_STACK(tp)

This fails to build on some configurations, as TASK_STACK doesn't fit 
within the load immediate.  IIRC we added a macro for this at some 
point (to select the short/long addressing sequence), but there's a 
trivial fix

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 99bc411d12f4..095fef82e910 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -136,7 +136,9 @@ skip_context_tracking:
 	 * If the top ~(THREAD_SIZE - 1) bits match, we are on a task stack,
 	 * and should switch to the irq stack.
 	 */
-	REG_L t0, TASK_STACK(tp)
+	li t0, TASK_STACK
+	add t0, t0, tp
+	REG_L t0, 0(t0)
 	xor t0, t0, s0
 	li t1, ~(THREAD_SIZE - 1)
 	and t0, t0, t1

Unfortunatly even with that fix this still blows up early in boot on 
some of my test configs.  Looks like pretty much anything with kasan is 
failing.  Nothing is jumping out at me and it's pretty late so I won't 
have time to debug it myself, sorry.

> +	xor t0, t0, s0
> +	li t1, ~(THREAD_SIZE - 1)
> +	and t0, t0, t1
> +	bnez t0, 2f
> +
> +	la t1, irq_stack
> +	REG_L t2, TASK_TI_CPU(tp)
> +	slli t2, t2, RISCV_LGPTR
> +	add t1, t1, t2
> +	REG_L t2, 0(t1)
> +	li t1, IRQ_STACK_SIZE
> +	/* switch to the irq stack */
> +	add sp, t2, t1
> +
> +2:
> +	/* Handle interrupts */
>  	la a1, generic_handle_arch_irq
> -	jr a1
> +	jalr a1
> +
> +	/* Restore sp */
> +	move sp, s0
> +	j ret_from_exception
>  1:
>  	/*
>  	 * Exceptions run with interrupts enabled or disabled depending on the
> diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
> index 7207fa08d78f..f20cbfd42e82 100644
> --- a/arch/riscv/kernel/irq.c
> +++ b/arch/riscv/kernel/irq.c
> @@ -10,6 +10,8 @@
>  #include <linux/seq_file.h>
>  #include <asm/smp.h>
>
> +void *irq_stack[NR_CPUS] __ro_after_init;
> +
>  int arch_show_interrupts(struct seq_file *p, int prec)
>  {
>  	show_ipi_stats(p, prec);
> @@ -18,7 +20,21 @@ int arch_show_interrupts(struct seq_file *p, int prec)
>
>  void __init init_IRQ(void)
>  {
> +	int cpu;
> +
>  	irqchip_init();
>  	if (!handle_arch_irq)
>  		panic("No interrupt controller found.");
> +
> +	for_each_possible_cpu(cpu) {
> +#ifdef CONFIG_VMAP_STACK
> +		void *s = __vmalloc_node(IRQ_STACK_SIZE, THREAD_ALIGN,
> +					 THREADINFO_GFP, cpu_to_node(cpu),
> +					 __builtin_return_address(0));
> +#else
> +		void *s = (void *)__get_free_pages(GFP_KERNEL, get_order(IRQ_STACK_SIZE));
> +#endif
> +
> +		irq_stack[cpu] = s;
> +	}
>  }



More information about the linux-riscv mailing list