[PATCH v2] arm64: Introduce IRQ stack

Fri Sep 18 06:46:50 PDT 2015

Hi Jungseok Lee,

I gave this a go on a Juno board, while generating usb/network interrupts:

Tested-by: James Morse <james.morse at arm.com>

On 13/09/15 15:42, Jungseok Lee wrote:
> Currently, kernel context and interrupts are handled using a single
> kernel stack navigated by sp_el1. This forces many systems to use
> 16KB stack, not 8KB one. Low memory platforms naturally suffer from
> memory pressure accompanied by performance degradation.
> 
> This patch addresses the issue as introducing a separate percpu IRQ
> stack to handle both hard and soft interrupts with two ground rules:
> 
>   - Utilize sp_el0 in EL1 context, which is not used currently
>   - Do not complicate current_thread_info calculation
> 
> It is a core concept to trace struct thread_info using sp_el0 instead
> of sp_el1. This approach helps arm64 align with other architectures
> regarding object_is_on_stack() without additional complexity.

I think you are missing a 'mov <reg>, sp; msr sp_el0, <reg>' in
kernel/sleep.S:cpu_resume():175. This code finds the saved stack pointer
from 'sleep_save_sp', and is called when the cpu wakes up from suspend.

It didn't show up in testing, because the wake-up is always under the idle
task, which evidently doesn't call current_thread_info() after wake-up.

> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 4306c93..c156540 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -88,7 +88,7 @@
>  
>  	.if	\el == 0
>  	mrs	x21, sp_el0
> -	get_thread_info tsk			// Ensure MDSCR_EL1.SS is clear,
> +	get_thread_info \el, tsk		// Ensure MDSCR_EL1.SS is clear,
>  	ldr	x19, [tsk, #TI_FLAGS]		// since we can unmask debug
>  	disable_step_tsk x19, x20		// exceptions when scheduling.
>  	.else
> @@ -105,6 +105,8 @@
>  	.if	\el == 0
>  	mvn	x21, xzr
>  	str	x21, [sp, #S_SYSCALLNO]
> +	mov	x25, sp
> +	msr	sp_el0, x25
>  	.endif
>  
>  	/*
> @@ -163,9 +165,45 @@ alternative_endif
>  	eret					// return to kernel
>  	.endm
>  
> -	.macro	get_thread_info, rd
> +	.macro	get_thread_info, el, rd
> +	.if	\el == 0

Why does \el matter here?
If \el==0, we interrupted an el0 thread, and set sp_el0 in kernel_entry()
to the el1 stack.
If \el==1, we interrupted an el1 thread, didn't overwrite its sp_el0, so
sp_el0 & ~(THREAD_SIZE-1) will give us the struct thread_info of the
interrupted task.

So either way, sp_el0 is correct...

>  	mov	\rd, sp
> -	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
> +	.else
> +	mrs	\rd, sp_el0
> +	.endif
> +	and	\rd, \rd, #~(THREAD_SIZE - 1)	// bottom of thread stack
> +	.endm
> +
> +	.macro	get_irq_stack
> +	adr_l	x21, irq_stacks
> +	mrs	x22, tpidr_el1
> +	add	x21, x21, x22
> +	.endm
> +
> +	.macro	irq_stack_entry
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_THREAD_SP]
> +	ldr	x23, [x21, #IRQ_STACK]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	add	w23, w23, #1
> +	str	w23, [x21, #IRQ_COUNT]

A (largely untested) example for the 'compare the high-order bits' way of
doing this:

	.macro	irq_stack_entry
	get_irq_stack
	ldr	x22, [x21, #IRQ_STACK]
	and	x23, x22, #~(THREAD_SIZE -1)
	mov	x24, sp
	and	x24, x24, #~(THREAD_SIZE -1)
	cmp	x23, x24	// irq_recursion?
	mov	x24, sp
	csel	x23, x24, x22, eq
	mov	sp, x23
	.endm

	/* preserve x24 between irq_stack_entry/irq_stack_exit */

	.macro	irq_stack_exit
	mov	sp, x24
	.endm

This would let you remove IRQ_COUNT and IRQ_THREAD_SP, and avoid the two
stores and a conditional-branch in irq_stack_entry/irq_stack_exit.

Thoughts?

> +	.endm
> +
> +	.macro	irq_stack_exit
> +	get_irq_stack
> +	ldr	w23, [x21, #IRQ_COUNT]
> +	sub	w23, w23, #1
> +	cbnz	w23, 1f				// check irq recursion
> +	mov	x23, sp
> +	str	x23, [x21, #IRQ_STACK]
> +	ldr	x23, [x21, #IRQ_THREAD_SP]
> +	mov	sp, x23
> +	mov	x23, xzr
> +1:	str	w23, [x21, #IRQ_COUNT]
>  	.endm
>  
>  /*
> @@ -183,10 +221,11 @@ tsk	.req	x28		// current thread_info
>   * Interrupt handling.
>   */
>  	.macro	irq_handler
> -	adrp	x1, handle_arch_irq
> -	ldr	x1, [x1, #:lo12:handle_arch_irq]
> +	ldr_l	x1, handle_arch_irq
>  	mov	x0, sp
> +	irq_stack_entry
>  	blr	x1
> +	irq_stack_exit
>  	.endm
>  
>  	.text
> @@ -361,7 +400,7 @@ el1_irq:
>  	irq_handler
>  
>  #ifdef CONFIG_PREEMPT
> -	get_thread_info tsk
> +	get_thread_info 1, tsk
>  	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
>  	cbnz	w24, 1f				// preempt count != 0
>  	ldr	x0, [tsk, #TI_FLAGS]		// get flags
> @@ -597,6 +636,7 @@ ENTRY(cpu_switch_to)
>  	ldp	x29, x9, [x8], #16
>  	ldr	lr, [x8]
>  	mov	sp, x9
> +	msr	sp_el0, x9
>  	ret
>  ENDPROC(cpu_switch_to)
>  
> @@ -655,7 +695,7 @@ ENTRY(ret_from_fork)
>  	cbz	x19, 1f				// not a kernel thread
>  	mov	x0, x20
>  	blr	x19
> -1:	get_thread_info tsk
> +1:	get_thread_info 1, tsk
>  	b	ret_to_user
>  ENDPROC(ret_from_fork)
>  
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index a055be6..cb13290 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -441,6 +441,8 @@ __mmap_switched:
>  	b	1b
>  2:
>  	adr_l	sp, initial_sp, x4
> +	mov	x4, sp

There should probably a comment explaining why sp_el0 is being set (for the
changes outside entry.S). Something like:

	msr	sp_el0, x4		// stash struct thread_info

> +	msr	sp_el0, x4
>  	str_l	x21, __fdt_pointer, x5		// Save FDT pointer
>  	str_l	x24, memstart_addr, x6		// Save PHYS_OFFSET
>  	mov	x29, #0
> @@ -613,6 +615,7 @@ ENDPROC(secondary_startup)
>  ENTRY(__secondary_switched)
>  	ldr	x0, [x21]			// get secondary_data.stack
>  	mov	sp, x0
> +	msr	sp_el0, x0
>  	mov	x29, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)

Thanks,

James