[PATCH 10/10] arm64: split thread_info from task stack

James Morse james.morse at arm.com
Fri Oct 21 07:50:34 PDT 2016


Hi Mark,

This looks great; we should definitely do this.
There are a few things missing from the entry.S changes; details below:

On 19/10/16 20:10, Mark Rutland wrote:
> This patch moves arm64's struct thread_info from the task stack into
> task_struct. This protects thread_info from corruption in the case of
> stack overflows, and makes its address harder to determine if stack
> addresses are leaked, making a number of attacks more difficult. Precise
> detection and handling of overflow is left for subsequent patches.
> 
> Largely, this involves changing code to store the task_struct in sp_el0,
> and acquire the thread_info from the task struct (which is the opposite
> way around to the current code). Both secondary entry and idle are
> updated to stash the sp and task pointer separately.
> 
> Userspace clobbers sp_el0, and we can no longer restore this from the
> stack. Instead, the current task is cached in a per-cpu variable that we
> can safely access from early assembly as interrupts are disabled (and we

>  arch/arm64/Kconfig                   |  1 +
>  arch/arm64/include/asm/Kbuild        |  1 -
>  arch/arm64/include/asm/current.h     | 22 ++++++++++++++++++++++
>  arch/arm64/include/asm/smp.h         |  1 +
>  arch/arm64/include/asm/thread_info.h | 24 ------------------------
>  arch/arm64/kernel/asm-offsets.c      |  1 +

>  arch/arm64/kernel/entry.S            |  4 ++--

Only 4 lines changed in entry.S? That was too easy...


>  arch/arm64/kernel/head.S             | 11 ++++++-----
>  arch/arm64/kernel/process.c          | 13 +++++++++++++
>  arch/arm64/kernel/smp.c              |  2 ++
>  10 files changed, 48 insertions(+), 32 deletions(-)


> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 2d4c83b..e781391 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -123,6 +123,7 @@
>  	 * Set sp_el0 to current thread_info.
>  	 */
>  	.if	\el == 0
> +	ldr_this_cpu	tsk, __entry_task, x21
>  	msr	sp_el0, tsk
>  	.endif
>  
> @@ -674,8 +675,7 @@ ENTRY(cpu_switch_to)
>  	ldp	x29, x9, [x8], #16
>  	ldr	lr, [x8]
>  	mov	sp, x9
> -	and	x9, x9, #~(THREAD_SIZE - 1)
> -	msr	sp_el0, x9
> +	msr	sp_el0, x1
>  	ret
>  ENDPROC(cpu_switch_to)
>  

So now tsk is current instead of current_thread_info(), but we still access it
with the TI_* offsets, which are offsets into struct thread_info, not
task_struct:
entry.S:102
> 	/* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */
> 	ldr	x20, [tsk, #TI_ADDR_LIMIT]
> 	str	x20, [sp, #S_ORIG_ADDR_LIMIT]
> 	mov	x20, #TASK_SIZE_64
> 	str	x20, [tsk, #TI_ADDR_LIMIT]

entry.S:143
> 	/* Restore the task's original addr_limit. */
> 	ldr	x20, [sp, #S_ORIG_ADDR_LIMIT]
> 	str	x20, [tsk, #TI_ADDR_LIMIT]


The 're-entered irq stack' check is going to need re-thinking:
entry.S:195
> 	/*
> 	 * Compare sp with the current thread_info, if the top
> 	 * ~(THREAD_SIZE - 1) bits match, we are on a task stack, and
> 	 * should switch to the irq stack.
> 	 */
> 	and	x25, x19, #~(THREAD_SIZE - 1)
> 	cmp	x25, tsk
> 	b.ne	9998f

It was done like this because the irq stack isn't naturally aligned, so masking
sp can't identify it; the check implicitly assumes tsk points at the base of
the task stack, which is no longer true. I will try to come up with an
alternative.
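
(For what it's worth: now that tsk is the task_struct, task_struct::stack gives
 us the stack base directly, so one option is to export that as an asm-offset
 and compare sp against it. A rough, untested sketch of the logic in C -- the
 assembly version would need a hypothetical TSK_STACK offset:)

	#include <linux/sched.h>	/* struct task_struct, task_stack_page() */
	#include <asm/thread_info.h>	/* THREAD_SIZE */

	/*
	 * sp lies on the task stack iff it falls within the
	 * THREAD_SIZE-aligned region starting at task_stack_page(tsk).
	 * The irq stack isn't naturally aligned, so it can never pass
	 * this test.
	 */
	static inline bool on_task_stack(struct task_struct *tsk,
					 unsigned long sp)
	{
		unsigned long stack = (unsigned long)task_stack_page(tsk);

		return ((sp ^ stack) & ~(THREAD_SIZE - 1)) == 0;
	}

In assembly that should be roughly one ldr of the stack pointer from tsk,
then eor/and/cbnz against the saved sp in x19.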


And there are a few other things like this (a possible asm-offsets fix is
sketched after the example):
entry.S:431
> 	ldr	w24, [tsk, #TI_PREEMPT]		// get preempt count
> 	cbnz	w24, 1f				// preempt count != 0
> 	ldr	x0, [tsk, #TI_FLAGS]		// get flags
> 	tbz	x0, #TIF_NEED_RESCHED, 1f	// needs rescheduling?
> 	bl	el1_preempt
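
All of these need offsets generated against struct task_struct instead.
Something like the below in asm-offsets.c would do it (the TSK_TI_* names are
just a suggestion):

	/*
	 * arch/arm64/kernel/asm-offsets.c: generate the thread_info field
	 * offsets relative to task_struct, which is what tsk now points at.
	 */
	DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
	DEFINE(TSK_TI_PREEMPT,		offsetof(struct task_struct, thread_info.preempt_count));
	DEFINE(TSK_TI_ADDR_LIMIT,	offsetof(struct task_struct, thread_info.addr_limit));

entry.S could then keep the same instructions, e.g.
'ldr x20, [tsk, #TSK_TI_ADDR_LIMIT]'.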


(It may be worth renaming the register alias 'tsk': until this patch it wasn't
 really a struct task_struct but a thread_info. Renaming it as part of the
 change would catch any missed users at build time, including any patches in
 flight...)


> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 2f39036..ddce61b 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -45,6 +45,7 @@
>  #include <linux/personality.h>
>  #include <linux/notifier.h>
>  #include <trace/events/power.h>
> +#include <linux/percpu.h>
>  
>  #include <asm/alternative.h>
>  #include <asm/compat.h>
> @@ -312,6 +313,17 @@ static void uao_thread_switch(struct task_struct *next)
>  }
>  
>  /*
> + * We store our current task in sp_el0, which is clobbered by userspace. Keep a
> + * shadow copy so that we can restore this upon entry from userspace.
> + */
> +DEFINE_PER_CPU(struct task_struct *, __entry_task) = &init_task;
> +
> +static void entry_task_switch(struct task_struct *next)
> +{
> +	__this_cpu_write(__entry_task, next);
> +}
> +
> +/*
>   * Thread switching.
>   */
>  struct task_struct *__switch_to(struct task_struct *prev,
> @@ -323,6 +335,7 @@ struct task_struct *__switch_to(struct task_struct *prev,
>  	tls_thread_switch(next);
>  	hw_breakpoint_thread_switch(next);
>  	contextidr_thread_switch(next);
> +	entry_task_switch(next);
>  	uao_thread_switch(next);
>  
>  	/*
> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index 2679722..cde25f4 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -149,6 +149,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  	 * We need to tell the secondary core where to find its stack and the
>  	 * page tables.
>  	 */
> +	secondary_data.task = idle;
>  	secondary_data.stack = task_stack_page(idle) + THREAD_START_SP;
>  	update_cpu_boot_status(CPU_MMU_OFF);
>  	__flush_dcache_area(&secondary_data, sizeof(secondary_data));
> @@ -173,6 +174,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  		pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
>  	}
>  
> +	secondary_data.task = NULL;
>  	secondary_data.stack = NULL;
>  	status = READ_ONCE(secondary_data.status);
>  	if (ret && status) {
> 

Nit-territory: Something we should remember is that __entry_task isn't written
on secondary startup, so it's stale (it still points at CPU0's idle task,
&init_task) until the first __switch_to(). This isn't a problem, as it's only
read on entry from EL0.
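
(If we ever need to fix that, a one-liner early in secondary_start_kernel(),
 before the CPU can first return to EL0, should do -- placement hypothetical,
 untested:)

	/*
	 * Publish this CPU's idle task for the EL0 entry path; until this
	 * point this CPU's copy of __entry_task still holds &init_task.
	 */
	__this_cpu_write(__entry_task, current);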


Thanks,

James



