[RFC PATCH 6/6] ARM: implement THREAD_INFO_IN_TASK for uniprocessor systems

Ard Biesheuvel ardb at kernel.org
Fri Nov 26 02:10:06 PST 2021


On UP systems, only a single task can be 'current' at any given time,
so a global variable is sufficient to track it. That also allows us to
enable THREAD_INFO_IN_TASK for those systems: thread_info is then
accessed via current rather than the other way around, removing the
need to store thread_info at the base of the task stack. This, in turn,
permits us to enable IRQ stacks and vmap'ed stacks on UP systems as
well.
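
As an aside, a minimal userspace sketch (not kernel code; the structs
below are simplified stand-ins for the real task_struct/thread_info,
and the field set is illustrative only) of why this works: with
thread_info as the first member of task_struct, going from the task
pointer to its thread_info is just a cast, so a single global tracking
'current' is all the UP case needs.

	#include <stdio.h>

	struct thread_info {
		unsigned long	flags;
		int		preempt_count;
	};

	struct task_struct {
		struct thread_info	thread_info;	/* must stay the first member */
		int			pid;
	};

	/* one CPU, so one 'current' - a plain global is enough on UP */
	static struct task_struct *__current;

	static struct thread_info *current_thread_info(void)
	{
		/* thread_info sits at offset 0, so the cast is free */
		return (struct thread_info *)__current;
	}

	int main(void)
	{
		struct task_struct init_task = { .pid = 0 };

		__current = &init_task;
		printf("preempt_count = %d\n",
		       current_thread_info()->preempt_count);
		return 0;
	}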

To partially mitigate the performance overhead of this arrangement, use
an ADD/ADD/LDR sequence with the appropriate PC-relative group
relocations to load the value of current when needed. This means that
accessing current will still only require a single load as before,
avoiding the need for a literal to carry the address of the global
variable in each function. However, accessing thread_info will now
require this load as well.
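
For illustration, a sketch of that sequence as a standalone GCC
extended-asm helper (ARM mode and a GNU toolchain assumed; this simply
mirrors the LOAD_CURRENT helper added below rather than defining
anything new). The linker fills in the SUB immediates and the LDR
offset from the group relocations, so the address of __current is
computed relative to the PC and only the final LDR accesses memory.
The combined reach is -/+ 256 MiB, so __current must live within that
distance of the caller.

	struct task_struct;
	extern struct task_struct *__current;

	static inline struct task_struct *load_current_sketch(void)
	{
		struct task_struct *cur;

		asm("	.reloc	10f, R_ARM_ALU_PC_G0_NC, __current	\n\t"
		    "	.reloc	11f, R_ARM_ALU_PC_G1_NC, __current	\n\t"
		    "	.reloc	12f, R_ARM_LDR_PC_G2, __current		\n\t"
		    "10:	sub	%0, pc, #8			\n\t"
		    "11:	sub	%0, %0, #4			\n\t"
		    "12:	ldr	%0, [%0, #0]			\n\t"
		    : "=r"(cur));
		return cur;
	}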

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/Kconfig                   |   8 +-
 arch/arm/include/asm/assembler.h   | 115 ++++++++++++++------
 arch/arm/include/asm/current.h     |  56 ++++++++--
 arch/arm/include/asm/switch_to.h   |   3 +-
 arch/arm/include/asm/thread_info.h |  27 -----
 arch/arm/kernel/asm-offsets.c      |   3 -
 arch/arm/kernel/entry-armv.S       |  11 +-
 arch/arm/kernel/head-common.S      |   4 +-
 arch/arm/kernel/process.c          |   7 +-
 arch/arm/kernel/traps.c            |   4 +
 10 files changed, 156 insertions(+), 82 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 61fc5cc03042..3d7476ca4d94 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -126,8 +126,8 @@ config ARM
 	select PERF_USE_VMALLOC
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
-	select THREAD_INFO_IN_TASK if CURRENT_POINTER_IN_TPIDRURO
-	select HAVE_ARCH_VMAP_STACK if MMU && THREAD_INFO_IN_TASK && (!LD_IS_LLD || LLD_VERSION >= 140000)
+	select THREAD_INFO_IN_TASK
+	select HAVE_ARCH_VMAP_STACK if MMU && (!LD_IS_LLD || LLD_VERSION >= 140000)
 	select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
 	# Above selects are sorted alphabetically; please add new ones
 	# according to that.  Thanks.
@@ -1169,7 +1169,7 @@ config CURRENT_POINTER_IN_TPIDRURO
 
 config IRQSTACKS
 	def_bool y
-	depends on GENERIC_IRQ_MULTI_HANDLER && THREAD_INFO_IN_TASK
+	depends on GENERIC_IRQ_MULTI_HANDLER
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
 	select HAVE_SOFTIRQ_ON_OWN_STACK
 
@@ -1619,7 +1619,7 @@ config CC_HAVE_STACKPROTECTOR_TLS
 
 config STACKPROTECTOR_PER_TASK
 	bool "Use a unique stack canary value for each task"
-	depends on STACKPROTECTOR && THREAD_INFO_IN_TASK && !XIP_DEFLATED_DATA
+	depends on STACKPROTECTOR && CURRENT_POINTER_IN_TPIDRURO && !XIP_DEFLATED_DATA
 	depends on GCC_PLUGINS || CC_HAVE_STACKPROTECTOR_TLS
 	select GCC_PLUGIN_ARM_SSP_PER_TASK if !CC_HAVE_STACKPROTECTOR_TLS
 	default y
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index c4c1d5b2edf5..978fdaaac680 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -203,43 +203,12 @@ THUMB(	fpreg	.req	r7	)
 	.endm
 	.endr
 
-	.macro	get_current, rd
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
-	mrc	p15, 0, \rd, c13, c0, 3		@ get TPIDRURO register
-#else
-	get_thread_info \rd
-	ldr	\rd, [\rd, #TI_TASK]
-#endif
-	.endm
-
-	.macro	set_current, rn
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
-	mcr	p15, 0, \rn, c13, c0, 3		@ set TPIDRURO register
-#endif
-	.endm
-
-	.macro	reload_current, t1:req, t2:req
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
-	adr_l	\t1, __entry_task		@ get __entry_task base address
-	mrc	p15, 0, \t2, c13, c0, 4		@ get per-CPU offset
-	ldr	\t1, [\t1, \t2]			@ load variable
-	mcr	p15, 0, \t1, c13, c0, 3		@ store in TPIDRURO
-#endif
-	.endm
-
 /*
  * Get current thread_info.
  */
 	.macro	get_thread_info, rd
-#ifdef CONFIG_THREAD_INFO_IN_TASK
 	/* thread_info is the first member of struct task_struct */
 	get_current \rd
-#else
- ARM(	mov	\rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT	)
- THUMB(	mov	\rd, sp			)
- THUMB(	lsr	\rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT	)
-	mov	\rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
-#endif
 	.endm
 
 /*
@@ -333,6 +302,90 @@ ALT_UP_B(.L1_\@)
 #endif
 	.endm
 
+	/*
+	 * load_current - load the current task pointer from the global
+	 * 		  variable '__current'
+	 */
+	.macro	load_current, rd
+#if defined(CONFIG_THUMB2_KERNEL) || \
+    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS)) || \
+    (defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 140000)
+	mov_l	\rd, __current
+	ldr	\rd, [\rd]
+#else
+	/*
+	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
+	 * with the appropriate relocations. The combined sequence has a range
+	 * of -/+ 256 MiB, which should be sufficient for the core kernel and
+	 * for modules loaded into the module region.
+	 */
+	.globl	__current
+	.reloc	.L0_\@, R_ARM_ALU_PC_G0_NC, __current
+	.reloc	.L1_\@, R_ARM_ALU_PC_G1_NC, __current
+	.reloc	.L2_\@, R_ARM_LDR_PC_G2, __current
+.L0_\@: sub	\rd, pc, #8
+.L1_\@: sub	\rd, \rd, #4
+.L2_\@: ldr	\rd, [\rd, #0]
+#endif
+	.endm
+
+	/*
+	 * set_current - store the task pointer of this CPU's current task
+	 */
+	.macro	set_current, rn:req, tmp:req
+#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
+9998:	mcr p15, 0, \rn, c13, c0, 3		@ set TPIDRURO register
+#ifdef CONFIG_CPU_V6
+ALT_UP_B(.L1_\@)
+.L0_\@:
+	.subsection 1
+.L1_\@: ldr	\tmp, =__current
+	str	\rn, [\tmp]
+	b	.L0_\@
+	.previous
+#endif
+#else
+	str_l	\rn, __current, \tmp
+#endif
+	.endm
+
+	/*
+	 * get_current - load the task pointer of this CPU's current task
+	 */
+	.macro	get_current, rd
+#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
+9998:	mrc p15, 0, \rd, c13, c0, 3		@ get TPIDRURO register
+#ifdef CONFIG_CPU_V6
+ALT_UP_B(.L1_\@)
+.L0_\@:
+	.subsection 1
+.L1_\@: load_current \rd
+	b	.L0_\@
+	.previous
+#endif
+#else
+	load_current \rd
+#endif
+	.endm
+
+	/*
+	 * reload_current - reload the task pointer of this CPU's current task
+	 *		    into the TLS register
+	 */
+	.macro	reload_current, t1:req, t2:req
+#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
+#ifdef CONFIG_CPU_V6
+ALT_SMP(nop)
+ALT_UP_B(.L0_\@)
+#endif
+	adr_l	\t1, __entry_task		@ get __entry_task base address
+	mrc	p15, 0, \t2, c13, c0, 4		@ get per-CPU offset
+	ldr	\t1, [\t1, \t2]			@ load variable
+	mcr	p15, 0, \t1, c13, c0, 3		@ store in TPIDRURO
+.L0_\@:
+#endif
+	.endm
+
 /*
  * Instruction barrier
  */
diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
index 6bf0aad672c3..68d6907c9d54 100644
--- a/arch/arm/include/asm/current.h
+++ b/arch/arm/include/asm/current.h
@@ -11,22 +11,50 @@
 
 struct task_struct;
 
+extern struct task_struct *__current;
+extern unsigned int smp_on_up;
+
 static inline void set_current(struct task_struct *cur)
 {
-	if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))
+	if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) &&
+	    !(IS_ENABLED(CONFIG_SMP) &&
+	      IS_ENABLED(CONFIG_SMP_ON_UP) &&
+	      smp_on_up)) {
+		__current = cur;
 		return;
+	}
 
 	/* Set TPIDRURO */
 	asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory");
 }
 
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+/*
+ * Avoid a literal load by emitting a sequence of ADD/LDR instructions with the
+ * appropriate relocations. The combined sequence has a range of -/+ 256 MiB,
+ * which should be sufficient for the core kernel as well as modules loaded
+ * into the module region. (Not supported by LLD before release 14)
+ */
+#if !defined(CONFIG_LD_IS_LLD) || CONFIG_LLD_VERSION >= 140000
+#define LOAD_CURRENT							\
+	"	.globl	__current				\n\t"	\
+	"	.reloc	10f, R_ARM_ALU_PC_G0_NC, __current	\n\t"	\
+	"	.reloc	11f, R_ARM_ALU_PC_G1_NC, __current	\n\t"	\
+	"	.reloc	12f, R_ARM_LDR_PC_G2, __current		\n\t"	\
+	"10:	sub	%0, pc, #8				\n\t"	\
+	"11:	sub	%0, %0, #4				\n\t"	\
+	"12:	ldr	%0, [%0, #0]				\n\t"
+#else
+#define LOAD_CURRENT							\
+	"	ldr	%0, =__current				\n\t"	\
+	"	ldr	%0, [%0]				\n\t"
+#endif
 
-static inline struct task_struct *get_current(void)
+static inline __attribute_const__ struct task_struct *get_current(void)
 {
 	struct task_struct *cur;
 
 #if __has_builtin(__builtin_thread_pointer) && \
+    defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) && \
     !(defined(CONFIG_THUMB2_KERNEL) && \
       defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 130001)
 	/*
@@ -39,16 +67,30 @@ static inline struct task_struct *get_current(void)
 	 * https://github.com/ClangBuiltLinux/linux/issues/1485
 	 */
 	cur = __builtin_thread_pointer();
+#elif defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
+	asm("0:	mrc p15, 0, %0, c13, c0, 3			\n\t"
+#ifdef CONFIG_CPU_V6
+	    "1:							\n\t"
+	    "	.subsection 1					\n\t"
+	    "2: " LOAD_CURRENT
+	    "	b	1b					\n\t"
+	    "	.previous					\n\t"
+	    "	.pushsection \".alt.smp.init\", \"a\"		\n\t"
+	    "	.long	0b - .					\n\t"
+	    "	b	. + (2b - 0b)				\n\t"
+	    "	.popsection					\n\t"
+#endif
+	    : "=r"(cur));
+#elif defined(CONFIG_THUMB2_KERNEL) || \
+      (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
+	cur = __current;
 #else
-	asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(cur));
+	asm(LOAD_CURRENT : "=r"(cur));
 #endif
 	return cur;
 }
 
 #define current get_current()
-#else
-#include <asm-generic/current.h>
-#endif /* CONFIG_CURRENT_POINTER_IN_TPIDRURO */
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index b55c7b2755e4..a482c99934ff 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -40,7 +40,8 @@ static inline void set_ti_cpu(struct task_struct *p)
 do {									\
 	__complete_pending_tlbi();					\
 	set_ti_cpu(next);						\
-	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))		\
+	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO) ||		\
+	    IS_ENABLED(CONFIG_SMP))					\
 		__this_cpu_write(__entry_task, next);			\
 	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
 } while (0)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 004b89d86224..aecc403b2880 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -62,9 +62,6 @@ struct cpu_context_save {
 struct thread_info {
 	unsigned long		flags;		/* low level flags */
 	int			preempt_count;	/* 0 => preemptable, <0 => bug */
-#ifndef CONFIG_THREAD_INFO_IN_TASK
-	struct task_struct	*task;		/* main task structure */
-#endif
 	__u32			cpu;		/* cpu */
 	__u32			cpu_domain;	/* cpu domain */
 	struct cpu_context_save	cpu_context;	/* cpu context */
@@ -80,39 +77,15 @@ struct thread_info {
 
 #define INIT_THREAD_INFO(tsk)						\
 {									\
-	INIT_THREAD_INFO_TASK(tsk)					\
 	.flags		= 0,						\
 	.preempt_count	= INIT_PREEMPT_COUNT,				\
 }
 
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-#define INIT_THREAD_INFO_TASK(tsk)
-
 static inline struct task_struct *thread_task(struct thread_info* ti)
 {
 	return (struct task_struct *)ti;
 }
 
-#else
-#define INIT_THREAD_INFO_TASK(tsk)	.task = &(tsk),
-
-static inline struct task_struct *thread_task(struct thread_info* ti)
-{
-	return ti->task;
-}
-
-/*
- * how to get the thread information struct from C
- */
-static inline struct thread_info *current_thread_info(void) __attribute_const__;
-
-static inline struct thread_info *current_thread_info(void)
-{
-	return (struct thread_info *)
-		(current_stack_pointer & ~(THREAD_SIZE - 1));
-}
-#endif
-
 #define thread_saved_pc(tsk)	\
 	((unsigned long)(task_thread_info(tsk)->cpu_context.pc))
 #define thread_saved_sp(tsk)	\
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 645845e4982a..2c8d76fd7c66 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -43,9 +43,6 @@ int main(void)
   BLANK();
   DEFINE(TI_FLAGS,		offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,		offsetof(struct thread_info, preempt_count));
-#ifndef CONFIG_THREAD_INFO_IN_TASK
-  DEFINE(TI_TASK,		offsetof(struct thread_info, task));
-#endif
   DEFINE(TI_CPU,		offsetof(struct thread_info, cpu));
   DEFINE(TI_CPU_DOMAIN,		offsetof(struct thread_info, cpu_domain));
   DEFINE(TI_CPU_SAVE,		offsetof(struct thread_info, cpu_context));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 09a9fe501094..5f3b882d53b7 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -816,12 +816,13 @@ ENTRY(__switch_to)
 	switch_tls r1, r4, r5, r3, r7
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP) && \
     !defined(CONFIG_STACKPROTECTOR_PER_TASK)
-	ldr	r9, [r2, #TI_TASK]
 	ldr	r8, =__stack_chk_guard
 	.if (TSK_STACK_CANARY > IMM12_MASK)
-	add	r9, r9, #TSK_STACK_CANARY & ~IMM12_MASK
-	.endif
+	add	r9, r2, #TSK_STACK_CANARY & ~IMM12_MASK
 	ldr	r9, [r9, #TSK_STACK_CANARY & IMM12_MASK]
+	.else
+	ldr	r9, [r2, #TSK_STACK_CANARY & IMM12_MASK]
+	.endif
 #endif
 	mov	r7, r2				@ Preserve 'next'
 #ifdef CONFIG_CPU_USE_DOMAINS
@@ -838,7 +839,7 @@ ENTRY(__switch_to)
 #endif
 	mov	r0, r5
 #if !defined(CONFIG_THUMB2_KERNEL) && !defined(CONFIG_VMAP_STACK)
-	set_current r7
+	set_current r7, r8
 	ldmia	r4, {r4 - sl, fp, sp, pc}	@ Load all regs saved previously
 #else
 	mov	r1, r7
@@ -860,7 +861,7 @@ ENTRY(__switch_to)
 	@ switches us to another stack, with few other side effects. In order
 	@ to prevent this distinction from causing any inconsistencies, let's
 	@ keep the 'set_current' call as close as we can to the update of SP.
-	set_current r1
+	set_current r1, r2
 	mov	sp, ip
 	ret	lr
 #endif
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index da18e0a17dc2..42cae73fcc19 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -105,10 +105,8 @@ __mmap_switched:
 	mov	r1, #0
 	bl	__memset			@ clear .bss
 
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
 	adr_l	r0, init_task			@ get swapper task_struct
-	set_current r0
-#endif
+	set_current r0, r1
 
 	ldmia	r4, {r0, r1, r2, r3}
 	str	r9, [r0]			@ Save processor ID
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d47159f3791c..0617af11377f 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,7 +36,7 @@
 
 #include "signal.h"
 
-#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+#if defined(CONFIG_CURRENT_POINTER_IN_TPIDRURO) || defined(CONFIG_SMP)
 DEFINE_PER_CPU(struct task_struct *, __entry_task);
 #endif
 
@@ -46,6 +46,11 @@ unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
 #endif
 
+#ifndef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+asmlinkage struct task_struct *__current;
+EXPORT_SYMBOL(__current);
+#endif
+
 static const char *processor_modes[] __maybe_unused = {
   "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
   "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index b28a705c49cb..3f38357efc46 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -865,7 +865,9 @@ early_initcall(allocate_overflow_stacks);
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
 	unsigned long tsk_stk = (unsigned long)current->stack;
+#ifdef CONFIG_IRQSTACKS
 	unsigned long irq_stk = (unsigned long)this_cpu_read(irq_stack_ptr);
+#endif
 	unsigned long ovf_stk = (unsigned long)this_cpu_read(overflow_stack_ptr);
 
 	console_verbose();
@@ -873,8 +875,10 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
 
 	pr_emerg("Task stack:     [0x%08lx..0x%08lx]\n",
 		 tsk_stk, tsk_stk + THREAD_SIZE);
+#ifdef CONFIG_IRQSTACKS
 	pr_emerg("IRQ stack:      [0x%08lx..0x%08lx]\n",
 		 irq_stk - THREAD_SIZE, irq_stk);
+#endif
 	pr_emerg("Overflow stack: [0x%08lx..0x%08lx]\n",
 		 ovf_stk - OVERFLOW_STACK_SIZE, ovf_stk);
 
-- 
2.30.2



