[PATCH v4 4/5] ARM: smp: Store current pointer in TPIDRURO register if available

Ard Biesheuvel ardb at kernel.org
Mon Sep 13 03:40:00 PDT 2021


Now that the user space TLS register is assigned on every return to user
space, we can use it to keep the 'current' pointer while running in the
kernel. This removes the need to access it via thread_info, which is
located at the base of the stack, but will be moved out of there in a
subsequent patch.

Use the __builtin_thread_pointer() helper when available - this will
help GCC understand that reloading the value within the same function is
not necessary, even when using the per-task stack protector (which also
generates accesses via the TLS register). For example, the generated
code below loads TPIDRURO only once, and uses it to access both the
stack canary and the preempt_count fields.

<do_one_initcall>:
       e92d 41f0       stmdb   sp!, {r4, r5, r6, r7, r8, lr}
       ee1d 4f70       mrc     15, 0, r4, cr13, cr0, {3}
       4606            mov     r6, r0
       b094            sub     sp, #80 ; 0x50
       f8d4 34e8       ldr.w   r3, [r4, #1256] ; 0x4e8  <- stack canary
       9313            str     r3, [sp, #76]   ; 0x4c
       f8d4 8004       ldr.w   r8, [r4, #4]             <- preempt count

Co-developed-by: Keith Packard <keithpac at amazon.com>
Signed-off-by: Keith Packard <keithpac at amazon.com>
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
 arch/arm/Kconfig                   |  5 ++
 arch/arm/include/asm/assembler.h   | 24 +++++++++
 arch/arm/include/asm/current.h     | 51 ++++++++++++++++++++
 arch/arm/include/asm/switch_to.h   |  2 +
 arch/arm/include/asm/thread_info.h |  2 +
 arch/arm/kernel/entry-armv.S       |  9 +++-
 arch/arm/kernel/entry-common.S     |  1 +
 arch/arm/kernel/head-common.S      |  5 ++
 arch/arm/kernel/process.c          |  4 ++
 arch/arm/kernel/smp.c              |  2 +
 arch/arm/mm/proc-macros.S          |  3 +-
 11 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ff3e64ae959e..24c756299d48 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1157,6 +1157,11 @@ config SMP_ON_UP
 
 	  If you don't know what to do here, say Y.
 
+
+config CURRENT_POINTER_IN_TPIDRURO
+	def_bool y
+	depends on SMP && (CPU_V6K || CPU_V7) && !CPU_V6
+
 config ARM_CPU_TOPOLOGY
 	bool "Support cpu topology definition"
 	depends on SMP && CPU_V7
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index e2b1fd558bf3..c1551dee28be 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -199,6 +199,30 @@
 	.endm
 	.endr
 
+	.macro	get_current, rd
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	mrc	p15, 0, \rd, c13, c0, 3		@ get TPIDRURO register
+#else
+	get_thread_info \rd
+	ldr	\rd, [\rd, #TI_TASK]
+#endif
+	.endm
+
+	.macro	set_current, rn
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	mcr	p15, 0, \rn, c13, c0, 3		@ set TPIDRURO register
+#endif
+	.endm
+
+	.macro	reload_current, t1:req, t2:req
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	adr_l	\t1, __entry_task		@ get __entry_task base address
+	mrc	p15, 0, \t2, c13, c0, 4		@ get per-CPU offset
+	ldr	\t1, [\t1, \t2]			@ load variable
+	mcr	p15, 0, \t1, c13, c0, 3		@ store in TPIDRURO
+#endif
+	.endm
+
 /*
  * Get current thread_info.
  */
diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
new file mode 100644
index 000000000000..adce38a4c798
--- /dev/null
+++ b/arch/arm/include/asm/current.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021 Keith Packard <keithp at keithp.com>
+ * Copyright (c) 2021 Google, LLC <ardb at kernel.org>
+ */
+
+#ifndef _ASM_ARM_CURRENT_H
+#define _ASM_ARM_CURRENT_H
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+static inline void set_current(struct task_struct *cur)
+{
+	if (!IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))
+		return;
+
+	/* Set TPIDRURO */
+	asm("mcr p15, 0, %0, c13, c0, 3" :: "r"(cur) : "memory");
+}
+
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+
+static inline struct task_struct *get_current(void)
+{
+	struct task_struct *cur;
+
+#if defined(CONFIG_CC_IS_GCC) && __has_builtin(__builtin_thread_pointer)
+	/*
+	 * Use the __builtin helper when available - this results in better
+	 * code, especially in combination with the per-task stack protector,
+	 * as GCC will recognize that it needs to load the TLS register only
+	 * once in every function.  Clang would generate a call to
+	 * __aeabi_read_tp() here, and does not support the plugin anyway.
+	 */
+	cur = __builtin_thread_pointer();
+#else
+	asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(cur));
+#endif
+	return cur;
+}
+
+#define current get_current()
+#else
+#include <asm-generic/current.h>
+#endif /* CONFIG_CURRENT_POINTER_IN_TPIDRURO */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ARM_CURRENT_H */
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index 007d8fea7157..61e4a3c4ca6e 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -26,6 +26,8 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info
 #define switch_to(prev,next,last)					\
 do {									\
 	__complete_pending_tlbi();					\
+	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))		\
+		__this_cpu_write(__entry_task, next);			\
 	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
 } while (0)
 
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index f0cacc733231..76b6fbd5540c 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -29,6 +29,8 @@
 
 struct task_struct;
 
+DECLARE_PER_CPU(struct task_struct *, __entry_task);
+
 #include <asm/types.h>
 
 struct cpu_context_save {
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 241b73d64df7..aedcaa329fc9 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -384,6 +384,8 @@ ENDPROC(__fiq_abt)
  ATRAP(	teq	r8, r7)
  ATRAP( mcrne	p15, 0, r8, c1, c0, 0)
 
+	reload_current r7, r8
+
 	@
 	@ Clear FP to mark the first stack frame
 	@
@@ -767,13 +769,18 @@ ENTRY(__switch_to)
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
 	mov	r5, r0
-	add	r4, r2, #TI_CPU_SAVE
+	mov	r4, r2
 	ldr	r0, =thread_notify_head
 	mov	r1, #THREAD_NOTIFY_SWITCH
 	bl	atomic_notifier_call_chain
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
 	str	r7, [r8]
 #endif
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	ldr	ip, [r4, #TI_TASK]
+	set_current ip
+#endif
+	add	r4, r4, #TI_CPU_SAVE
  THUMB(	mov	ip, r4			   )
 	mov	r0, r5
  ARM(	ldmia	r4, {r4 - sl, fp, sp, pc}  )	@ Load all regs saved previously
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index d9c99db50243..ac86c34682bb 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -170,6 +170,7 @@ ENTRY(vector_swi)
 	str	saved_psr, [sp, #S_PSR]		@ Save CPSR
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 #endif
+	reload_current r10, ip
 	zero_fp
 	alignment_trap r10, ip, __cr_alignment
 	asm_trace_hardirqs_on save=0
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 29b2eda136bb..da18e0a17dc2 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -105,6 +105,11 @@ __mmap_switched:
 	mov	r1, #0
 	bl	__memset			@ clear .bss
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	adr_l	r0, init_task			@ get swapper task_struct
+	set_current r0
+#endif
+
 	ldmia	r4, {r0, r1, r2, r3}
 	str	r9, [r0]			@ Save processor ID
 	str	r7, [r1]			@ Save machine type
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index cd73c216b272..30428d756515 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -36,6 +36,10 @@
 
 #include "signal.h"
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+DEFINE_PER_CPU(struct task_struct *, __entry_task);
+#endif
+
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 8979d548ec17..97ee6b1567e9 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -409,6 +409,8 @@ asmlinkage void secondary_start_kernel(struct task_struct *task)
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu;
 
+	set_current(task);
+
 	secondary_biglittle_init();
 
 	/*
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e2c743aa2eb2..d48ba99d739c 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -30,8 +30,7 @@
  * act_mm - get current->active_mm
  */
 	.macro	act_mm, rd
-	get_thread_info \rd
-	ldr	\rd, [\rd, #TI_TASK]
+	get_current \rd
 	.if (TSK_ACTIVE_MM > IMM12_MASK)
 	add	\rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK
 	.endif
-- 
2.30.2




More information about the linux-arm-kernel mailing list