[PATCHv3] arm64: initialize per-cpu offsets earlier

Mark Rutland mark.rutland at arm.com
Tue Mar 2 11:53:35 GMT 2021


The current initialization of the per-cpu offset register is difficult
to follow, and it is not always performed early enough for the upcoming
KCSAN instrumentation, whose callbacks use the per-cpu offset.
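
For context, a simplified sketch (not part of this patch, loosely based
on kernel/kcsan/core.c) of how KCSAN's compiler-inserted callbacks reach
per-cpu state, and hence why the offset must already be valid before any
instrumented C code runs:

  /* Per-cpu KCSAN context used outside task context (simplified). */
  static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx);

  static __always_inline struct kcsan_ctx *get_ctx(void)
  {
          /*
           * raw_cpu_ptr() adds __my_cpu_offset, i.e. the value of
           * TPIDR_EL1/TPIDR_EL2 programmed by set_my_cpu_offset().
           * If that register hasn't been initialized yet, the
           * resulting pointer is junk.
           */
          return in_task() ? &current->kcsan_ctx
                           : raw_cpu_ptr(&kcsan_cpu_ctx);
  }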

To make it possible to support KCSAN, and to simplify reasoning about
early bringup code, let's initialize the per-cpu offset earlier, before
we run any C code that may consume it. To do so, this patch adds a new
init_this_cpu_offset() helper that's called before the usual
primary/secondary start functions. For consistency, this is also used to
re-initialize the per-cpu offset after the runtime per-cpu areas have
been allocated (which can change CPU0's offset).
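
For the boot CPU the intended flow is roughly the following (sketch
only, assuming the usual generic ordering in init/main.c):

  __primary_switched
    -> init_this_cpu_offset()          // offset into the initial area
    -> start_kernel()
         -> setup_per_cpu_areas()      // allocate the runtime areas and
                                       // update __per_cpu_offset[0]
         -> smp_prepare_boot_cpu()
              -> init_this_cpu_offset()  // re-point at the runtime area

Secondary CPUs only need the first step, via __secondary_switched before
secondary_start_kernel().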

So that init_this_cpu_offset() isn't subject to any instrumentation that
might consume the per-cpu offset:

* init_this_cpu_offset() is marked with noinstr to prevent any direct
  instrumentation.

* set_my_cpu_offset() is marked __always_inline so that it cannot be
  compiled as an out-of-line function subject to instrumentation.

* We directly use `current->cpu` rather than `task_cpu()`, as the latter
  is subject to instrumentation.

Note: as per_cpu_offset() is a macro, this will not be instrumented
within a noinstr function.

Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: James Morse <james.morse at arm.com>
Cc: Marco Elver <elver at google.com>
Cc: Qian Cai <cai at redhat.com>
Cc: Will Deacon <will at kernel.org>
---
 arch/arm64/include/asm/cpu.h    |  2 ++
 arch/arm64/include/asm/percpu.h |  2 +-
 arch/arm64/kernel/head.S        |  2 ++
 arch/arm64/kernel/setup.c       | 12 ++++++------
 arch/arm64/kernel/smp.c         | 13 ++++++++-----
 5 files changed, 19 insertions(+), 12 deletions(-)

Since v1 [1]:
* Fix typos
* Rebase atop v5.9-rc4

Since v2 [2]:
* Prevent instrumentation issues reported by Qian Cai
* Improve commit message and comments
* Rebase to v5.12-rc1

I've built this with and without KASAN_INLINE, inspected the results
with objdump, and given it some basic boot testing, so I'm fairly
confident we won't hit the boot-time hang Qian Cai previously hit.

There are still a few other things we'll need to sort out before we can
enable KCSAN on arm64, but it'd be nice to get this out of the way soon
if possible.

Mark.

[1] https://lore.kernel.org/r/20200730163806.23053-1-mark.rutland@arm.com
[2] https://lore.kernel.org/r/20201005164303.21389-1-mark.rutland@arm.com

diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6ff3ab4..d9d60b18e811 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -68,4 +68,6 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
 				 struct cpuinfo_arm64 *boot);
 
+void init_this_cpu_offset(void);
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 8f1661603b78..97cf71d18a7c 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -11,7 +11,7 @@
 #include <asm/cmpxchg.h>
 #include <asm/stack_pointer.h>
 
-static inline void set_my_cpu_offset(unsigned long off)
+static __always_inline void set_my_cpu_offset(unsigned long off)
 {
 	asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
 				 "msr tpidr_el2, %0",
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 66b0e0b66e31..1db8a1f543fb 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -449,6 +449,7 @@ SYM_FUNC_START_LOCAL(__primary_switched)
 	add	sp, sp, #16
 	mov	x29, #0
 	mov	x30, #0
+	bl	init_this_cpu_offset
 	b	start_kernel
 SYM_FUNC_END(__primary_switched)
 
@@ -613,6 +614,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	ptrauth_keys_init_cpu x2, x3, x4, x5
 #endif
 
+	bl	init_this_cpu_offset
 	b	secondary_start_kernel
 SYM_FUNC_END(__secondary_switched)
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 61845c0821d9..46edcaa5e42b 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -87,12 +87,6 @@ void __init smp_setup_processor_id(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	set_cpu_logical_map(0, mpidr);
 
-	/*
-	 * clear __my_cpu_offset on boot CPU to avoid hang caused by
-	 * using percpu variable early, for example, lockdep will
-	 * access percpu variable inside lock_release
-	 */
-	set_my_cpu_offset(0);
 	pr_info("Booting Linux on physical CPU 0x%010lx [0x%08x]\n",
 		(unsigned long)mpidr, read_cpuid_id());
 }
@@ -296,6 +290,12 @@ u64 cpu_logical_map(unsigned int cpu)
 	return __cpu_logical_map[cpu];
 }
 
+void noinstr init_this_cpu_offset(void)
+{
+	unsigned int cpu = current->cpu;
+	set_my_cpu_offset(per_cpu_offset(cpu));
+}
+
 void __init __no_sanitize_address setup_arch(char **cmdline_p)
 {
 	init_mm.start_code = (unsigned long) _stext;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 357590beaabb..0c906e61a7fc 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -201,10 +201,7 @@ asmlinkage notrace void secondary_start_kernel(void)
 	u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
 	struct mm_struct *mm = &init_mm;
 	const struct cpu_operations *ops;
-	unsigned int cpu;
-
-	cpu = task_cpu(current);
-	set_my_cpu_offset(per_cpu_offset(cpu));
+	unsigned int cpu = smp_processor_id();
 
 	/*
 	 * All kernel threads share the same mm context; grab a
@@ -451,7 +448,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 void __init smp_prepare_boot_cpu(void)
 {
-	set_my_cpu_offset(per_cpu_offset(smp_processor_id()));
+	/*
+	 * Now that setup_per_cpu_areas() has allocated the runtime per-cpu
+	 * areas we must reinitialize CPU0's offset to point to its runtime
+	 * area before the boot area is freed.
+	 */
+	init_this_cpu_offset();
+
 	cpuinfo_store_boot_cpu();
 
 	/*
-- 
2.11.0



