[PATCH RFC 3/3] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs

Jinjie Ruan ruanjinjie at huawei.com
Thu Jun 11 06:38:09 PDT 2026


Support for parallel secondary CPU bringup is already utilized by x86,
MIPS, and RISC-V. This patch brings this capability to the arm64
architecture.

Rework the global `secondary_data` accessed during early boot into
a per-CPU array. This array maps logical CPU IDs to MPIDR_EL1 values,
enabling the early boot code in head.S to resolve each secondary CPU's
logical ID concurrently.

To fully enable HOTPLUG_PARALLEL, this patch implements:
1) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
2) Callbacks to cpuhp_ap_sync_alive() inside secondary_start_kernel().

Successfully tested on QEMU ARM64 virt machine (KVM on, 128 vCPUs).

|     test kernel	   | secondary CPUs boot time |
|  ---------------------   |	--------------------  |
|   Without this patch     |		155.672	      |
|   cpuhp.parallel=0	   |		62.897	      |
|   cpuhp.parallel=1	   |		166.703	      |

Signed-off-by: Jinjie Ruan <ruanjinjie at huawei.com>
---
 arch/arm64/Kconfig           |  1 +
 arch/arm64/include/asm/smp.h |  8 ++++++++
 arch/arm64/kernel/head.S     | 23 +++++++++++++++++++++++
 arch/arm64/kernel/smp.c      | 27 +++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9091c67e1cc2..8735e9d8ed13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,6 +113,7 @@ config ARM64
 	select CPUMASK_OFFSTACK if NR_CPUS > 256
 	select DCACHE_WORD_ACCESS
 	select HAVE_EXTRA_IPI_TRACEPOINTS
+	select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select DMA_BOUNCE_UNALIGNED_KMALLOC
 	select DMA_DIRECT_REMAP
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index e2151a01731f..c39d4c0f8a37 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -92,6 +92,10 @@ struct secondary_data {
 	long status;
 };
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+extern struct secondary_data cpu_boot_data[NR_CPUS];
+#endif
+
 extern struct secondary_data secondary_data;
 extern long __early_cpu_boot_status;
 extern void secondary_entry(void);
@@ -124,7 +128,11 @@ static inline void __noreturn cpu_park_loop(void)
 
 static inline void update_cpu_boot_status(unsigned int cpu, int val)
 {
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	WRITE_ONCE(cpu_boot_data[cpu].status, val);
+#else
 	WRITE_ONCE(secondary_data.status, val);
+#endif
 	/* Ensure the visibility of the status update */
 	dsb(ishst);
 }
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 87a822e5c4ca..8f6c1c5e66d2 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -12,6 +12,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/pgtable.h>
+#include <linux/threads.h>
 
 #include <asm/asm_pointer_auth.h>
 #include <asm/assembler.h>
@@ -391,7 +392,29 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	msr	vbar_el1, x5
 	isb
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	mrs	x0, mpidr_el1
+	mov_q	x1, MPIDR_HWID_BITMASK
+	and	x0, x0, x1
+
+	adr_l	x1, __mpidr_to_cpuid_table
+	mov	x19, #0
+.Lfind_cpuid:
+	ldr	x3, [x1, x19, lsl #3]
+	cmp	x3, x0
+	b.eq	.Lfound_cpuid
+	add	x19, x19, #1
+	cmp	x19, #NR_CPUS
+	b.ne	.Lfind_cpuid
+	b	.
+
+.Lfound_cpuid:
+	adr_l	x0, cpu_boot_data
+	lsl	x3, x19, #4
+	add	x0, x0, x3
+#else
 	adr_l	x0, secondary_data
+#endif
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6bc90ee4820a..c909bf1c5119 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -61,6 +61,12 @@
  * where to place its SVC stack
  */
 struct secondary_data secondary_data;
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+struct secondary_data cpu_boot_data[NR_CPUS] ____cacheline_aligned __ro_after_init;
+unsigned long __mpidr_to_cpuid_table[NR_CPUS] ____cacheline_aligned;
+#endif
+
 /* Number of CPUs which aren't online, but looping in kernel text. */
 static int cpus_stuck_in_kernel;
 
@@ -106,8 +112,19 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
 	return -EOPNOTSUPP;
 }
 
+#ifndef CONFIG_HOTPLUG_PARALLEL
 static DECLARE_COMPLETION(cpu_running);
+#endif
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+	cpu_boot_data[cpu].task = tidle;
+	update_cpu_boot_status(cpu, CPU_MMU_OFF);
 
+	return boot_secondary(cpu, tidle);
+}
+#else
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -172,6 +189,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 	return -EIO;
 }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
 
 static void init_gic_priority_masking(void)
 {
@@ -206,6 +224,10 @@ asmlinkage notrace void secondary_start_kernel(void)
 	mmgrab(mm);
 	current->active_mm = mm;
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	cpuhp_ap_sync_alive();
+#endif
+
 	/*
 	 * TTBR0 is only used for the identity mapping at this stage. Make it
 	 * point to zero page to avoid speculatively fetching new entries.
@@ -254,7 +276,9 @@ asmlinkage notrace void secondary_start_kernel(void)
 					 read_cpuid_id());
 	update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
 	set_cpu_online(cpu, true);
+#ifndef CONFIG_HOTPLUG_PARALLEL
 	complete(&cpu_running);
+#endif
 
 	/*
 	 * Secondary CPUs enter the kernel with all DAIF exceptions masked.
@@ -762,6 +786,9 @@ void __init smp_init_cpus(void)
 	 */
 	for (i = 1; i < nr_cpu_ids; i++) {
 		if (cpu_logical_map(i) != INVALID_HWID) {
+#ifdef CONFIG_HOTPLUG_PARALLEL
+			__mpidr_to_cpuid_table[i] = cpu_logical_map(i);
+#endif
 			if (smp_cpu_setup(i))
 				set_cpu_logical_map(i, INVALID_HWID);
 		}
-- 
2.34.1




More information about the linux-arm-kernel mailing list