[PATCH v2 4/4] arm64: Add HOTPLUG_PARALLEL support for secondary CPUs

Jinjie Ruan ruanjinjie at huawei.com
Thu Jun 18 02:24:44 PDT 2026


Support for parallel secondary CPU bringup is already utilized by x86,
MIPS, and RISC-V. This patch brings this capability to the arm64
architecture.

Rework the global `secondary_data` accessed during early boot into
a per-CPU array `cpu_boot_data` to allow secondary CPUs to boot
in parallel.

Also reuse the `__cpu_logical_map` array in the early boot code in head.S
so that each secondary CPU can resolve its own logical ID concurrently.

To fully enable HOTPLUG_PARALLEL, this patch implements:
1) An arm64-specific arch_cpuhp_init_parallel_bringup() handler.
2) An arm64-specific arch_cpuhp_kick_ap_alive() handler.
3) A call to cpuhp_ap_sync_alive() inside secondary_start_kernel().

Tested natively with ATF on a QEMU arm64 virt machine with 64 cores,
and also tested with a KVM arm64 guest with 128 vCPUs.

Bringup time comparison (ms, lower is better; Delta is the reduction of
P=1 relative to Baseline, so a negative Delta means P=1 is slower):

| Platform                          | Baseline | P=0     | P=1    | Delta(%) |
| --------------------------------- | -------- | ------- | ------ | -------- |
| 128 vCPUs KVM (256-core HIP09)    | 1921.5   | 1895.9  | 2776.9 | -44.52%  |
| 48 vCPUs KVM (64-core Cortex-A72) | 3644.7   | 3883.6  | 4406.3 | -20.9%   |
| 64-core ATF QEMU                  | 2075.8   | 2080.7  | 1653.4 | 20.34%   |
| 192-core server (HIP12)           | 14619.2  | 14619.1 | 8589.4 | 41.21%   |
| 32-core board                     | 2776.5   | 2881.0  | 1045.0 | 62.36%   |

Tested-by: Michael Kelley <mhklinux at outlook.com>
Signed-off-by: Jinjie Ruan <ruanjinjie at huawei.com>
---
 arch/arm64/Kconfig              |  1 +
 arch/arm64/include/asm/smp.h    | 11 ++++++++++
 arch/arm64/kernel/asm-offsets.c |  4 ++++
 arch/arm64/kernel/head.S        | 36 +++++++++++++++++++++++++++++++++
 arch/arm64/kernel/smp.c         | 33 ++++++++++++++++++++++++++++++
 5 files changed, 85 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9091c67e1cc2..8735e9d8ed13 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -113,6 +113,7 @@ config ARM64
 	select CPUMASK_OFFSTACK if NR_CPUS > 256
 	select DCACHE_WORD_ACCESS
 	select HAVE_EXTRA_IPI_TRACEPOINTS
+	select HOTPLUG_PARALLEL if SMP && HOTPLUG_CPU
 	select DYNAMIC_FTRACE if FUNCTION_TRACER
 	select DMA_BOUNCE_UNALIGNED_KMALLOC
 	select DMA_DIRECT_REMAP
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index e2151a01731f..30025030489c 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -92,7 +92,14 @@ struct secondary_data {
 	long status;
 };
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+static_assert((sizeof(struct secondary_data) & (sizeof(struct secondary_data) - 1)) == 0,
+	      "secondary_data size must be a power of 2 for lsl indexing in head.S");
+/* One boot-data slot per logical CPU; head.S indexes it by shifting the CPU id. */
+extern struct secondary_data cpu_boot_data[NR_CPUS];
+#else
 extern struct secondary_data secondary_data;
+#endif
 extern long __early_cpu_boot_status;
 extern void secondary_entry(void);
 
@@ -124,7 +131,11 @@ static inline void __noreturn cpu_park_loop(void)
 
 static inline void update_cpu_boot_status(unsigned int cpu, int val)
 {
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	WRITE_ONCE(cpu_boot_data[cpu].status, val);
+#else
 	WRITE_ONCE(secondary_data.status, val);
+#endif
 	/* Ensure the visibility of the status update */
 	dsb(ishst);
 }
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index b6367ff3a49c..0a0aa965dbb4 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -11,6 +11,7 @@
 #include <linux/arm_sdei.h>
 #include <linux/sched.h>
 #include <linux/ftrace.h>
+#include <linux/log2.h>
 #include <linux/kexec.h>
 #include <linux/mm.h>
 #include <linux/kvm_host.h>
@@ -97,6 +98,9 @@ int main(void)
   BLANK();
 #endif
   DEFINE(CPU_BOOT_TASK,		offsetof(struct secondary_data, task));
+#ifdef CONFIG_HOTPLUG_PARALLEL
+  DEFINE(SECONDARY_DATA_SHIFT,	ilog2(sizeof(struct secondary_data)));
+#endif
   BLANK();
   DEFINE(FTR_OVR_VAL_OFFSET,	offsetof(struct arm64_ftr_override, val));
   DEFINE(FTR_OVR_MASK_OFFSET,	offsetof(struct arm64_ftr_override, mask));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 87a822e5c4ca..000ab1acf7c5 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -12,6 +12,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/pgtable.h>
+#include <linux/threads.h>
 
 #include <asm/asm_pointer_auth.h>
 #include <asm/assembler.h>
@@ -378,6 +379,33 @@ alternative_else_nop_endif
 	br	x8
 SYM_FUNC_END(secondary_startup)
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	/*
+	 * Map the MPIDR of this CPU to its logical CPU id with a linear scan
+	 * of __cpu_logical_map. The scan only reads the table, so several CPUs
+	 * can run it at once. Clobbers mpidr (masked), tmp1 and tmp2.
+	 */
+	.macro	mpidr_to_cpuid, mpidr, cpuid, tmp1, tmp2
+	mov_q	\tmp1, MPIDR_HWID_BITMASK
+	and	\mpidr, \mpidr, \tmp1		// keep only the hardware id bits
+
+	adr_l	\tmp1, __cpu_logical_map
+	mov	\cpuid, #0
+.Lfind_cpuid\@:
+	ldr	\tmp2, [\tmp1, \cpuid, lsl #3]	// 8-byte table entries
+	cmp	\tmp2, #-1			// all-ones entry is skipped
+	b.eq	.Lnext_cpu\@
+	cmp	\tmp2, \mpidr
+	b.eq	.Lfound_cpuid\@
+.Lnext_cpu\@:
+	add	\cpuid, \cpuid, #1
+	cmp	\cpuid, #NR_CPUS
+	b.ne	.Lfind_cpuid\@
+	b	__secondary_too_slow		// no entry matched this MPIDR
+.Lfound_cpuid\@:
+	.endm
+#endif
+
 	.text
 SYM_FUNC_START_LOCAL(__secondary_switched)
 	mov	x0, x20
@@ -391,7 +419,15 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
 	msr	vbar_el1, x5
 	isb
 
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	mrs	x0, mpidr_el1
+	mpidr_to_cpuid mpidr=x0, cpuid=x2, tmp1=x1, tmp2=x3
+
+	adr_l	x0, cpu_boot_data
+	add	x0, x0, x2, lsl #SECONDARY_DATA_SHIFT
+#else
 	adr_l	x0, secondary_data
+#endif
 	ldr	x2, [x0, #CPU_BOOT_TASK]
 	cbz	x2, __secondary_too_slow
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 52edabc13d51..f7562c38d724 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -61,6 +61,11 @@
  * where to place its SVC stack
  */
 struct secondary_data secondary_data;
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+struct secondary_data cpu_boot_data[NR_CPUS] ____cacheline_aligned;
+#endif
+
 /* Number of CPUs which aren't online, but looping in kernel text. */
 static int cpus_stuck_in_kernel;
 
@@ -106,8 +111,30 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
 	return -EOPNOTSUPP;
 }
 
+#ifndef CONFIG_HOTPLUG_PARALLEL
 static DECLARE_COMPLETION(cpu_running);
+#endif
+
+#ifdef CONFIG_HOTPLUG_PARALLEL
+extern const struct cpu_operations smp_spin_table_ops;
+
+/* Parallel bringup is usable when this CPU has ops that are not spin-table. */
+bool __init arch_cpuhp_init_parallel_bringup(void)
+{
+	int cpu = smp_processor_id();
+	const struct cpu_operations *ops = get_cpu_ops(cpu);
 
+	return ops && ops != &smp_spin_table_ops;
+}
+
+int arch_cpuhp_kick_ap_alive(unsigned int cpu, struct task_struct *tidle)
+{
+	cpu_boot_data[cpu].task = tidle;	/* read by head.S at boot */
+	update_cpu_boot_status(cpu, CPU_MMU_OFF);
+
+	return boot_secondary(cpu, tidle);
+}
+#else
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	int ret;
@@ -172,6 +199,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 
 	return -EIO;
 }
+#endif /* CONFIG_HOTPLUG_PARALLEL */
 
 static void init_gic_priority_masking(void)
 {
@@ -223,6 +251,9 @@ asmlinkage notrace void secondary_start_kernel(void)
 	 * fail to come online.
 	 */
 	check_local_cpu_capabilities();
+#ifdef CONFIG_HOTPLUG_PARALLEL
+	cpuhp_ap_sync_alive();
+#endif
 	rcutree_report_cpu_starting(cpu);
 
 	ops = get_cpu_ops(cpu);
@@ -254,7 +285,9 @@ asmlinkage notrace void secondary_start_kernel(void)
 					 read_cpuid_id());
 	update_cpu_boot_status(cpu, CPU_BOOT_SUCCESS);
 	set_cpu_online(cpu, true);
+#ifndef CONFIG_HOTPLUG_PARALLEL
 	complete(&cpu_running);
+#endif
 
 	/*
 	 * Secondary CPUs enter the kernel with all DAIF exceptions masked.
-- 
2.34.1




More information about the linux-riscv mailing list