[PATCH] arm64: smp: Add support for cpu park

Sang Yan sangyan at huawei.com
Sat Dec 19 00:23:17 EST 2020


Introducing a feature of CPU PARK in order to save time
of cpus down and up during kexec, which may cost 250ms of
per cpu's down and 30ms of up.

As a result, for 128 cores, it costs more than 30 seconds
to down and up cpus during kexec. Think about 256 cores and more.

CPU PARK is a state that cpu power-on and staying in spin loop, polling
for exit chances, such as writing exit address.

Reserving a block of memory, to fill with cpu park text section,
exit address and park-magic-flag of each cpu. In implementation,
reserved one page for one cpu core.

Cpus going to park state instead of down in machine_shutdown().
Cpus going out of park state in smp_init instead of brought up.

One of cpu park sections in pre-reserved memory blocks,:
+--------------+
+ exit address +
+--------------+
+ park magic   +
+--------------+
+ park codes   +
+      .       +
+      .       +
+      .       +
+--------------+

Signed-off-by: Sang Yan <sangyan at huawei.com>
---
 arch/arm64/Kconfig                |  12 +++
 arch/arm64/include/asm/cpu.h      |   9 ++
 arch/arm64/include/asm/kexec.h    |   7 ++
 arch/arm64/include/asm/smp.h      |   3 +
 arch/arm64/kernel/Makefile        |   1 +
 arch/arm64/kernel/cpu-park.S      |  49 +++++++++
 arch/arm64/kernel/machine_kexec.c |   2 +-
 arch/arm64/kernel/process.c       |   4 +
 arch/arm64/kernel/smp.c           | 220 ++++++++++++++++++++++++++++++++++++++
 arch/arm64/mm/init.c              |  56 ++++++++++
 10 files changed, 362 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm64/kernel/cpu-park.S

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9f0139b..7a9defd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -347,6 +347,18 @@ config KASAN_SHADOW_OFFSET
 	default 0xeffffff800000000 if ARM64_VA_BITS_36 && KASAN_SW_TAGS
 	default 0xffffffffffffffff
 
+config ARM64_CPU_PARK
+	bool "Support CPU PARK on kexec"
+	depends on SMP
+	depends on KEXEC_CORE
+	help
+	 This enables support for CPU PARK feature in
+	 order to save time of cpu down to up.
+	 CPU park is a state through kexec, spin loop
+	 instead of cpu die before jumping to new kernel,
+	 jumping out from loop to new kernel entry in
+	 smp_init.
+
 source "arch/arm64/Kconfig.platforms"
 
 menu "Kernel Features"
diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h
index 7faae6f..e616a50 100644
--- a/arch/arm64/include/asm/cpu.h
+++ b/arch/arm64/include/asm/cpu.h
@@ -68,4 +68,13 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info);
 void update_cpu_features(int cpu, struct cpuinfo_arm64 *info,
 				 struct cpuinfo_arm64 *boot);
 
+#ifdef CONFIG_ARM64_CPU_PARK
+#define PARK_SECTION_SIZE PAGE_SIZE
+extern unsigned long park_start;
+extern unsigned long park_len;
+extern unsigned long park_start_v;
+extern void __cpu_park(unsigned long text, unsigned long exit);
+extern void __do_cpu_park(unsigned long exit);
+#endif
+
 #endif /* __ASM_CPU_H */
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
index d24b527..69a66ca 100644
--- a/arch/arm64/include/asm/kexec.h
+++ b/arch/arm64/include/asm/kexec.h
@@ -25,6 +25,11 @@
 
 #define KEXEC_ARCH KEXEC_ARCH_AARCH64
 
+#ifdef CONFIG_ARM64_CPU_PARK
+/* CPU park state flag: "park" */
+#define PARK_MAGIC 0x7061726b
+#endif
+
 #ifndef __ASSEMBLY__
 
 /**
@@ -90,6 +95,8 @@ static inline void crash_prepare_suspend(void) {}
 static inline void crash_post_resume(void) {}
 #endif
 
+void machine_kexec_mask_interrupts(void);
+
 #ifdef CONFIG_KEXEC_FILE
 #define ARCH_HAS_KIMAGE_ARCH
 
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index 2e7f529..9141fa8 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -145,6 +145,9 @@ bool cpus_are_stuck_in_kernel(void);
 
 extern void crash_smp_send_stop(void);
 extern bool smp_crash_stop_failed(void);
+#ifdef CONFIG_ARM64_CPU_PARK
+extern int kexec_smp_send_park(void);
+#endif
 
 #endif /* ifndef __ASSEMBLY__ */
 
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 86364ab..7ea26ab 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -51,6 +51,7 @@ obj-$(CONFIG_RANDOMIZE_BASE)		+= kaslr.o
 obj-$(CONFIG_HIBERNATION)		+= hibernate.o hibernate-asm.o
 obj-$(CONFIG_KEXEC_CORE)		+= machine_kexec.o relocate_kernel.o	\
 					   cpu-reset.o
+obj-$(CONFIG_ARM64_CPU_PARK)		+= cpu-park.o
 obj-$(CONFIG_KEXEC_FILE)		+= machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_ARM64_RELOC_TEST)		+= arm64-reloc-test.o
 arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
diff --git a/arch/arm64/kernel/cpu-park.S b/arch/arm64/kernel/cpu-park.S
new file mode 100644
index 00000000..8c01484
--- /dev/null
+++ b/arch/arm64/kernel/cpu-park.S
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * CPU park routines
+ *
+ * Copyright (C) 2020 Huawei Technologies., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/kexec.h>
+#include <asm/sysreg.h>
+#include <asm/virt.h>
+
+.text
+.pushsection    .idmap.text, "awx"
+
+/* cpu park helper in idmap section */
+SYM_CODE_START(__cpu_park)
+	/* Clear sctlr_el1 flags. */
+	mrs	x12, sctlr_el1
+	ldr	x13, =SCTLR_ELx_FLAGS
+	bic	x12, x12, x13
+	pre_disable_mmu_workaround
+	msr	sctlr_el1, x12		/* disable mmu */
+	isb
+
+	mov	x18, x0
+	mov	x0, x1			/* secondary_entry addr */
+	br	x18			/* call __do_cpu_park of each cpu */
+SYM_CODE_END(__cpu_park)
+
+.popsection
+
+SYM_CODE_START(__do_cpu_park)
+	ldr	x18, =PARK_MAGIC	/* magic number "park" */
+	str	x18, [x0, #8]		/* set on-park flag */
+
+.Lloop:	ldr	x19, [x0]
+	cmp	x19, #0			/* test secondary_entry */
+	wfe
+	b.eq	.Lloop
+
+	br	x19			/* jump to secondary_entry */
+SYM_CODE_END(__do_cpu_park)
+
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
index a0b144c..f47ce96 100644
--- a/arch/arm64/kernel/machine_kexec.c
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -213,7 +213,7 @@ void machine_kexec(struct kimage *kimage)
 	BUG(); /* Should never get here. */
 }
 
-static void machine_kexec_mask_interrupts(void)
+void machine_kexec_mask_interrupts(void)
 {
 	unsigned int i;
 	struct irq_desc *desc;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6616486..70d8dac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -147,6 +147,10 @@ void arch_cpu_idle_dead(void)
  */
 void machine_shutdown(void)
 {
+#ifdef CONFIG_ARM64_CPU_PARK
+	if (kexec_smp_send_park() == 0)
+		return;
+#endif
 	smp_shutdown_nonboot_cpus(reboot_cpu);
 }
 
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 2499b89..80a7a28 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -93,6 +93,159 @@ static inline int op_cpu_kill(unsigned int cpu)
 }
 #endif
 
+#ifdef CONFIG_ARM64_CPU_PARK
+struct cpu_park_section {
+	unsigned long exit;	/* exit address of park look */
+	unsigned long magic;	/* maigc represent park state */
+	char text[0];		/* text section of park */
+};
+
+static int mmap_cpu_park_mem(void)
+{
+	if (!park_start)
+		return -ENOMEM;
+
+	if (park_start_v)
+		return 0;
+
+	park_start_v = (unsigned long)__ioremap(park_start, park_len,
+						PAGE_KERNEL_EXEC);
+	if (!park_start_v) {
+		pr_warn("map park memory failed.");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static inline unsigned long cpu_park_section_v(unsigned int cpu)
+{
+	return PAGE_ALIGN(park_start_v + PARK_SECTION_SIZE * (cpu - 1));
+}
+
+static inline unsigned long cpu_park_section_p(unsigned int cpu)
+{
+	return PAGE_ALIGN(park_start + PARK_SECTION_SIZE * (cpu - 1));
+}
+
+/*
+ * Write the secondary_entry to exit section of park state.
+ * Then the secondary cpu will jump straight into the kernel
+ * by the secondary_entry.
+ */
+static int write_park_exit(unsigned int cpu)
+{
+	struct cpu_park_section *park_section;
+	unsigned long *park_exit;
+	unsigned long *park_text;
+
+	if (mmap_cpu_park_mem() != 0)
+		return -EPERM;
+
+	park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+	park_exit = &park_section->exit;
+	park_text = (unsigned long *)park_section->text;
+	pr_info("park_text 0x%lx : 0x%lx, __do_cpu_park text 0x%lx : 0x%lx",
+		(unsigned long)park_text, *park_text,
+		(unsigned long)__do_cpu_park,
+		*(unsigned long *)__do_cpu_park);
+
+	isb();
+
+	/*
+	 * Test first 8 bytes to determine
+	 * whether needs to write cpu park exit.
+	 */
+	if (*park_text == *(unsigned long *)__do_cpu_park) {
+		*park_exit = __pa_symbol(secondary_entry);
+		isb();
+		pr_info("Write cpu %u secondary entry 0x%lx to 0x%lx",
+			 cpu, *park_exit, (unsigned long)park_exit);
+		return 0;
+	}
+
+	return -EPERM;
+}
+
+/* Install cpu park sections for the specific cpu. */
+static int install_cpu_park(unsigned int cpu)
+{
+	struct cpu_park_section *park_section;
+	unsigned long *park_exit;
+	unsigned long *park_magic;
+	unsigned long park_text_len;
+
+	park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+	pr_debug("Install cpu park on cpu %u park exit 0x%lx park text 0x%lx",
+		 cpu, (unsigned long)park_section,
+		 (unsigned long)(park_section->text));
+
+	park_exit = &park_section->exit;
+	park_magic = &park_section->magic;
+	park_text_len = PARK_SECTION_SIZE - sizeof(struct cpu_park_section);
+
+	*park_exit = 0UL;
+	*park_magic = 0UL;
+	memcpy((void *)park_section->text, __do_cpu_park, park_text_len);
+
+	isb();
+
+	return 0;
+}
+
+static int uninstall_cpu_park(unsigned int cpu)
+{
+	unsigned long park_section;
+
+	if (mmap_cpu_park_mem() != 0)
+		return -EPERM;
+
+	park_section = cpu_park_section_v(cpu);
+	memset((void *)park_section, 0, PARK_SECTION_SIZE);
+
+	return 0;
+}
+
+static int cpu_wait_park(unsigned int cpu)
+{
+	unsigned long timeout;
+	struct cpu_park_section *park_section;
+	unsigned long *park_magic;
+
+	park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
+	park_magic = &park_section->magic;
+
+	timeout = USEC_PER_SEC;
+	while (*park_magic != PARK_MAGIC && timeout--)
+		udelay(1);
+
+	if (timeout)
+		pr_debug("cpu %u park done.", cpu);
+	else
+		pr_err("cpu %u park failed.", cpu);
+
+	return *park_magic == PARK_MAGIC;
+}
+
+static void cpu_park(unsigned int cpu)
+{
+	unsigned long park_section_p;
+	unsigned long park_exit_phy;
+	unsigned long do_park;
+	typeof(__cpu_park) *park;
+
+	park_section_p = cpu_park_section_p(cpu);
+	park_exit_phy = park_section_p;
+	pr_debug("Go to park cpu %u exit address 0x%lx", cpu, park_exit_phy);
+
+	do_park = park_section_p + sizeof(struct cpu_park_section);
+	park = (void *)__pa_symbol(__cpu_park);
+
+	cpu_install_idmap();
+	park(do_park, park_exit_phy);
+	unreachable();
+}
+#endif
 
 /*
  * Boot a secondary CPU, and assign it the specified idle task.
@@ -102,6 +255,10 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle)
 {
 	const struct cpu_operations *ops = get_cpu_ops(cpu);
 
+#ifdef CONFIG_ARM64_CPU_PARK
+	if (write_park_exit(cpu) == 0)
+		return 0;
+#endif
 	if (ops->cpu_boot)
 		return ops->cpu_boot(cpu);
 
@@ -131,6 +288,9 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 		return ret;
 	}
 
+#ifdef CONFIG_ARM64_CPU_PARK
+	uninstall_cpu_park(cpu);
+#endif
 	/*
 	 * CPU was successfully started, wait for it to come online or
 	 * time out.
@@ -843,10 +1003,31 @@ void arch_irq_work_raise(void)
 
 static void local_cpu_stop(void)
 {
+	int cpu;
+	const struct cpu_operations *ops = NULL;
+
 	set_cpu_online(smp_processor_id(), false);
 
 	local_daif_mask();
 	sdei_mask_local_cpu();
+
+#ifdef CONFIG_ARM64_CPU_PARK
+	/*
+	 * Go to cpu park state.
+	 * Otherwise go to cpu die.
+	 */
+	cpu = smp_processor_id();
+	if (kexec_in_progress) {
+		machine_kexec_mask_interrupts();
+		if (park_start_v)
+			cpu_park(cpu);
+
+		ops = get_cpu_ops(cpu);
+		if (ops && ops->cpu_die)
+			ops->cpu_die(cpu);
+	}
+#endif
+
 	cpu_park_loop();
 }
 
@@ -1052,6 +1233,45 @@ void smp_send_stop(void)
 	sdei_mask_local_cpu();
 }
 
+#ifdef CONFIG_ARM64_CPU_PARK
+int kexec_smp_send_park(void)
+{
+	unsigned long cpu;
+
+	if (WARN_ON(!kexec_in_progress)) {
+		pr_crit("%s called not in kexec progress.", __func__);
+		return -EPERM;
+	}
+
+	if (mmap_cpu_park_mem() != 0) {
+		pr_info("no cpuparkmem, goto normal way.");
+		return -EPERM;
+	}
+
+	local_irq_disable();
+
+	if (num_online_cpus() > 1) {
+		cpumask_t mask;
+
+		cpumask_copy(&mask, cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), &mask);
+
+		for_each_cpu(cpu, &mask)
+			install_cpu_park(cpu);
+		smp_cross_call(&mask, IPI_CPU_STOP);
+
+		/* Wait for other CPUs to park */
+		for_each_cpu(cpu, &mask)
+			cpu_wait_park(cpu);
+		pr_info("smp park other cpus done\n");
+	}
+
+	sdei_mask_local_cpu();
+
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_KEXEC_CORE
 void crash_smp_send_stop(void)
 {
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 69d4251..d25bb61 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -279,6 +279,58 @@ static void __init fdt_enforce_memory_region(void)
 		memblock_cap_memory_range(reg.base, reg.size);
 }
 
+#ifdef CONFIG_ARM64_CPU_PARK
+/* Physical address of reserved park memory. */
+unsigned long park_start;
+/* Virtual address of reserved park memory. */
+unsigned long park_start_v;
+/* park reserve mem len should be PAGE_SIZE * NR_CPUS */
+unsigned long park_len = PAGE_SIZE * NR_CPUS;
+
+static int __init parse_park_mem(char *p)
+{
+	if (!p)
+		return 0;
+
+	park_start = PAGE_ALIGN(memparse(p, NULL));
+	if (park_start == 0)
+		pr_info("cpu park mem params[%s]", p);
+
+	return 0;
+}
+early_param("cpuparkmem", parse_park_mem);
+
+static int __init reserve_park_mem(void)
+{
+	if (park_start == 0 || park_len == 0)
+		return 0;
+
+	park_start = PAGE_ALIGN(park_start);
+	park_len = PAGE_ALIGN(park_len);
+
+	if (!memblock_is_region_memory(park_start, park_len)) {
+		pr_warn("cannot reserve park mem: region is not memory!");
+		goto out;
+	}
+
+	if (memblock_is_region_reserved(park_start, park_len)) {
+		pr_warn("cannot reserve park mem: region overlaps reserved memory!");
+		goto out;
+	}
+
+	memblock_reserve(park_start, park_len);
+	memblock_remove(park_start, park_len);
+	pr_info("cpu park mem reserved: 0x%016lx - 0x%016lx (%ld MB)",
+		park_start, park_start + park_len, park_len >> 20);
+
+	return 0;
+out:
+	park_start = 0;
+	park_len = 0;
+	return -EINVAL;
+}
+#endif
+
 void __init arm64_memblock_init(void)
 {
 	const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
@@ -396,6 +448,10 @@ void __init arm64_memblock_init(void)
 	else
 		arm64_dma32_phys_limit = PHYS_MASK + 1;
 
+#ifdef CONFIG_ARM64_CPU_PARK
+	reserve_park_mem();
+#endif
+
 	reserve_elfcorehdr();
 
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
-- 
2.9.5




More information about the linux-arm-kernel mailing list