[PATCH 7/8] arm64/kexec: Add core kexec support

Geoff Levand geoff at infradead.org
Thu May 8 17:48:17 PDT 2014


Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S, to the
arm64 architecture that add support for the kexec re-boot mechanism on arm64
(CONFIG_KEXEC).

This implementation supports re-boots of kernels with either PSCI or spin-table
enable methods, but with some limitations on the match of 1st and 2nd stage
kernels.  The known limitations are checked in the kexec_compat_check() routine,
which is called during a kexec_load syscall.  If any limitations are reached an
error is returned by the kexec_load syscall.  Many of the limitations can be
removed with some enhancement to the CPU shutdown management code in
machine_kexec.c.

Signed-off-by: Geoff Levand <geoff at infradead.org>
---
 MAINTAINERS                         |   9 +
 arch/arm64/Kconfig                  |   8 +
 arch/arm64/include/asm/kexec.h      |  44 +++
 arch/arm64/kernel/Makefile          |   1 +
 arch/arm64/kernel/machine_kexec.c   | 623 ++++++++++++++++++++++++++++++++++++
 arch/arm64/kernel/relocate_kernel.S | 239 ++++++++++++++
 include/uapi/linux/kexec.h          |   1 +
 7 files changed, 925 insertions(+)
 create mode 100644 arch/arm64/include/asm/kexec.h
 create mode 100644 arch/arm64/kernel/machine_kexec.c
 create mode 100644 arch/arm64/kernel/relocate_kernel.S

diff --git a/MAINTAINERS b/MAINTAINERS
index 1066264..bb666bb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5144,6 +5144,15 @@ F:	include/linux/kexec.h
 F:	include/uapi/linux/kexec.h
 F:	kernel/kexec.c
 
+KEXEC FOR ARM64
+M:	Geoff Levand <geoff at infradead.org>
+W:	http://kernel.org/pub/linux/utils/kernel/kexec/
+L:	kexec at lists.infradead.org
+L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm64/kernel/machine_kexec.c
+F:	arch/arm64/kernel/relocate_kernel.S
+
 KEYS/KEYRINGS:
 M:	David Howells <dhowells at redhat.com>
 L:	keyrings at linux-nfs.org
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..dcd5ebc 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -244,6 +244,14 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
 
 source "mm/Kconfig"
 
+config KEXEC
+	bool "kexec system call"
+	---help---
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is independent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
 config XEN_DOM0
 	def_bool y
 	depends on XEN
diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
new file mode 100644
index 0000000..41a6244
--- /dev/null
+++ b/arch/arm64/include/asm/kexec.h
@@ -0,0 +1,44 @@
+#ifndef _ARM64_KEXEC_H
+#define _ARM64_KEXEC_H
+
+#if defined(CONFIG_KEXEC)
+
+/* Maximum physical address we can use pages from */
+
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+#define KEXEC_CONTROL_PAGE_SIZE	4096
+
+#define KEXEC_ARCH KEXEC_ARCH_ARM64
+
+#if !defined(__ASSEMBLY__)
+
+/**
+ * crash_setup_regs() - save registers for the panic kernel
+ *
+ * @newregs: registers are saved here
+ * @oldregs: registers to be saved (may be %NULL)
+ */
+
+static inline void crash_setup_regs(struct pt_regs *newregs,
+				    struct pt_regs *oldregs)
+{
+}
+
+/* Function pointer to optional machine-specific reinitialization */
+
+extern void (*kexec_reinit)(void);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* _ARM64_KEXEC_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 7d811d9..7272510 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -22,6 +22,7 @@ arm64-obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND)	+= sleep.o suspend.o
 arm64-obj-$(CONFIG_JUMP_LABEL)		+= jump_label.o
 arm64-obj-$(CONFIG_KGDB)		+= kgdb.o
+arm64-obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 
 obj-y					+= $(arm64-obj-y) vdso/
 obj-m					+= $(arm64-obj-m)
diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c
new file mode 100644
index 0000000..62779e5
--- /dev/null
+++ b/arch/arm64/kernel/machine_kexec.c
@@ -0,0 +1,623 @@
+/*
+ * kexec for arm64
+ */
+
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cpu_ops.h>
+#include <asm/system_misc.h>
+
+/* Global variables for the relocate_kernel routine. */
+
+extern const unsigned char relocate_new_kernel[];
+extern const unsigned long relocate_new_kernel_size;
+extern unsigned long kexec_signal_addr;
+extern unsigned long kexec_kimage_head;
+extern unsigned long kexec_dtb_addr;
+extern unsigned long kexec_kimage_start;
+extern unsigned long kexec_spinner_count;
+
+/* Global variables for the kexec_cpu_spin routine. */
+
+extern const unsigned char kexec_cpu_spin[];
+extern const unsigned long kexec_cpu_spin_size;
+
+void (*kexec_reinit)(void);
+
+/**
+ * struct kexec_cpu_info_spin - Info needed for the "spin table" enable method.
+ */
+
+struct kexec_cpu_info_spin {
+	phys_addr_t phy_release_addr; /* cpu order */
+};
+
+/**
+ * struct kexec_cpu_info - Info for a specific cpu in the device tree.
+ */
+
+struct kexec_cpu_info {
+	unsigned int cpu;
+	const struct cpu_operations *cpu_ops;
+	bool spinner;
+	struct kexec_cpu_info_spin spin;
+};
+
+/**
+ * struct kexec_dt_info - Device tree info needed by the local kexec routines.
+ */
+
+struct kexec_dt_info {
+	unsigned int cpu_count;
+	struct kexec_cpu_info *cpu_info;
+	unsigned int spinner_count;
+	phys_addr_t phy_memreserve_addr;
+	unsigned int memreserve_size;
+};
+
+/**
+ * struct kexec_ctx - Kexec runtime context.
+ *
+ * @dt_1st: Device tree info for the 1st stage kernel.
+ * @dt_2nd: Device tree info for the 2nd stage kernel.
+ */
+
+struct kexec_ctx {
+	struct kexec_dt_info dt_1st;
+	struct kexec_dt_info dt_2nd;
+};
+
+static struct kexec_ctx *ctx;
+
+/**
+ * kexec_spin_code_offset - Offset into memreserve area of the spin code.
+ */
+
+static const unsigned int kexec_spin_code_offset = PAGE_SIZE;
+
+/**
+ * kexec_is_dtb - Helper routine to check the device tree header signature.
+ */
+
+static int kexec_is_dtb(__be32 magic)
+{
+	int result = be32_to_cpu(magic) == OF_DT_HEADER;
+
+	return result;
+}
+
+/**
+ * kexec_is_dtb_user - Helper routine to check the device tree header signature.
+ */
+
+static int kexec_is_dtb_user(const void *dtb)
+{
+	__be32 magic;
+
+	get_user(magic, (__be32 *)dtb);
+
+	return kexec_is_dtb(magic);
+}
+
+/**
+ * kexec_find_dtb_seg - Helper routine to find the dtb segment.
+ */
+
+static const struct kexec_segment *kexec_find_dtb_seg(
+	const struct kimage *image)
+{
+	int i;
+
+	for (i = 0; i < image->nr_segments; i++) {
+		if (kexec_is_dtb_user(image->segment[i].buf))
+			return &image->segment[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * kexec_copy_dtb - Helper routine to copy dtb from user space.
+ *
+ * Returns a kmalloc'ed copy that the caller must kfree, or NULL on failure.
+ */
+
+static void *kexec_copy_dtb(const struct kexec_segment *seg)
+{
+	void *dtb;
+
+	/* A missing segment and an empty segment are both caller bugs. */
+	BUG_ON(!seg || !seg->bufsz);
+
+	dtb = kmalloc(seg->bufsz, GFP_KERNEL);
+	if (!dtb) {
+		pr_debug("%s: out of memory\n", __func__);
+		return NULL;
+	}
+
+	/* copy_from_user() returns the number of bytes it could not copy. */
+	if (copy_from_user(dtb, seg->buf, seg->bufsz)) {
+		kfree(dtb);
+		return NULL;
+	}
+
+	return dtb;
+}
+
+
+/**
+ * kexec_read_memreserve - Initialize memreserve info from a dtb.
+ */
+
+static int kexec_read_memreserve(const void *dtb, struct kexec_dt_info *info)
+{
+	const struct boot_param_header *h = dtb;
+	struct pair {
+		__be64 phy_addr;
+		__be64 size;
+	} const *pair;
+
+	pair = dtb + be32_to_cpu(h->off_mem_rsvmap);
+
+	if ((pair + 1)->size)
+		pr_warn("kexec: Multiple memreserve regions found.");
+
+	info->phy_memreserve_addr = be64_to_cpu(pair->phy_addr);
+	info->memreserve_size = be64_to_cpu(pair->size);
+
+	pr_debug("%s:%d: memreserve_addr:  %pa (%p)\n", __func__, __LINE__,
+		&info->phy_memreserve_addr,
+		phys_to_virt(info->phy_memreserve_addr));
+	pr_debug("%s:%d: memreserve_size:  %u (%xh)\n", __func__, __LINE__,
+		info->memreserve_size, info->memreserve_size);
+
+	return 0;
+}
+
+/**
+ * kexec_setup_cpu_spin - Initialize cpu spin info from a device tree cpu node.
+ *
+ * Returns 0, or the of_property_read_u64() error if the read fails.
+ */
+
+static int kexec_setup_cpu_spin(const struct device_node *dn,
+	struct kexec_cpu_info_spin *info)
+{
+	int result;
+	u64 t1;
+
+	memset(info, 0, sizeof(*info));
+	result = of_property_read_u64(dn, "cpu-release-addr", &t1);
+	if (result) {
+		pr_warn("kexec: Read cpu-release-addr failed.\n");
+		return result;
+	}
+
+	/* of_property_read_u64() already returns the value in cpu order. */
+	info->phy_release_addr = t1;
+	return 0;
+}
+
+/**
+ * kexec_cpu_info_init - Initialize an array of kexec_cpu_info structures.
+ *
+ * Allocates a cpu info array and fills it with info for all cpus found in
+ * the device tree passed.  The cpu info array is zero terminated.
+ * Returns 0 on success; on error the array is freed and the error returned.
+ */
+int kexec_cpu_info_init(const struct device_node *dn,
+	struct kexec_dt_info *info)
+{
+	int result;
+	unsigned int cpu;
+	const struct device_node *i;
+
+	/* kcalloc zeroes the array, which provides the terminating entry. */
+	info->cpu_info = kcalloc(1 + info->cpu_count,
+		sizeof(struct kexec_cpu_info), GFP_KERNEL);
+
+	if (!info->cpu_info) {
+		pr_debug("%s: out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	info->spinner_count = 0;
+
+	for (cpu = 0, i = dn; cpu < info->cpu_count; cpu++) {
+		struct kexec_cpu_info *cpu_info = &info->cpu_info[cpu];
+
+		i = of_find_node_by_type((struct device_node *)i, "cpu");
+
+		BUG_ON(!i);
+
+		cpu_info->cpu = cpu;
+
+		result = cpu_read_ops((struct device_node *)i, cpu,
+			&cpu_info->cpu_ops);
+
+		if (result)
+			goto on_error;
+
+		cpu_info->spinner = !strcmp(cpu_info->cpu_ops->name,
+			"spin-table");
+
+		if (cpu_info->spinner) {
+			info->spinner_count++;
+
+			result = kexec_setup_cpu_spin(i, &cpu_info->spin);
+
+			if (result)
+				goto on_error;
+		}
+
+		if (cpu_info->spinner)
+			pr_devel("%s:%d: cpu-%u: '%s' release_addr: %pa\n",
+				__func__, __LINE__, cpu,
+				cpu_info->cpu_ops->name,
+				&cpu_info->spin.phy_release_addr);
+		else
+			pr_devel("%s:%d: cpu-%u: '%s'\n", __func__, __LINE__,
+				cpu, cpu_info->cpu_ops->name);
+	}
+
+	return 0;
+
+on_error:
+	kfree(info->cpu_info);
+	info->cpu_info = NULL;
+
+	return result;
+}
+
+/**
+ * kexec_dt_info_init - Initialize a kexec_dt_info structure from a dtb.
+ */
+
+int kexec_dt_info_init(void *dtb, struct kexec_dt_info *info)
+{
+	int result;
+	struct device_node *i;
+	struct device_node *dn;
+
+	if (!dtb) {
+		/* 1st stage. */
+		dn = NULL;
+	} else {
+		/* 2nd stage. */
+
+		of_fdt_unflatten_tree(dtb, &dn);
+
+		result = kexec_read_memreserve(dtb, info);
+
+		if (result)
+			return result;
+	}
+
+	/*
+	 * We may need to work with offline cpus to get them into the correct
+	 * state for a given enable method to work, and so need an info_array
+	 * that has info about all the platform cpus.
+	 */
+
+	for (info->cpu_count = 0, i = dn; (i = of_find_node_by_type(i, "cpu"));
+		info->cpu_count++)
+		(void)0;
+
+	pr_devel("%s:%d: cpu_count: %u\n", __func__, __LINE__, info->cpu_count);
+
+	if (!info->cpu_count) {
+		pr_err("kexec: Error: No cpu nodes found in device tree.\n");
+		return -EINVAL;
+	}
+
+	result = kexec_cpu_info_init(dn, info);
+
+	return result;
+}
+
+/**
+* kexec_spin_2 - The identity map spin loop.
+*/
+
+void kexec_spin_2(unsigned int cpu, phys_addr_t signal_1,
+	phys_addr_t phy_release_addr, phys_addr_t signal_2)
+{
+	typedef void (*fn_t)(phys_addr_t, phys_addr_t);
+
+	fn_t spin_3;
+
+	atomic_dec((atomic_t *)signal_1);
+
+	/* Wait for next signal. */
+
+	while (!atomic_read((atomic_t *)signal_2))
+		(void)0;
+
+	/* Enter the memreserve spin code. */
+
+	spin_3 = (fn_t)(ctx->dt_2nd.phy_memreserve_addr
+		+ kexec_spin_code_offset);
+
+	spin_3(phy_release_addr, signal_2);
+
+	BUG();
+}
+
+static atomic_t spin_1_signal = ATOMIC_INIT(0);
+static atomic_t spin_2_signal = ATOMIC_INIT(0);
+
+/**
+* kexec_spin_1 - The virtual address spin loop.
+*/
+
+static void kexec_spin_1(unsigned int cpu)
+{
+	typedef void (*fn_t)(unsigned int, phys_addr_t, phys_addr_t,
+		phys_addr_t);
+	fn_t fn;
+
+	pr_devel("%s:%d: id: %u\n", __func__, __LINE__, smp_processor_id());
+
+	/* Wait for the signal. */
+
+	while (!atomic_read(&spin_1_signal))
+		(void)0;
+
+	/* Enter the identity mapped spin code. */
+
+	setup_mm_for_reboot();
+
+	fn = (fn_t)virt_to_phys(kexec_spin_2);
+
+	fn(cpu, virt_to_phys(&spin_1_signal),
+		ctx->dt_2nd.cpu_info[cpu].spin.phy_release_addr,
+		virt_to_phys(&spin_2_signal));
+
+	BUG();
+}
+
+/**
+ * kexec_restart - Called after the identity mapping is enabled.
+ */
+
+static void kexec_restart(void)
+{
+	unsigned long timeout = 1000;
+
+	atomic_set(&spin_1_signal, ctx->dt_1st.spinner_count - 1);
+
+	__flush_dcache_area(&spin_1_signal, sizeof(spin_1_signal));
+	/* spin_1_signal is an atomic_t: atomic64_read() would over-read it. */
+	while (timeout-- && atomic_read(&spin_1_signal))
+		udelay(10);
+}
+
+/**
+* kexec_compat_check - Helper to check compatibility of 2nd stage kernel.
+*/
+
+static int kexec_compat_check(const struct kexec_dt_info *dt1,
+	const struct kexec_dt_info *dt2)
+{
+	int result = 0;
+
+	/* No checks needed for psci to psci. */
+
+	if (!dt1->spinner_count && !dt2->spinner_count)
+		goto done;
+
+	/* Check for a cpu count mismatch. */
+
+	if (dt1->cpu_count != dt2->cpu_count) {
+		pr_err("kexec: Error: CPU count mismatch %u -> %u.\n",
+			dt1->cpu_count, dt2->cpu_count);
+		result++;
+	}
+
+	/* Check for an enable method mismatch. */
+
+	if (dt1->spinner_count != dt2->spinner_count) {
+		pr_err("kexec: Error: Enable method mismatch %s -> %s.\n",
+			dt1->cpu_info[0].cpu_ops->name,
+			dt2->cpu_info[0].cpu_ops->name);
+		result++;
+	}
+
+	/* Check for mixed enable methods. */
+
+	if (dt1->spinner_count && (dt1->cpu_count != dt1->spinner_count)) {
+		pr_err("kexec: Error: Mixed enable methods in 1st stage.\n");
+		result++;
+	}
+
+	if (dt2->spinner_count && (dt2->cpu_count != dt2->spinner_count)) {
+		pr_err("kexec: Error: Mixed enable methods in 2nd stage.\n");
+		result++;
+	}
+
+	/* Check for cpus still spinning in secondary_holding_pen. */
+
+	if (NR_CPUS < dt1->spinner_count) {
+		pr_err("kexec: Error: NR_CPUS too small for spin enable %u < %u.\n",
+			NR_CPUS, dt1->spinner_count + 1);
+		result++;
+	}
+
+done:
+	return result ? -EINVAL : 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+	if (ctx) {
+		kfree(ctx->dt_1st.cpu_info);
+		ctx->dt_1st.cpu_info = NULL;
+
+		kfree(ctx->dt_2nd.cpu_info);
+		ctx->dt_2nd.cpu_info = NULL;
+	}
+
+	kfree(ctx);
+	ctx = NULL;
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+}
+
+/**
+ * machine_kexec_prepare - Prepare for a kexec reboot.
+ *
+ * Called from the core kexec code when a kernel image is loaded.
+ * Returns 0 on success or a negative errno, releasing ctx on failure.
+ */
+
+int machine_kexec_prepare(struct kimage *image)
+{
+	int result;
+	const struct kexec_segment *seg;
+	void *dtb = NULL;
+
+	machine_kexec_cleanup(NULL);
+
+	/* Zeroed so the error path never kfree()s an unset cpu_info pointer. */
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx) {
+		pr_debug("%s: out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	/* The segments come from user space: fail the load, do not BUG(). */
+	result = -EINVAL;
+	seg = kexec_find_dtb_seg(image);
+	if (!seg || seg->bufsz < sizeof(__be32))
+		goto on_error;
+
+	dtb = kexec_copy_dtb(seg);
+	if (!dtb || !kexec_is_dtb(*(const __be32 *)dtb))
+		goto on_error;
+
+	result = kexec_dt_info_init(NULL, &ctx->dt_1st);
+	if (result)
+		goto on_error;
+
+	result = kexec_dt_info_init(dtb, &ctx->dt_2nd);
+	if (result)
+		goto on_error;
+
+	if (ctx->dt_2nd.spinner_count) {
+		BUG_ON(!ctx->dt_2nd.phy_memreserve_addr);
+		BUG_ON(kexec_cpu_spin_size >= ctx->dt_2nd.memreserve_size
+			- kexec_spin_code_offset);
+	}
+
+	result = kexec_compat_check(&ctx->dt_1st, &ctx->dt_2nd);
+	if (result)
+		goto on_error;
+
+	kexec_dtb_addr = seg->mem;
+	kexec_kimage_start = image->start;
+	/* No spinners (psci) must give 0: 0 - 1 would wrap and hang the copy. */
+	kexec_spinner_count = ctx->dt_1st.spinner_count
+		? ctx->dt_1st.spinner_count - 1 : 0;
+	smp_spin_table_set_die(kexec_spin_1);
+	goto on_exit;
+
+on_error:
+	machine_kexec_cleanup(NULL);
+on_exit:
+	kfree(dtb);
+	return result;
+}
+
+/**
+ * machine_kexec - Do the kexec reboot.
+ *
+ * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
+ */
+
+void machine_kexec(struct kimage *image)
+{
+	phys_addr_t reboot_code_buffer_phys;
+	void *reboot_code_buffer;
+	unsigned int cpu;
+
+	BUG_ON(relocate_new_kernel_size > KEXEC_CONTROL_PAGE_SIZE);
+	BUG_ON(num_online_cpus() > 1);
+
+	pr_devel("%s:%d: id: %u\n", __func__, __LINE__, smp_processor_id());
+
+	kexec_kimage_head = image->head;
+	kexec_signal_addr = virt_to_phys(&spin_2_signal);
+
+	reboot_code_buffer_phys = page_to_phys(image->control_code_page);
+	reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
+
+	if (ctx->dt_2nd.spinner_count) {
+		void *va;
+
+		/*
+		* Copy the spin code to the 2nd stage memreserve area as
+		* dictated by the arm64 boot specification.
+		*/
+
+		va = phys_to_virt(ctx->dt_2nd.phy_memreserve_addr
+			+ kexec_spin_code_offset);
+
+		memcpy(va, kexec_cpu_spin, kexec_cpu_spin_size);
+
+		flush_icache_range((unsigned long)va,
+			(unsigned long)va + kexec_cpu_spin_size);
+
+		/*
+		 * Zero the release address for all the 2nd stage cpus.
+		 */
+
+		for (cpu = 0; cpu < ctx->dt_2nd.cpu_count; cpu++) {
+			u64 *release_addr;
+
+			if (!ctx->dt_2nd.cpu_info[cpu].spinner)
+				continue;
+
+			release_addr = phys_to_virt(
+				ctx->dt_2nd.cpu_info[cpu].spin.phy_release_addr);
+
+			*release_addr = 0;
+
+			__flush_dcache_area(release_addr, sizeof(u64));
+		}
+	}
+
+	/*
+	 * Copy relocate_new_kernel to the reboot_code_buffer for use
+	 * after the kernel is shut down.
+	 */
+
+	memcpy(reboot_code_buffer, relocate_new_kernel,
+		relocate_new_kernel_size);
+
+	flush_icache_range((unsigned long)reboot_code_buffer,
+		(unsigned long)reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
+
+	/* TODO: Adjust any mismatch in cpu enable methods. */
+
+	pr_info("Bye!\n");
+
+	if (kexec_reinit)
+		kexec_reinit();
+
+	local_irq_disable();
+	local_fiq_disable();
+
+	setup_restart();
+	kexec_restart();
+	soft_restart(reboot_code_buffer_phys);
+}
diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S
new file mode 100644
index 0000000..15a49d6
--- /dev/null
+++ b/arch/arm64/kernel/relocate_kernel.S
@@ -0,0 +1,239 @@
+/*
+ * kexec for arm64
+ */
+
+#include <asm/memory.h>
+#include <asm/page.h>
+
+/*
+ * kexec_cpu_spin - Spin the CPU as described in the arm64/booting.txt document.
+ *
+ * Prototype: void kexec_cpu_spin(phys_addr_t release_addr, phys_addr_t signal);
+ *
+ * The caller must initialize release_addr to zero or a valid address
+ * prior to calling kexec_cpu_spin.  Note that if the MMU will be turned on
+ * or off while the CPU is spinning here this code must be in an identity
+ * mapped page.  The value written to release_addr must be in little endian
+ * order.
+ */
+
+.align 3
+
+.globl kexec_cpu_spin
+kexec_cpu_spin:
+
+	/* Signal that this cpu has entered. */
+1:
+	ldxr    w2, [x1]	/* signal is a 32-bit atomic_t: use w regs */
+	sub     w2, w2, 1
+	stxr    w3, w2, [x1]	/* w3 != 0: exclusive store failed, retry */
+	cbnz    w3, 1b
+
+
+	/* Spin while release_addr is zero. */
+1:
+	wfe
+	ldr	x4, [x0]
+	cbz	x4, 1b
+
+	/* Convert LE to CPU. */
+
+#if defined(__AARCH64EB__)
+	rev	x4, x4
+#endif
+
+	/* Jump to new kernel. */
+
+	mov	x0, xzr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+
+	br	x4
+
+.align 3
+
+.kexec_cpu_spin_end:
+
+/*
+ * kexec_cpu_spin_size - Byte count for copy operations.
+ */
+
+.globl kexec_cpu_spin_size
+kexec_cpu_spin_size:
+	.quad .kexec_cpu_spin_end - kexec_cpu_spin
+
+
+/*
+ * relocate_new_kernel - Put the 2nd stage kernel image in place and boot it.
+ *
+ * The memory that the old kernel occupies may be overwritten when copying the
+ * new kernel to its final location.  To assure that the relocate_new_kernel
+ * routine which does that copy is not overwritten, all code and data needed
+ * by relocate_new_kernel must be between the symbols relocate_new_kernel and
+ * relocate_new_kernel_end.  The machine_kexec() routine will copy
+ * relocate_new_kernel to the kexec control_code_page, a special page which
+ * has been set up to be preserved during the kernel copy operation.
+ */
+
+/* These definitions correspond to the kimage_entry flags in linux/kexec.h */
+
+#define IND_DESTINATION_BIT 0
+#define IND_INDIRECTION_BIT 1
+#define IND_DONE_BIT        2
+#define IND_SOURCE_BIT      3
+
+.align 3
+
+.globl relocate_new_kernel
+relocate_new_kernel:
+
+	/* Signal secondary cpus to enter the memreserve spin code. */
+
+	ldr	x1, kexec_signal_addr
+	ldr	x2, kexec_spinner_count
+	str	w2, [x1]			/* signal is a 32-bit atomic_t */
+
+	/* Wait for all secondary cpus to enter. */
+1:
+	ldr	w2, [x1]
+	cbnz	w2, 1b
+
+	/* Copy the new kernel image. */
+
+	ldr	x10, kexec_kimage_head		/* x10 = entry */
+
+	/* Check if the new kernel needs relocation. */
+
+	cbz	x10, .done
+	tbnz	x10, IND_DONE_BIT, .done
+
+	/* Setup loop variables. */
+
+	mov	x12, xzr			/* x12 = ptr */
+	mov	x13, xzr			/* x13 = dest */
+
+.loop:
+	/* addr = entry & PAGE_MASK */
+
+	and	x14, x10, PAGE_MASK		/* x14 = addr */
+
+	/* switch (entry & IND_FLAGS) */
+
+.case_source:
+	tbz	x10, IND_SOURCE_BIT, .case_indirection
+
+	/* copy_page(x20 = dest, x21 = addr) */
+
+	mov x20, x13
+	mov x21, x14
+
+	prfm	pldl1strm, [x21, #64]
+1:	ldp	x22, x23, [x21]
+	ldp	x24, x25, [x21, #16]
+	ldp	x26, x27, [x21, #32]
+	ldp	x28, x29, [x21, #48]
+	add	x21, x21, #64
+	prfm	pldl1strm, [x21, #64]
+	stnp	x22, x23, [x20]
+	stnp	x24, x25, [x20, #16]
+	stnp	x26, x27, [x20, #32]
+	stnp	x28, x29, [x20, #48]
+	add	x20, x20, #64
+	tst	x21, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	/* dest += PAGE_SIZE */
+
+	add	x13, x13, PAGE_SIZE
+	b	.next_entry
+
+.case_indirection:
+	tbz	x10, IND_INDIRECTION_BIT, .case_destination
+
+	/* ptr = addr */
+
+	mov	x12, x14
+	b	.next_entry
+
+.case_destination:
+	tbz	x10, IND_DESTINATION_BIT, .next_entry
+
+	/* dest = addr */
+
+	mov	x13, x14
+
+.next_entry:
+	/* entry = *ptr++ */
+
+	ldr	x10, [x12]
+	add	x12, x12, 8
+
+	/* while (!(entry & IND_DONE)) */
+
+	tbz	x10, IND_DONE_BIT, .loop
+
+.done:
+	/* Jump to new kernel. */
+
+	ldr	x0, kexec_dtb_addr
+	mov	x1, xzr
+	mov	x2, xzr
+	mov	x3, xzr
+
+	ldr	x4, kexec_kimage_start
+	br	x4
+
+.align 3
+
+/* The machine_kexec routines set these variables. */
+
+/*
+ * kexec_signal_addr - Physical address of the spin signal variable.
+ */
+
+.globl kexec_signal_addr
+kexec_signal_addr:
+	.quad	0x0
+
+/*
+ * kexec_spinner_count - Count of spinning cpus.
+ */
+
+.globl kexec_spinner_count
+kexec_spinner_count:
+	.quad	0x0
+
+/*
+ * kexec_dtb_addr - The address of the new kernel's device tree.
+ */
+
+.globl kexec_dtb_addr
+kexec_dtb_addr:
+	.quad	0x0
+
+/*
+ * kexec_kimage_head - Copy of image->head, the list of kimage entries.
+ */
+
+.globl kexec_kimage_head
+kexec_kimage_head:
+	.quad	0x0
+
+/*
+ * kexec_kimage_start - Copy of image->start, the entry point of the new kernel.
+ */
+
+.globl kexec_kimage_start
+kexec_kimage_start:
+	.quad	0x0
+
+.relocate_new_kernel_end:
+
+/*
+ * relocate_new_kernel_size - Byte count to copy to kimage control_code_page.
+ */
+
+.globl relocate_new_kernel_size
+relocate_new_kernel_size:
+	.quad .relocate_new_kernel_end - relocate_new_kernel
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d4..b0bc56d 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -28,6 +28,7 @@
 #define KEXEC_ARCH_SH      (42 << 16)
 #define KEXEC_ARCH_MIPS_LE (10 << 16)
 #define KEXEC_ARCH_MIPS    ( 8 << 16)
+#define KEXEC_ARCH_ARM64   (183 << 16)
 
 /* The artificial cap on the number of segments passed to kexec_load. */
 #define KEXEC_SEGMENT_MAX 16
-- 
1.9.1





More information about the kexec mailing list