[PATCH v10 5/5] powerpc/kexec: add crash memory hotplug support
Sourabh Jain
sourabhjain at linux.ibm.com
Sun Apr 23 03:52:13 PDT 2023
Extend PowerPC arch crash hotplug handler to support memory hotplug
events. Since elfcorehdr is used to exchange the memory info between the
kernels hence it needs to be recreated to reflect the changes due to
memory hotplug events.
The way memory hotplug events are handled on PowerPC and the notifier
call chain used in generic code to trigger the arch crash handler, the
process to recreate the elfcorehdr is different for memory add and
remove case.
For memory remove case the memory change notifier call chain is
triggered first and then memblock regions is updated. Whereas for the
memory hot add case, memblock regions are updated before invoking the
memory change notifier call chain.
On PowerPC, memblock regions list is used to prepare the elfcorehdr. In
case of memory hot remove the memblock regions are updated after the
arch crash hotplug handler is triggered, hence an additional step is
taken to ensure that memory ranges used to prepare elfcorehdr do not
include hot removed memory.
When memory is hot removed it possible that memory regions count may
increase. So to accommodate a growing number of memory regions, the
elfcorehdr kexec segment is built with additional buffer space.
The changes done here will also work for the kexec_load system call given
that the kexec tool builds the elfcoredhr with additional space to
accommodate future memory regions as it is done for kexec_file_load
system call in the kernel.
Signed-off-by: Sourabh Jain <sourabhjain at linux.ibm.com>
Reviewed-by: Laurent Dufour <laurent.dufour at fr.ibm.com>
---
arch/powerpc/include/asm/kexec_ranges.h | 1 +
arch/powerpc/kexec/core_64.c | 77 +++++++++++++++++++++-
arch/powerpc/kexec/file_load_64.c | 36 ++++++++++-
arch/powerpc/kexec/ranges.c | 85 +++++++++++++++++++++++++
4 files changed, 195 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
index f83866a19e870..802abf580cf0f 100644
--- a/arch/powerpc/include/asm/kexec_ranges.h
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -7,6 +7,7 @@
void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
int add_tce_mem_ranges(struct crash_mem **mem_ranges);
int add_initrd_mem_range(struct crash_mem **mem_ranges);
#ifdef CONFIG_PPC_64S_HASH_MMU
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index 147ea6288a526..01a764b1c9b07 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -19,6 +19,7 @@
#include <linux/of.h>
#include <linux/libfdt.h>
#include <linux/memblock.h>
+#include <linux/memory.h>
#include <asm/page.h>
#include <asm/current.h>
@@ -547,6 +548,76 @@ int update_cpus_node(void *fdt)
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ * elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @arg: struct memory_notify data handler
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
+{
+ int ret;
+ struct crash_mem *cmem = NULL;
+ struct kexec_segment *ksegment;
+ void *ptr, *mem, *elfbuf = NULL;
+ unsigned long elfsz, memsz, base_addr, size;
+
+ ksegment = &image->segment[image->elfcorehdr_index];
+ mem = (void *) ksegment->mem;
+ memsz = ksegment->memsz;
+
+ ret = get_crash_memory_ranges(&cmem);
+ if (ret) {
+ pr_err("Failed to get crash mem range\n");
+ return;
+ }
+
+ /*
+ * The hot unplugged memory is not yet removed from crash memory
+ * ranges, remove it here.
+ */
+ if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+ base_addr = PFN_PHYS(mn->start_pfn);
+ size = mn->nr_pages * PAGE_SIZE;
+ ret = remove_mem_range(&cmem, base_addr, size);
+ if (ret) {
+ pr_err("Failed to remove hot-unplugged from crash memory ranges.\n");
+ return;
+ }
+ }
+
+ ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+ if (ret) {
+ pr_err("Failed to prepare elf header\n");
+ return;
+ }
+
+ /*
+ * It is unlikely that kernel hit this because elfcorehdr kexec
+ * segment (memsz) is built with addition space to accommodate growing
+ * number of crash memory ranges while loading the kdump kernel. It is
+ * Just to avoid any unforeseen case.
+ */
+ if (elfsz > memsz) {
+ pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu", elfsz, memsz);
+ goto out;
+ }
+
+ ptr = __va(mem);
+ if (ptr) {
+ /* Temporarily invalidate the crash image while it is replaced */
+ xchg(&kexec_crash_image, NULL);
+
+ /* Replace the old elfcorehdr with newly prepared elfcorehdr */
+ memcpy((void *)ptr, elfbuf, elfsz);
+
+ /* The crash image is now valid once again */
+ xchg(&kexec_crash_image, image);
+ }
+out:
+ vfree(elfbuf);
+}
+
/**
* arch_crash_hotplug_handler() - Handle crash CPU/Memory hotplug events to update the
* necessary kexec segments based on the hotplug event.
@@ -554,12 +625,14 @@ int update_cpus_node(void *fdt)
* @arg: struct memory_notify handler for memory add/remove case and NULL for CPU case.
*
* Update FDT segment to include newly added CPU. No action for CPU remove case.
+ * Recreate the elfcorehdr for Memory add/remove case and replace it with old one.
*/
void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
{
void *fdt, *ptr;
unsigned long mem;
int i, fdt_index = -1;
+ struct memory_notify *mn;
unsigned int hp_action = image->hp_action;
/*
@@ -569,9 +642,9 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
if (hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
return;
- /* crash update on memory hotplug events is not supported yet */
if (hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY || hp_action == KEXEC_CRASH_HP_ADD_MEMORY) {
- pr_info_once("Crash update is not supported for memory hotplug\n");
+ mn = (struct memory_notify *) arg;
+ update_crash_elfcorehdr(image, mn);
return;
}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 3554168687869..db7ba1c8c5e0b 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -21,6 +21,8 @@
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/elf.h>
+
#include <asm/setup.h>
#include <asm/drmem.h>
#include <asm/firmware.h>
@@ -707,6 +709,30 @@ static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
}
}
+/* get_max_phdr - Find the total number of Phdr needed to represent the
+ * max memory in the kdump elfcorehdr.
+ *
+ * @cmem: crash memory ranges in the system.
+ */
+static int get_max_phdr(struct crash_mem *cmem)
+{
+ int max_lmb;
+
+ /* In the worst case, a Phdr is needed for every other LMB to be represented
+ * as an individual crash range.
+ */
+ max_lmb = memory_hotplug_max() / 2 * drmem_lmb_size();
+
+ /* Do not cross the Phdr max limit of the elf header.
+ * Avoid counting Phdr for crash ranges (cmem->nr_ranges) which
+ * are already part of elfcorehdr.
+ */
+ if (max_lmb > PN_XNUM)
+ return PN_XNUM - cmem->nr_ranges;
+
+ return max_lmb - cmem->nr_ranges;
+}
+
/**
* load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
* segment needed to load kdump kernel.
@@ -738,7 +764,13 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
kbuf->buffer = headers;
kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
- kbuf->bufsz = kbuf->memsz = headers_sz;
+ kbuf->bufsz = headers_sz;
+/* Additional buffer space to accommodate future memory ranges */
+#if defined(CONFIG_MEMORY_HOTPLUG)
+ kbuf->memsz = headers_sz + get_max_phdr(cmem) * sizeof(Elf64_Phdr);
+#else
+ kbuf->memsz = headers_sz;
+#endif
kbuf->top_down = false;
ret = kexec_add_buffer(kbuf);
@@ -748,7 +780,7 @@ static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
}
image->elf_load_addr = kbuf->mem;
- image->elf_headers_sz = headers_sz;
+ image->elf_headers_sz = kbuf->memsz;
image->elf_headers = headers;
out:
kfree(cmem);
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
index 5fc53a5fcfdf6..d8007363cdc11 100644
--- a/arch/powerpc/kexec/ranges.c
+++ b/arch/powerpc/kexec/ranges.c
@@ -234,6 +234,91 @@ int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
return __add_mem_range(mem_ranges, base, size);
}
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges: Range list to remove the memory range to.
+ * @base: Base address of the range to remove.
+ * @size: Size of the memory range to remove.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+ u64 end;
+ int ret = 0;
+ unsigned int i;
+ u64 mstart, mend;
+ struct crash_mem *mem_rngs = *mem_ranges;
+
+ if (!size)
+ return 0;
+
+ /*
+ * Memory range are stored as start and end address, use
+ * the same format to do remove operation.
+ */
+ end = base + size - 1;
+
+ for (i = 0; i < mem_rngs->nr_ranges; i++) {
+ mstart = mem_rngs->ranges[i].start;
+ mend = mem_rngs->ranges[i].end;
+
+ /*
+ * Memory range to remove is not part of this range entry
+ * in the memory range list
+ */
+ if (!(base >= mstart && end <= mend))
+ continue;
+
+ /*
+ * Memory range to remove is equivalent to this entry in the
+ * memory range list. Remove the range entry from the list.
+ */
+ if (base == mstart && end == mend) {
+ for (; i < mem_rngs->nr_ranges - 1; i++) {
+ mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
+ mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
+ }
+ mem_rngs->nr_ranges--;
+ goto out;
+ }
+ /*
+ * Start address of the memory range to remove and the
+ * current memory range entry in the list is same. Just
+ * move the start address of the current memory range
+ * entry in the list to end + 1.
+ */
+ else if (base == mstart) {
+ mem_rngs->ranges[i].start = end + 1;
+ goto out;
+ }
+ /*
+ * End address of the memory range to remove and the
+ * current memory range entry in the list is same.
+ * Just move the end address of the current memory
+ * range entry in the list to base - 1.
+ */
+ else if (end == mend) {
+ mem_rngs->ranges[i].end = base - 1;
+ goto out;
+ }
+ /*
+ * Memory range to remove is not at the edge of current
+ * memory range entry. Split the current memory entry into
+ * two half.
+ */
+ else {
+ mem_rngs->ranges[i].end = base - 1;
+ size = mem_rngs->ranges[i].end - end;
+ ret = add_mem_range(mem_ranges, end + 1, size);
+ }
+ }
+out:
+ return ret;
+}
+
/**
* add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
* @mem_ranges: Range list to add the memory range(s) to.
--
2.39.2
More information about the kexec
mailing list