[PATCH v5 2/4] crash: Exclude crash kernel memory in crash core
Mike Rapoport
rppt at kernel.org
Thu Feb 12 10:58:21 PST 2026
Hi,
On Thu, Feb 12, 2026 at 06:09:59PM +0800, Jinjie Ruan wrote:
> The exclude of crashk_res, crashk_low_res and crashk_cma memory
> are almost identical across different architectures, handling them
> in the crash core would eliminate a lot of duplication, so do
> them in the common code.
>
> And move the size calculation (and the realloc if needed) into the
> generic crash core so that:
>
> - New CMA regions or future crash-memory types can automatically
> accounted for in crash core;
>
> - Each architecture no longer has to play whack-a-mole with
> its private array size.
>
> To achieve the above goal, 4 architecture-specific functions are
> introduced:
>
> - arch_get_system_nr_ranges() and arch_prepare_elf64_ram_headers().
> The 1st function pre-counts the number of memory ranges, and
> the 2st function fill the memory ranges into the cmem->ranges[] array,
> and count the actual number of ranges filled.
The names should reflect that these function deal with crash memory ranges.
> - arch_crash_exclude_mem_range(). Realloc for powerpc. The default
> implementation is crash_exclude_mem_range(), and use
> crash_exclude_mem_range_guarded() to implement the arch version
> for powerpc.
>
> - arch_get_crash_memory_ranges(). Get crash memory ranges for arch and
> the default implementation is generic across x86, arm64, riscv, and
> loongson by using the first two arch functions above. powerpc has its
> own implementation by calling get_crash_memory_ranges().
Hmm, powerpc seems too different from the rest, maybe we shouldn't try to
squeeze it in?
> Tested on x86, arm64 and riscv with QEMU.
>
> Signed-off-by: Jinjie Ruan <ruanjinjie at huawei.com>
> ---
> arch/arm64/include/asm/kexec.h | 9 +-
> arch/arm64/kernel/machine_kexec_file.c | 41 +++-----
> arch/loongarch/include/asm/kexec.h | 9 +-
> arch/loongarch/kernel/machine_kexec_file.c | 41 +++-----
> arch/powerpc/include/asm/kexec.h | 13 +++
> arch/powerpc/include/asm/kexec_ranges.h | 3 -
> arch/powerpc/kexec/crash.c | 68 ++++++++------
> arch/powerpc/kexec/file_load_64.c | 17 ++--
> arch/powerpc/kexec/ranges.c | 18 +---
> arch/riscv/include/asm/kexec.h | 9 +-
> arch/riscv/kernel/machine_kexec_file.c | 37 +++-----
> arch/x86/include/asm/kexec.h | 9 ++
> arch/x86/kernel/crash.c | 104 +++------------------
> include/linux/crash_core.h | 75 +++++++++++++--
> kernel/crash_core.c | 85 +++++++++++++++--
> 15 files changed, 289 insertions(+), 249 deletions(-)
TBH, I'd expect this to produce negative diffstat :/
> diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
> index 892e5bebda95..67f790e3ba14 100644
> --- a/arch/arm64/include/asm/kexec.h
> +++ b/arch/arm64/include/asm/kexec.h
> @@ -119,6 +119,7 @@ struct kimage_arch {
> };
>
> #ifdef CONFIG_KEXEC_FILE
> +struct crash_mem;
> extern const struct kexec_file_ops kexec_image_ops;
>
> int arch_kimage_file_post_load_cleanup(struct kimage *image);
> @@ -128,7 +129,13 @@ extern int load_other_segments(struct kimage *image,
> unsigned long kernel_load_addr, unsigned long kernel_size,
> char *initrd, unsigned long initrd_len,
> char *cmdline);
> -#endif
> +
> +int arch_get_system_nr_ranges(unsigned int *nr_ranges);
> +#define arch_get_system_nr_ranges arch_get_system_nr_ranges
> +
> +int arch_prepare_elf64_ram_headers(struct crash_mem *cmem);
> +#define arch_prepare_elf64_ram_headers arch_prepare_elf64_ram_headers
I think a better practice would be to declare all functions that an
architecture may override in include/linux/crash_core.h and provide a
default __weak implementation in kernel/crash_core.c.
> +#endif /* CONFIG_KEXEC_FILE */
>
> #endif /* __ASSEMBLER__ */
>
> diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
> index 410060ebd86d..506a165117b1 100644
> --- a/arch/arm64/kernel/machine_kexec_file.c
> +++ b/arch/arm64/kernel/machine_kexec_file.c
> @@ -40,23 +40,22 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
> }
>
> #ifdef CONFIG_CRASH_DUMP
> -static int prepare_elf_headers(void **addr, unsigned long *sz)
> +int arch_get_system_nr_ranges(unsigned int *nr_ranges)
> {
> - struct crash_mem *cmem;
> - unsigned int nr_ranges;
> - int ret;
> - u64 i;
> phys_addr_t start, end;
> + u64 i;
>
> - nr_ranges = 2; /* for exclusion of crashkernel region */
> for_each_mem_range(i, &start, &end)
> - nr_ranges++;
> + (*nr_ranges)++;
> +
Won't be simpler to make it
unsigned int arch_get_system_nr_ranges(void)
count the ranges and return the result?
> + return 0;
> +}
>
> - cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
> - if (!cmem)
> - return -ENOMEM;
> +int arch_prepare_elf64_ram_headers(struct crash_mem *cmem)
> +{
It seems that this function collects the memory ranges and fills them into
cmem rather than prepares elf headers.
> + phys_addr_t start, end;
> + u64 i;
>
> - cmem->max_nr_ranges = nr_ranges;
> cmem->nr_ranges = 0;
> for_each_mem_range(i, &start, &end) {
> cmem->ranges[cmem->nr_ranges].start = start;
> @@ -64,22 +63,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
> cmem->nr_ranges++;
> }
>
> - /* Exclude crashkernel region */
> - ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
> - if (ret)
> - goto out;
> -
> - if (crashk_low_res.end) {
> - ret = crash_exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
> - if (ret)
> - goto out;
> - }
> -
> - ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
> -
> -out:
> - kfree(cmem);
> - return ret;
> + return 0;
> }
> #endif
>
> @@ -109,7 +93,8 @@ int load_other_segments(struct kimage *image,
> void *headers;
> unsigned long headers_sz;
> if (image->type == KEXEC_TYPE_CRASH) {
> - ret = prepare_elf_headers(&headers, &headers_sz);
> + ret = crash_prepare_elf64_headers(true, &headers, &headers_sz,
> + NULL, NULL, NULL);
> if (ret) {
> pr_err("Preparing elf core header failed\n");
> goto out_err;
Same comments as for arm64 apply for other architectures as well.
> diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
> index d35726d6a415..3105d28fd0c6 100644
> --- a/include/linux/crash_core.h
> +++ b/include/linux/crash_core.h
> @@ -2,11 +2,14 @@
> #ifndef LINUX_CRASH_CORE_H
> #define LINUX_CRASH_CORE_H
>
> -#include <linux/linkage.h>
> #include <linux/elfcore.h>
> #include <linux/elf.h>
> +#include <linux/kexec.h>
> +#include <linux/linkage.h>
> +#include <linux/vmalloc.h>
>
> struct kimage;
> +struct memory_notify;
>
> struct crash_mem {
> unsigned int max_nr_ranges;
> @@ -54,6 +57,66 @@ static inline int arch_crash_hotplug_support(struct kimage *image, unsigned long
> }
> #endif
>
> +extern int crash_exclude_mem_range(struct crash_mem *mem,
> + unsigned long long mstart,
> + unsigned long long mend);
> +
> +#ifndef arch_crash_exclude_mem_range
> +static __always_inline int arch_crash_exclude_mem_range(struct crash_mem **mem_ranges,
> + unsigned long long mstart,
> + unsigned long long mend)
> +{
> + return crash_exclude_mem_range(*mem_ranges, mstart, mend);
> +}
> +#endif
> +
> +#ifndef arch_get_system_nr_ranges
> +static inline int arch_get_system_nr_ranges(unsigned int *nr_ranges)
> +{
> + return -EINVAL;
> +}
> +#endif
> +
> +#ifndef arch_prepare_elf64_ram_headers
> +static inline int arch_prepare_elf64_ram_headers(struct crash_mem *cmem)
> +{
> + return -EINVAL;
> +}
> +#endif
> +
> +#ifndef arch_get_crash_memory_ranges
> +static inline int arch_get_crash_memory_ranges(struct crash_mem **cmem,
> + unsigned long *nr_mem_ranges,
> + struct kimage *image,
> + struct memory_notify *mn)
> +{
> + unsigned int nr_ranges;
> + int ret;
> +
> + /*
> + * Exclusion of crash region, crashk_low_res and/or crashk_cma_ranges
> + * may cause range splits. So add extra slots here.
> + */
> + nr_ranges = 1 + (crashk_low_res.end != 0) + crashk_cma_cnt;
> + ret = arch_get_system_nr_ranges(&nr_ranges);
> + if (ret)
> + return ret;
> +
> + *cmem = kvzalloc(struct_size(*cmem, ranges, nr_ranges), GFP_KERNEL);
> + if (!(*cmem))
> + return -ENOMEM;
> +
> + (*cmem)->max_nr_ranges = nr_ranges;
> + ret = arch_prepare_elf64_ram_headers(*cmem);
> + if (ret) {
> + kvfree(*cmem);
> + return ret;
> + }
> +
> + return 0;
> +}
This function is quite large for an inline, should be in
kernel/crash_core.c IMHO.
> +#endif
> +
> #ifndef crash_get_elfcorehdr_size
> static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
> #endif
> @@ -61,11 +124,11 @@ static inline unsigned int crash_get_elfcorehdr_size(void) { return 0; }
> /* Alignment required for elf header segment */
> #define ELF_CORE_HEADER_ALIGN 4096
>
> -extern int crash_exclude_mem_range(struct crash_mem *mem,
> - unsigned long long mstart,
> - unsigned long long mend);
> -extern int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
> - void **addr, unsigned long *sz);
> +extern int crash_prepare_elf64_headers(int need_kernel_map,
> + void **addr, unsigned long *sz,
> + unsigned long *nr_mem_ranges,
> + struct kimage *image,
> + struct memory_notify *mn);
>
> struct kimage;
> struct kexec_segment;
> diff --git a/kernel/crash_core.c b/kernel/crash_core.c
> index 99dac1aa972a..99a0d6abf88e 100644
> --- a/kernel/crash_core.c
> +++ b/kernel/crash_core.c
> @@ -18,6 +18,7 @@
> #include <linux/memblock.h>
> #include <linux/kmemleak.h>
> #include <linux/crash_core.h>
> +#include <linux/crash_reserve.h>
> #include <linux/reboot.h>
> #include <linux/btf.h>
> #include <linux/objtool.h>
> @@ -161,19 +162,80 @@ static inline resource_size_t crash_resource_size(const struct resource *res)
> return !res->end ? 0 : resource_size(res);
> }
>
> +static int crash_exclude_mem_ranges(struct crash_mem *cmem,
> + unsigned long *nr_mem_ranges)
> +{
> + int ret, i;
> +
> +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_32)
> + /*
> + * Exclusion of low 1M may not cause another range split, because the
> + * range of exclude is [0, 1M] and the condition for splitting a new
> + * region is that the start, end parameters are both in a certain
> + * existing region in cmem and cannot be equal to existing region's
> + * start or end. Obviously, the start of [0, 1M] cannot meet this
> + * condition.
> + *
> + * But in order to lest the low 1M could be changed in the future,
> + * (e.g. [start, 1M]), add a extra slot.
> + */
> + cmem->max_nr_ranges++;
>
> + /* Exclude the low 1M because it is always reserved */
> + ret = arch_crash_exclude_mem_range(&cmem, 0, SZ_1M - 1);
> + if (ret)
> + return ret;
> +#endif
This should remain in x86.
>
> + /* Exclude crashkernel region */
> + ret = arch_crash_exclude_mem_range(&cmem, crashk_res.start, crashk_res.end);
> + if (ret)
> + return ret;
>
> -int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
> - void **addr, unsigned long *sz)
> + if (crashk_low_res.end) {
> + ret = arch_crash_exclude_mem_range(&cmem, crashk_low_res.start, crashk_low_res.end);
> + if (ret)
> + return ret;
> + }
> +
> + for (i = 0; i < crashk_cma_cnt; ++i) {
> + ret = arch_crash_exclude_mem_range(&cmem, crashk_cma_ranges[i].start,
> + crashk_cma_ranges[i].end);
> + if (ret)
> + return ret;
> + }
> +
> + /* Return the computed number of memory ranges, for hotplug usage */
> + if (nr_mem_ranges)
> + *nr_mem_ranges = cmem->nr_ranges;
> +
> + return 0;
> +}
> +
> +int crash_prepare_elf64_headers(int need_kernel_map, void **addr,
> + unsigned long *sz, unsigned long *nr_mem_ranges,
> + struct kimage *image, struct memory_notify *mn)
Hmm, we are adding image and mn parameters only for powerpc and we already
have arch_crash_exclude_mem_range() and arch_get_crash_memory_ranges() to
accommodate powerpc differences.
I'd suggest to take a slightly different approach. I'm thinking that we can
add crash_prepare_elf_headers() that will be similar to current
x86/arm64/loongarch prepare_elf_headers(), leave
crash_prepare_elf64_headers() alone and add a helper to exclude common
ranges, e.g crash_exclude_core_ranges(struct crash_mem *mem).
The crash_prepare_headers() would be something like this (error handling
omitted):
int crash_prepare_headers(int need_kernel_map, void **addr, unsigned long *sz)
{
unsigned int nr;
struct crash_mem *cmem;
nr = arch_get_system_nr_ranges();
cmem = alloc_cmem(nr);
arch_crash_populate_cmem(cmem);
crash_exclude_core_ranges(cmem);
arch_crash_exclude_ranges(cmem);
crash_prepare_elf64_headers(cmem, need_kernel_map, addr, sz);
}
powerpc could reuse crash_exclude_core_ranges() provided the latter call
an overridable arch_crash_exclude_range()
What do you think?
--
Sincerely yours,
Mike.
More information about the linux-riscv
mailing list