[PATCH v2 2/3] LoongArch: Add kdump support

Youling Tang tangyouling at loongson.cn
Fri Sep 9 02:03:16 PDT 2022


Hi, Huacai

On 09/09/2022 04:15 PM, Huacai Chen wrote:
> Hi, Youling,
>
> On Fri, Sep 9, 2022 at 11:20 AM Youling Tang <tangyouling at loongson.cn> wrote:
>>
>> This patch adds support for kdump, the kernel will reserve a region
>> for the crash kernel and jump there on panic.
>>
>> Arch-specific functions are added to allow for implementing a crash
>> dump file interface, /proc/vmcore, which can be viewed as an ELF file.
>>
>> A user space tool, like kexec-tools, is responsible for allocating a
>> separate region for the core's ELF header within crash kdump kernel
>> memory and filling it in when executing kexec_load().
>>
>> Then, its location will be advertised to crash dump kernel via a new
>> device-tree property, "linux,elfcorehdr", and crash dump kernel preserves
>> the region for later use with fdt_reserve_elfcorehdr() at boot time.
>>
>> At the same time, it will also limit the crash kdump kernel to the
>> crashkernel area via a new device-tree property, "linux,usable-memory-range",
>> so as not to destroy the original kernel dump data.
>>
>> On crash dump kernel, /proc/vmcore will access the primary kernel's memory
>> with copy_oldmem_page().
>>
>> I tested this on a LoongArch 3A5000 machine and it works as expected (the
>> suggested crashkernel parameter is "crashkernel=512M@2560M"). You may test it
>> by triggering a crash through /proc/sysrq-trigger:
>>
>>  $ sudo kexec -p /boot/vmlinux-kdump --reuse-cmdline --append="nr_cpus=1"
>>  # echo c > /proc/sysrq-trigger
>>
>> Signed-off-by: Youling Tang <tangyouling at loongson.cn>
>> ---
>>  arch/loongarch/Kconfig                  | 22 ++++++
>>  arch/loongarch/Makefile                 |  4 ++
>>  arch/loongarch/kernel/Makefile          |  1 +
>>  arch/loongarch/kernel/crash_dump.c      | 19 +++++
>>  arch/loongarch/kernel/machine_kexec.c   | 95 +++++++++++++++++++++++--
>>  arch/loongarch/kernel/mem.c             |  6 ++
>>  arch/loongarch/kernel/relocate_kernel.S |  6 ++
>>  arch/loongarch/kernel/setup.c           | 49 +++++++++++++
>>  arch/loongarch/kernel/traps.c           |  4 ++
>>  9 files changed, 200 insertions(+), 6 deletions(-)
>>  create mode 100644 arch/loongarch/kernel/crash_dump.c
>>
>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>> index 08e063aaf847..4eeeebf888c1 100644
>> --- a/arch/loongarch/Kconfig
>> +++ b/arch/loongarch/Kconfig
>> @@ -433,6 +433,28 @@ config KEXEC
>>
>>           The name comes from the similarity to the exec system call.
>>
>> +config CRASH_DUMP
>> +       bool "Build kdump crash kernel"
>> +       help
>> +         Generate crash dump after being started by kexec. This should
>> +         be normally only set in special crash dump kernels which are
>> +         loaded in the main kernel with kexec-tools into a specially
>> +         reserved region and then later executed after a crash by
>> +         kdump/kexec.
>> +
>> +         For more details see Documentation/admin-guide/kdump/kdump.rst
>> +
>> +config PHYSICAL_START
>> +       hex "Physical address where the kernel is loaded"
>> +       default "0x90000000a0000000" if 64BIT
>> +       depends on CRASH_DUMP
>> +       help
>> +         This gives the XKPRANGE address where the kernel is loaded.
>> +         If you plan to use kernel for capturing the crash dump change
>> +         this value to start of the reserved region (the "X" value as
>> +         specified in the "crashkernel=YM at XM" command line boot parameter
>> +         passed to the panic-ed kernel).
>> +
>>  config SECCOMP
>>         bool "Enable seccomp to safely compute untrusted bytecode"
>>         depends on PROC_FS
>> diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
>> index 69b39ba3a09d..224274c1644e 100644
>> --- a/arch/loongarch/Makefile
>> +++ b/arch/loongarch/Makefile
>> @@ -66,7 +66,11 @@ endif
>>  cflags-y += -ffreestanding
>>  cflags-y += $(call cc-option, -mno-check-zero-division)
>>
>> +ifdef CONFIG_PHYSICAL_START
>> +load-y         = $(CONFIG_PHYSICAL_START)
>> +else
>>  load-y         = 0x9000000000200000
>> +endif
>>  bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
>>
>>  drivers-$(CONFIG_PCI)          += arch/loongarch/pci/
>> diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
>> index 17dc8ce6b5ce..79eee7db1414 100644
>> --- a/arch/loongarch/kernel/Makefile
>> +++ b/arch/loongarch/kernel/Makefile
>> @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES)         += module.o module-sections.o
>>  obj-$(CONFIG_STACKTRACE)       += stacktrace.o
>>
>>  obj-$(CONFIG_KEXEC)             += machine_kexec.o relocate_kernel.o
>> +obj-$(CONFIG_CRASH_DUMP)        += crash_dump.o
>>
>>  obj-$(CONFIG_PROC_FS)          += proc.o
>>
>> diff --git a/arch/loongarch/kernel/crash_dump.c b/arch/loongarch/kernel/crash_dump.c
>> new file mode 100644
>> index 000000000000..13e5d2f7870d
>> --- /dev/null
>> +++ b/arch/loongarch/kernel/crash_dump.c
>> @@ -0,0 +1,19 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include <linux/highmem.h>
>> +#include <linux/crash_dump.h>
>> +#include <linux/io.h>
>> +
>> +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
>> +                        size_t csize, unsigned long offset)
>> +{
>> +       void  *vaddr;
>> +
>> +       if (!csize)
>> +               return 0;
>> +
>> +       vaddr = kmap_local_pfn(pfn);
>> +       csize = copy_to_iter(vaddr + offset, csize, iter);
>> +       kunmap_local(vaddr);
>> +
>> +       return csize;
>> +}
>> diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
>> index 7b3fea506c6d..847c4d025fed 100644
>> --- a/arch/loongarch/kernel/machine_kexec.c
>> +++ b/arch/loongarch/kernel/machine_kexec.c
>> @@ -6,11 +6,16 @@
>>   */
>>  #include <linux/compiler.h>
>>  #include <linux/cpu.h>
>> -#include <linux/kexec.h>
>> -#include <linux/mm.h>
>> +#include <linux/crash_dump.h>
>>  #include <linux/delay.h>
>> +#include <linux/irq.h>
>> +#include <linux/kexec.h>
>>  #include <linux/libfdt.h>
>> +#include <linux/mm.h>
>>  #include <linux/of_fdt.h>
>> +#include <linux/reboot.h>
>> +#include <linux/sched.h>
>> +#include <linux/sched/task_stack.h>
>>
>>  #include <asm/bootinfo.h>
>>  #include <asm/cacheflush.h>
>> @@ -21,6 +26,7 @@
>>  #define KEXEC_BLOB_ADDR        TO_CACHE(0x108000UL)
>>
>>  static unsigned long reboot_code_buffer;
>> +static cpumask_t cpus_in_crash = CPU_MASK_NONE;
>>
>>  #ifdef CONFIG_SMP
>>  static void (*relocated_kexec_smp_wait)(void *);
>> @@ -70,7 +76,7 @@ int machine_kexec_prepare(struct kimage *kimage)
>>                 continue;
>>         }
>>
>> -       /* kexec need a safe page to save reboot_code_buffer */
>> +       /* kexec/kdump need a safe page to save reboot_code_buffer */
>>         kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>
>>         reboot_code_buffer =
>> @@ -119,10 +125,85 @@ static void kexec_shutdown_secondary(void *)
>>
>>         kexec_reboot();
>>  }
>> -#endif
>> +
>> +static void crash_shutdown_secondary(void *passed_regs)
>> +{
>> +       struct pt_regs *regs = passed_regs;
>> +       int cpu = smp_processor_id();
>> +
>> +       /*
>> +        * If we are passed registers, use those.  Otherwise get the
>> +        * regs from the last interrupt, which should be correct, as
>> +        * we are in an interrupt.  But if the regs are not there,
>> +        * pull them from the top of the stack.  They are probably
>> +        * wrong, but we need something to keep from crashing again.
>> +        */
>> +       if (!regs)
>> +               regs = get_irq_regs();
>> +       if (!regs)
>> +               regs = task_pt_regs(current);
>> +
>> +       local_irq_disable();
>> +       if (!cpumask_test_cpu(cpu, &cpus_in_crash))
>> +               crash_save_cpu(regs, cpu);
>> +       cpumask_set_cpu(cpu, &cpus_in_crash);
>> +
>> +       while (!atomic_read(&kexec_ready_to_reboot))
>> +               cpu_relax();
>> +
>> +       kexec_reboot();
>> +}
>> +
>> +void crash_smp_send_stop(void)
>> +{
>> +       static int cpus_stopped;
>> +       unsigned long timeout;
>> +       unsigned int ncpus;
>> +
>> +       /*
>> +        * This function can be called twice in panic path, but obviously
>> +        * we execute this only once.
>> +        */
>> +       if (cpus_stopped)
>> +               return;
>> +
>> +       cpus_stopped = 1;
>> +
>> +        /* Excluding the panic cpu */
>> +       ncpus = num_online_cpus() - 1;
>> +
>> +       smp_call_function(crash_shutdown_secondary, NULL, 0);
>> +       smp_wmb();
>> +
>> +       /*
>> +        * The crash CPU sends an IPI and wait for other CPUs to
>> +        * respond. Delay of at least 10 seconds.
>> +        */
>> +       pr_emerg("Sending IPI to other cpus...\n");
>> +       timeout = USEC_PER_SEC * 10;
>> +       while ((cpumask_weight(&cpus_in_crash) < ncpus) && timeout--) {
>> +               cpu_relax();
>> +               udelay(1);
>> +       }
>> +}
>> +#endif /* defined(CONFIG_SMP) */
>>
>>  void machine_crash_shutdown(struct pt_regs *regs)
>>  {
>> +       int crashing_cpu;
>> +
>> +       local_irq_disable();
>> +
>> +       crashing_cpu = smp_processor_id();
>> +       crash_save_cpu(regs, crashing_cpu);
>> +
>> +#ifdef CONFIG_SMP
>> +       /* shutdown non-crashing cpus */
>> +       crash_smp_send_stop();
>> +#endif
>> +       cpumask_set_cpu(crashing_cpu, &cpus_in_crash);
>> +
>> +       pr_info("Starting crashdump kernel...\n");
>>  }
>>
>>  void machine_shutdown(void)
>> @@ -143,7 +224,8 @@ void machine_kexec(struct kimage *image)
>>
>>         jump_addr = (unsigned long)phys_to_virt(image->start);
>>
>> -       first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
>> +       if (image->type == KEXEC_TYPE_DEFAULT)
>> +               first_ind_entry = (unsigned long)phys_to_virt(image->head & PAGE_MASK);
> The old implementation uses "kexec_indirection_page = (unsigned
> long)&image->head" in the kdump case, we don't need that now?

The kdump operation copies the entire kernel directly into the crashkernel
area, so there is no need to go through the indirection page. In
relocate_new_kernel we check whether the first_ind_entry parameter is 0
and, if it is, skip the copy loop.

Youling.
>
> Huacai
>>
>>         /*
>>          * The generic kexec code builds a page list with physical
>> @@ -177,7 +259,8 @@ void machine_kexec(struct kimage *image)
>>
>>         /*
>>          * We know we were online, and there will be no incoming IPIs at
>> -        * this point.
>> +        * this point. Mark online again before rebooting so that the crash
>> +        * analysis tool will see us correctly.
>>          */
>>         set_cpu_online(smp_processor_id(), true);
>>
>> diff --git a/arch/loongarch/kernel/mem.c b/arch/loongarch/kernel/mem.c
>> index 7423361b0ebc..c6def6ff81c8 100644
>> --- a/arch/loongarch/kernel/mem.c
>> +++ b/arch/loongarch/kernel/mem.c
>> @@ -5,6 +5,7 @@
>>  #include <linux/efi.h>
>>  #include <linux/initrd.h>
>>  #include <linux/memblock.h>
>> +#include <linux/of_fdt.h>
>>
>>  #include <asm/bootinfo.h>
>>  #include <asm/loongson.h>
>> @@ -61,4 +62,9 @@ void __init memblock_init(void)
>>
>>         /* Reserve the initrd */
>>         reserve_initrd_mem();
>> +
>> +       /* Mainly reserved memory for the elf core head */
>> +       early_init_fdt_scan_reserved_mem();
>> +       /* Parse linux,usable-memory-range is for crash dump kernel */
>> +       early_init_dt_check_for_usable_mem_range();
>>  }
>> diff --git a/arch/loongarch/kernel/relocate_kernel.S b/arch/loongarch/kernel/relocate_kernel.S
>> index 05a0c1ccfab6..0bca7518bca2 100644
>> --- a/arch/loongarch/kernel/relocate_kernel.S
>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>> @@ -26,6 +26,12 @@ SYM_CODE_START(relocate_new_kernel)
>>         move            s2, a2
>>         move            s3, a3
>>
>> +       /*
>> +        * In case of a kdump/crash kernel, the indirection page is not
>> +        * populated as the kernel is directly copied to a reserved location
>> +        */
>> +       beqz            s2, done
>> +
>>  process_entry:
>>         PTR_L           s4, s2, 0
>>         PTR_ADDI        s2, s2, SZREG
>> diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
>> index 058c232c11e8..6974b88446a8 100644
>> --- a/arch/loongarch/kernel/setup.c
>> +++ b/arch/loongarch/kernel/setup.c
>> @@ -19,6 +19,8 @@
>>  #include <linux/memblock.h>
>>  #include <linux/initrd.h>
>>  #include <linux/ioport.h>
>> +#include <linux/kexec.h>
>> +#include <linux/crash_dump.h>
>>  #include <linux/root_dev.h>
>>  #include <linux/console.h>
>>  #include <linux/pfn.h>
>> @@ -185,6 +187,50 @@ static int __init early_parse_mem(char *p)
>>  }
>>  early_param("mem", early_parse_mem);
>>
>> +static void __init loongarch_parse_crashkernel(void)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       unsigned long long start;
>> +       unsigned long long total_mem;
>> +       unsigned long long crash_size, crash_base;
>> +       int ret;
>> +
>> +       total_mem = memblock_phys_mem_size();
>> +       ret = parse_crashkernel(boot_command_line, total_mem,
>> +                               &crash_size, &crash_base);
>> +       if (ret != 0 || crash_size <= 0)
>> +               return;
>> +
>> +
>> +       start = memblock_phys_alloc_range(crash_size, 1, crash_base,
>> +                                       crash_base + crash_size);
>> +       if (start != crash_base) {
>> +               pr_warn("Invalid memory region reserved for crash kernel\n");
>> +               return;
>> +       }
>> +
>> +       crashk_res.start = crash_base;
>> +       crashk_res.end   = crash_base + crash_size - 1;
>> +#endif
>> +}
>> +
>> +static void __init request_crashkernel(struct resource *res)
>> +{
>> +#ifdef CONFIG_KEXEC
>> +       int ret;
>> +
>> +       if (crashk_res.start == crashk_res.end)
>> +               return;
>> +
>> +       ret = request_resource(res, &crashk_res);
>> +       if (!ret)
>> +               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
>> +                       (unsigned long)((crashk_res.end -
>> +                                        crashk_res.start + 1) >> 20),
>> +                       (unsigned long)(crashk_res.start  >> 20));
>> +#endif
>> +}
>> +
>>  void __init platform_init(void)
>>  {
>>         efi_init();
>> @@ -228,6 +274,8 @@ static void __init arch_mem_init(char **cmdline_p)
>>
>>         check_kernel_sections_mem();
>>
>> +       loongarch_parse_crashkernel();
>> +
>>         /*
>>          * In order to reduce the possibility of kernel panic when failed to
>>          * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
>> @@ -289,6 +337,7 @@ static void __init resource_init(void)
>>                 request_resource(res, &code_resource);
>>                 request_resource(res, &data_resource);
>>                 request_resource(res, &bss_resource);
>> +               request_crashkernel(res);
>>         }
>>  }
>>
>> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
>> index aa1c95aaf595..0e610872f3f4 100644
>> --- a/arch/loongarch/kernel/traps.c
>> +++ b/arch/loongarch/kernel/traps.c
>> @@ -10,6 +10,7 @@
>>  #include <linux/entry-common.h>
>>  #include <linux/init.h>
>>  #include <linux/kernel.h>
>> +#include <linux/kexec.h>
>>  #include <linux/module.h>
>>  #include <linux/extable.h>
>>  #include <linux/mm.h>
>> @@ -246,6 +247,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
>>
>>         oops_exit();
>>
>> +       if (regs && kexec_should_crash(current))
>> +               crash_kexec(regs);
>> +
>>         if (in_interrupt())
>>                 panic("Fatal exception in interrupt");
>>
>> --
>> 2.36.0
>>




More information about the kexec mailing list