[PATCH 1/3] LoongArch: Add kexec support
Youling Tang
tangyouling at loongson.cn
Tue Aug 30 20:38:23 PDT 2022
On 08/30/2022 11:25 AM, Youling Tang wrote:
> Hi, Jinyang
>
> On 08/30/2022 09:53 AM, Jinyang He wrote:
>> Hi, Youling,
>>
>>
>> On 08/29/2022 12:37 PM, Youling Tang wrote:
>>> Add three new files, kexec.h, machine_kexec.c and relocate_kernel.S to
>>> the
>>> LoongArch architecture that add support for the kexec re-boot mechanism
>>> (CONFIG_KEXEC) on LoongArch platforms.
>>>
>>> Supports loading vmlinux (vmlinux.elf) in ELF format and vmlinux.efi in
>>> PE format.
>>>
>>> I tested this on a LoongArch 3A5000 machine and it works as expected:
>>>
>>> $ sudo kexec -l /boot/vmlinux.efi --reuse-cmdline
>>> $ sudo kexec -e
>>>
>>> Signed-off-by: Youling Tang <tangyouling at loongson.cn>
>>> ---
>>> arch/loongarch/Kconfig | 11 ++
>>> arch/loongarch/include/asm/kexec.h | 58 ++++++++
>>> arch/loongarch/kernel/Makefile | 2 +
>>> arch/loongarch/kernel/head.S | 7 +-
>>> arch/loongarch/kernel/machine_kexec.c | 178 ++++++++++++++++++++++++
>>> arch/loongarch/kernel/relocate_kernel.S | 125 +++++++++++++++++
>>> 6 files changed, 380 insertions(+), 1 deletion(-)
>>> create mode 100644 arch/loongarch/include/asm/kexec.h
>>> create mode 100644 arch/loongarch/kernel/machine_kexec.c
>>> create mode 100644 arch/loongarch/kernel/relocate_kernel.S
>>>
>>> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
>>> index 45364cffc793..903c82fa958d 100644
>>> --- a/arch/loongarch/Kconfig
>>> +++ b/arch/loongarch/Kconfig
>>> @@ -409,6 +409,17 @@ config FORCE_MAX_ZONEORDER
>>> The page size is not necessarily 4KB. Keep this in mind
>>> when choosing a value for this option.
>>> +config KEXEC
>>> + bool "Kexec system call"
>>> + select KEXEC_CORE
>>> + help
>>> + kexec is a system call that implements the ability to shutdown
>>> your
>>> + current kernel, and to start another kernel. It is like a reboot
>>> + but it is independent of the system firmware. And like a reboot
>>> + you can start any kernel with it, not just Linux.
>>> +
>>> + The name comes from the similarity to the exec system call.
>>> +
>>> config SECCOMP
>>> bool "Enable seccomp to safely compute untrusted bytecode"
>>> depends on PROC_FS
>>> diff --git a/arch/loongarch/include/asm/kexec.h
>>> b/arch/loongarch/include/asm/kexec.h
>>> new file mode 100644
>>> index 000000000000..5c9e7b5eccb8
>>> --- /dev/null
>>> +++ b/arch/loongarch/include/asm/kexec.h
>>> @@ -0,0 +1,58 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * kexec.h for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +
>>> +#ifndef _ASM_KEXEC_H
>>> +#define _ASM_KEXEC_H
>>> +
>>> +#include <asm/stacktrace.h>
>>> +#include <asm/page.h>
>>> +
>>> +/* Maximum physical address we can use pages from */
>>> +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
>>> +/* Maximum address we can reach in physical address mode */
>>> +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
>>> + /* Maximum address we can use for the control code buffer */
>>> +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
>>> +
>>> +/* Reserve a page for the control code buffer */
>>> +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
>>> +
>>> +/* The native architecture */
>>> +#define KEXEC_ARCH KEXEC_ARCH_LOONGARCH
>>> +
>>> +static inline void crash_setup_regs(struct pt_regs *newregs,
>>> + struct pt_regs *oldregs)
>>> +{
>>> + if (oldregs)
>>> + memcpy(newregs, oldregs, sizeof(*newregs));
>>> + else
>>> + prepare_frametrace(newregs);
>>> +}
>>> +
>>> +#define ARCH_HAS_KIMAGE_ARCH
>>> +
>>> +struct kimage_arch {
>>> + unsigned long boot_flag;
>>> + unsigned long fdt_addr;
>>> +};
>>> +
>>> +typedef void (*do_kexec_t)(unsigned long boot_flag,
>>> + unsigned long fdt_addr,
>>> + unsigned long first_ind_entry,
>>> + unsigned long jump_addr);
>>> +
>>> +struct kimage;
>>> +extern const unsigned char relocate_new_kernel[];
>>> +extern const size_t relocate_new_kernel_size;
>>> +
>>> +#ifdef CONFIG_SMP
>>> +extern atomic_t kexec_ready_to_reboot;
>>> +extern const unsigned char kexec_smp_wait[];
>>> +extern void kexec_reboot(void);
>>> +#endif
>>> +
>>> +#endif /* !_ASM_KEXEC_H */
>>> diff --git a/arch/loongarch/kernel/Makefile
>>> b/arch/loongarch/kernel/Makefile
>>> index a213e994db68..20b64ac3f128 100644
>>> --- a/arch/loongarch/kernel/Makefile
>>> +++ b/arch/loongarch/kernel/Makefile
>>> @@ -17,6 +17,8 @@ obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
>>> obj-$(CONFIG_MODULES) += module.o module-sections.o
>>> obj-$(CONFIG_STACKTRACE) += stacktrace.o
>>> +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
>>> +
>>> obj-$(CONFIG_PROC_FS) += proc.o
>>> obj-$(CONFIG_SMP) += smp.o
>>> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
>>> index 01bac62a6442..22bdf4928325 100644
>>> --- a/arch/loongarch/kernel/head.S
>>> +++ b/arch/loongarch/kernel/head.S
>>> @@ -20,7 +20,12 @@
>>> _head:
>>> .word MZ_MAGIC /* "MZ", MS-DOS header */
>>> - .org 0x3c /* 0x04 ~ 0x3b reserved */
>>> + .org 0x8
>>> + .quad 0 /* Image load offset from start of RAM */
>>> + .dword _end - _text /* Effective size of kernel image */
>>> + .quad 0
>>> + .dword kernel_entry /* Kernel entry point */
>>> + .org 0x3c /* 0x28 ~ 0x3b reserved */
>>> .long pe_header - _head /* Offset to the PE header */
>>> pe_header:
>>> diff --git a/arch/loongarch/kernel/machine_kexec.c
>>> b/arch/loongarch/kernel/machine_kexec.c
>>> new file mode 100644
>>> index 000000000000..4ffcd4cd9c8c
>>> --- /dev/null
>>> +++ b/arch/loongarch/kernel/machine_kexec.c
>>> @@ -0,0 +1,178 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/*
>>> + * machine_kexec.c for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +#include <linux/compiler.h>
>>> +#include <linux/cpu.h>
>>> +#include <linux/kexec.h>
>>> +#include <linux/mm.h>
>>> +#include <linux/delay.h>
>>> +#include <linux/libfdt.h>
>>> +#include <linux/of_fdt.h>
>>> +
>>> +#include <asm/bootinfo.h>
>>> +#include <asm/cacheflush.h>
>>> +#include <asm/page.h>
>>> +
>>> +/* 0x100000 ~ 0x200000 is safe */
>>> +#define KEXEC_CTRL_CODE TO_CACHE(0x100000UL)
>>> +#define KEXEC_BLOB_ADDR TO_CACHE(0x108000UL)
>>> +
>>> +static unsigned long reboot_code_buffer;
>>> +#ifdef CONFIG_SMP
>>> +void (*relocated_kexec_smp_wait)(void *);
>>> +atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
>>> +#endif
>>> +
>>> +static unsigned long jump_addr;
>>> +static unsigned long first_ind_entry;
>>> +static unsigned long boot_flag;
>>> +static unsigned long fdt_addr;
>>> +
>>> +static void kexec_image_info(const struct kimage *kimage)
>>> +{
>>> + unsigned long i;
>>> +
>>> + pr_debug("kexec kimage info:\n");
>>> + pr_debug("\ttype: %d\n", kimage->type);
>>> + pr_debug("\tstart: %lx\n", kimage->start);
>>> + pr_debug("\thead: %lx\n", kimage->head);
>>> + pr_debug("\tnr_segments: %lu\n", kimage->nr_segments);
>>> +
>>> + for (i = 0; i < kimage->nr_segments; i++) {
>>> + pr_debug("\t segment[%lu]: %016lx - %016lx", i,
>>> + kimage->segment[i].mem,
>>> + kimage->segment[i].mem + kimage->segment[i].memsz);
>>> + pr_debug("\t\t0x%lx bytes, %lu pages\n",
>>> + (unsigned long)kimage->segment[i].memsz,
>>> + (unsigned long)kimage->segment[i].memsz / PAGE_SIZE);
>>> + }
>>> +}
>>> +
>>> +int machine_kexec_prepare(struct kimage *kimage)
>>> +{
>>> + int i;
>>> + void *dtb = (void *)KEXEC_BLOB_ADDR;
>>> +
>>> + kexec_image_info(kimage);
>>> +
>>> + /* Find the Flattened Device Tree */
>>> + for (i = 0; i < kimage->nr_segments; i++) {
>>> + if (!fdt_check_header(kimage->segment[i].buf)) {
>>> + memcpy(dtb, kimage->segment[i].buf, SZ_64K);
>>> + kimage->arch.boot_flag = fw_arg0;
>>> + kimage->arch.fdt_addr = (unsigned long) dtb;
>>> + break;
>>> + }
>>> + continue;
>>> + }
>>> +
>>> + /* kexec need a safe page to save reboot_code_buffer */
>>> + kimage->control_code_page = virt_to_page((void *)KEXEC_CTRL_CODE);
>>> +
>>> + reboot_code_buffer =
>>> + (unsigned long)page_address(kimage->control_code_page);
>>> + memcpy((void *)reboot_code_buffer, relocate_new_kernel,
>>> + relocate_new_kernel_size);
>> It copies the same content to KEXEC_CTRL_CODE each time; could we do this at
>> boot time?
> I think it's possible to have the copy action happen at boot-time or
> during the prepare phase. (RISCV in prepare, MIPS in boot-time)
>
>>
>> BTW, our system always keeps the low 2MB unused, on mips-loongson or
>> LoongArch. Is that necessary on LoongArch? We cannot use the parameter
>> 'mem=YYM' normally, but 'mem=YYM@2M' is ok. And the low 2MB is not
>> under virtual memory management, although we can get it in the kernel.
> For existing kernels, the low 2M has been reserved by
> memblock_reserve(PHYS_OFFSET, 0x200000), maybe it is acceptable to keep
> the low 2M behavior.
>
> Yes, we need to use "mem=YM@2M" if the low 2M is reserved.
>
>>
>> In the kexec/kdump process, we can follow kimage_alloc_control_pages().
>> When the boot cpu finishes copying the second kernel, all cpus can jump
>> to a kernel-entry-trampoline which is in the kernel image. Then we don't
>> need to worry that the code can be destroyed. The kernel-entry-trampoline
>> gets its cpuid, keeps non-boot cpus doing as kexec_smp_wait and lets the boot
>> cpu go to kernel-entry. In this way we can drop the low-2MB IMO.
>
> It is also feasible to dynamically allocate control pages, but it is
> easier to use a low 2M approach. What do you think, Huacai?
>
>>
>>> +
>>> + /* All secondary cpus now may jump to kexec_smp_wait cycle */
>>> + relocated_kexec_smp_wait = reboot_code_buffer +
>>> + (void *)(kexec_smp_wait - relocate_new_kernel);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +void machine_kexec_cleanup(struct kimage *kimage)
>>> +{
>>> +}
>>> +
>>> +#ifdef CONFIG_SMP
>>> +void kexec_reboot(void)
>>> +{
>>> + do_kexec_t do_kexec = NULL;
>>> +
>>> + /* All secondary cpus go to kexec_smp_wait */
>>> + if (smp_processor_id() > 0) {
>>> + relocated_kexec_smp_wait(NULL);
>>> + unreachable();
>>> + }
>>> +
>>> + do_kexec = (void *)reboot_code_buffer;
>>> + do_kexec(boot_flag, fdt_addr, first_ind_entry, jump_addr);
>>> +
>>> + unreachable();
>>> +}
>>> +
>>> +static void kexec_shutdown_secondary(void *)
>>> +{
>>> + local_irq_disable();
>>> + while (!atomic_read(&kexec_ready_to_reboot))
>>> + cpu_relax();
>>> +
>>> + kexec_reboot();
>>> +}
>>> +
>>> +void machine_crash_shutdown(struct pt_regs *regs)
>>> +{
>>> +}
>>> +#endif
>>> +
>>> +void machine_shutdown(void)
>>> +{
>>> + smp_call_function(kexec_shutdown_secondary, NULL, 0);
>>> +}
>>> +
>>> +void machine_kexec(struct kimage *image)
>>> +{
>>> + unsigned long entry;
>>> + unsigned long *ptr;
>>> + struct kimage_arch *internal = &image->arch;
>>> +
>>> + boot_flag = internal->boot_flag;
>>> + fdt_addr = internal->fdt_addr;
>>> +
>>> + jump_addr = (unsigned long)phys_to_virt(image->start);
>>> +
>>> + first_ind_entry = (unsigned long)phys_to_virt(image->head &
>>> PAGE_MASK);
>>> +
>>> + /*
>>> + * The generic kexec code builds a page list with physical
>>> + * addresses. they are directly accessible through XKPRANGE
>>> + * hence the phys_to_virt() call.
>>> + */
>>> + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
>>> + ptr = (entry & IND_INDIRECTION) ?
>>> + phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
>>> + if (*ptr & IND_SOURCE || *ptr & IND_INDIRECTION ||
>>> + *ptr & IND_DESTINATION)
>>> + *ptr = (unsigned long) phys_to_virt(*ptr);
>>> + }
>>> +
>>> + /* Mark offline before disabling local irq. */
>>> + set_cpu_online(smp_processor_id(), false);
>>> +
>>> + /* we do not want to be bothered. */
>>> + local_irq_disable();
>>> +
>>> + pr_notice("Will call new kernel at %lx\n", jump_addr);
>>> + pr_notice("FDT image at %lx\n", fdt_addr);
>>> + pr_notice("Bye ...\n");
>>> +
>>> + /* Make reboot code buffer available to the boot CPU. */
>>> + flush_cache_all();
>>> +
>>> + atomic_set(&kexec_ready_to_reboot, 1);
>>> +
>>> + /*
>>> + * We know we were online, and there will be no incoming IPIs at
>>> + * this point.
>>> + */
>>> + set_cpu_online(smp_processor_id(), true);
>>> +
>>> + /* Ensure remote CPUs observe that we're online before
>>> rebooting. */
>>> + smp_mb__after_atomic();
>>> +
>>> + kexec_reboot();
>>> +}
>>> diff --git a/arch/loongarch/kernel/relocate_kernel.S
>>> b/arch/loongarch/kernel/relocate_kernel.S
>>> new file mode 100644
>>> index 000000000000..d1f242f74ea8
>>> --- /dev/null
>>> +++ b/arch/loongarch/kernel/relocate_kernel.S
>>> @@ -0,0 +1,125 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +/*
>>> + * relocate_kernel.S for kexec
>>> + *
>>> + * Copyright (C) 2022 Loongson Technology Corporation Limited
>>> + */
>>> +
>>> +#include <linux/kexec.h>
>>> +
>>> +#include <asm/asm.h>
>>> +#include <asm/asmmacro.h>
>>> +#include <asm/regdef.h>
>>> +#include <asm/loongarch.h>
>>> +#include <asm/stackframe.h>
>>> +#include <asm/addrspace.h>
>>> +
>>> +#define IPI_REG_BASE 0x1fe01000
>>> +
>>> +SYM_CODE_START(relocate_new_kernel)
>>> + /*
>>> + * s0: Boot flag passed to the new kernel
>>> + * s1: Virt address of the FDT image
>>> + * s2: Pointer to the current entry
>>> + * s3: Virt address to jump to after relocation
>>> + */
>>> + move s0, a0
>>> + move s1, a1
>>> + move s2, a2
>>> + move s3, a3
>>> +
>>> +process_entry:
>>> + PTR_L s4, s2, 0
>>> + PTR_ADDI s2, s2, SZREG
>>> +
>>> + /* destination page */
>>> + andi s5, s4, IND_DESTINATION
>>> + beqz s5, 1f
>>> + li.w t0, ~0x1
>>> + and s6, s4, t0 /* store destination addr in s6 */
>>> + b process_entry
>>> +
>>> +1:
>>> + /* indirection page, update s2 */
>>> + andi s5, s4, IND_INDIRECTION
>>> + beqz s5, 1f
>>> + li.w t0, ~0x2
>>> + and s2, s4, t0
>>> + b process_entry
>>> +
>>> +1:
>>> + /* done page */
>>> + andi s5, s4, IND_DONE
>>> + beqz s5, 1f
>>> + b done
>>> +1:
>>> + /* source page */
>>> + andi s5, s4, IND_SOURCE
>>> + beqz s5, process_entry
>>> + li.w t0, ~0x8
>>> + and s4, s4, t0
>>> + li.w s8, (1 << _PAGE_SHIFT) / SZREG
>>> +
>>> +copy_word:
>>> + /* copy page word by word */
>>> + REG_L s7, s4, 0
>>> + REG_S s7, s6, 0
>>> + PTR_ADDI s6, s6, SZREG
>>> + PTR_ADDI s4, s4, SZREG
>>> + LONG_ADDI s8, s8, -1
>>> + beqz s8, process_entry
>>> + b copy_word
>>> + b process_entry
>>> +
>>> +done:
>>> + dbar 0
>> ibar, too?
>
> Will add ibar 0.
>
>>> +
>>> + move a0, s0
>>> + move a1, s1
>>> + /* jump to the new kernel */
>>> + jr s3
>>> +SYM_CODE_END(relocate_new_kernel)
>>> +
>>> +#ifdef CONFIG_SMP
>>> +/*
>>> + * Other CPUs should wait until code is relocated and
>>> + * then start at entry (?) point.
>>> + */
>>> +SYM_CODE_START(kexec_smp_wait)
>>> + li.d t0, IPI_REG_BASE
>>> + li.d t1, UNCACHE_BASE
>>> + or t0, t0, t1
>>> +
>>> + /*
>>> + * s1:initfn
>>> + * t0:base t1:cpuid t2:node t3:core t4:count
>>> + */
>>> + csrrd t1, LOONGARCH_CSR_CPUID
>>> + andi t1, t1, CSR_CPUID_COREID
>>> + andi t3, t1, 0x3
>>> + slli.w t3, t3, 8 /* get core id */
>>> + or t0, t0, t3
>>> + andi t2, t1, 0x3c
>>> + slli.d t2, t2, 42 /* get node id */
>>> + or t0, t0, t2
>>> +
>>> +1: li.w t4, 0x100 /* wait for init loop */
>>> +2: addi.w t4, t4, -1 /* limit mailbox access */
>>> + bnez t4, 2b
>>> + ld.w s1, t0, 0x20 /* check PC as an indicator */
>> Can we do this with iocsr*?
>
> OK, I will consider the implementation in the iocsr way.
>
The kexec_smp_wait implementation will be modified in the next release
as follows:
SYM_CODE_START(kexec_smp_wait)
1: li.w t0, 0x100 /* wait for init
loop */
2: addi.w t0, t0, -1 /* limit mailbox
access */
bnez t0, 2b
li.w t1, LOONGARCH_IOCSR_MBUF0
iocsrrd.w s1, t1 /* check PC as
an indicator */
beqz s1, 1b
iocsrrd.d s1, t1 /* get PC via
mailbox */
li.d t0, CACHE_BASE
or s1, s1, t0
jr s1 /* jump to
initial PC */
SYM_CODE_END(kexec_smp_wait)
1) Using the iocsr method
2) Remove sp, tp settings. sp, tp will be obtained from cpuboot_data in
smpboot_entry.
Thanks again Jinyang for the suggestion.
Youling.
> Thanks,
> Youling
>>
>> Thanks,
>> Jinyang
>>> + beqz s1, 1b
>>> + ld.d s1, t0, 0x20 /* get PC via mailbox */
>>> + ld.d sp, t0, 0x28 /* get SP via mailbox */
>>> + ld.d tp, t0, 0x30 /* get TP via mailbox */
>>> +
>>> + li.d t0, CACHE_BASE
>>> + or s1, s1, t0
>>> + jr s1 /* jump to initial PC */
>>> +SYM_CODE_END(kexec_smp_wait)
>>> +#endif
>>> +
>>> +relocate_new_kernel_end:
>>> +
>>> +SYM_DATA_START(relocate_new_kernel_size)
>>> + PTR relocate_new_kernel_end - relocate_new_kernel
>>> +SYM_DATA_END(relocate_new_kernel_size)
>>
>
More information about the kexec
mailing list