[PATCH] RISC-V: Add support for riscv kexec/kdump on kexec-tools

Xianting Tian xianting.tian at linux.alibaba.com
Mon Sep 19 01:20:37 PDT 2022


sorry, I will send v2 patch to switch the author to NICK.

在 2022/9/19 下午3:57, Guo Ren 写道:
> F.Y.I Nick Kossifidis
>
> The first author should be him.
>
> On Wed, Sep 14, 2022 at 3:52 PM Xianting Tian
> <xianting.tian at linux.alibaba.com> wrote:
>> This patch adds support for loading the ELF kernel image. It parses
>> the current/provided device tree to determine the system's memory
>> layout, and /proc/iomem for the various kernel segments.
>>
>> This patch was firstly developed by Nick Kossifidis, and two fixes (
>>     1: fail to find free memory area for dtb load when using initrd image,
>>        lists.infradead.org/pipermail/linux-riscv/2022-August/018398.html;
>>     2: fix memory range size calculation,
>>         kexec/arch/riscv/crashdump-riscv.c:line 85
>> ) are contributed by Yixun Lan, Xianting Tian.
>>
>> Tested on Qemu's rv64 virt machine and SoC of T-Head RISC-V Xuantie 910 CPU.
>>
>> Tested-by: Yixun Lan <yixun.lan at gmail.com>
>> Signed-off-by: Xianting Tian <xianting.tian at linux.alibaba.com>
>> Signed-off-by: Yixun Lan <yixun.lan at gmail.com>
>> Signed-off-by: Nick Kossifidis <mick at ics.forth.gr>
>> ---
>>   configure.ac                            |   3 +
>>   include/elf.h                           |   3 +-
>>   kexec/Makefile                          |   1 +
>>   kexec/arch/riscv/Makefile               |  35 ++
>>   kexec/arch/riscv/crashdump-riscv.c      | 140 ++++++++
>>   kexec/arch/riscv/include/arch/options.h |  43 +++
>>   kexec/arch/riscv/kexec-elf-riscv.c      | 255 ++++++++++++++
>>   kexec/arch/riscv/kexec-riscv.c          | 365 +++++++++++++++++++
>>   kexec/arch/riscv/kexec-riscv.h          |  32 ++
>>   kexec/dt-ops.c                          | 442 +++++++++++++++++++++++-
>>   kexec/dt-ops.h                          |   7 +
>>   kexec/kexec-syscall.h                   |   4 +
>>   purgatory/Makefile                      |   1 +
>>   purgatory/arch/riscv/Makefile           |   7 +
>>   14 files changed, 1336 insertions(+), 2 deletions(-)
>>   create mode 100644 kexec/arch/riscv/Makefile
>>   create mode 100644 kexec/arch/riscv/crashdump-riscv.c
>>   create mode 100644 kexec/arch/riscv/include/arch/options.h
>>   create mode 100644 kexec/arch/riscv/kexec-elf-riscv.c
>>   create mode 100644 kexec/arch/riscv/kexec-riscv.c
>>   create mode 100644 kexec/arch/riscv/kexec-riscv.h
>>   create mode 100644 purgatory/arch/riscv/Makefile
>>
>> diff --git a/configure.ac b/configure.ac
>> index 0d825ef..7cc55b0 100644
>> --- a/configure.ac
>> +++ b/configure.ac
>> @@ -58,6 +58,9 @@ case $target_cpu in
>>          hppa*)
>>                  ARCH="hppa"
>>                  ;;
>> +       riscv32|riscv64 )
>> +               ARCH="riscv"
>> +               ;;
>>          * )
>>                  AC_MSG_ERROR([unsupported architecture $target_cpu])
>>                  ;;
>> diff --git a/include/elf.h b/include/elf.h
>> index b7677a2..123f167 100644
>> --- a/include/elf.h
>> +++ b/include/elf.h
>> @@ -259,7 +259,8 @@ typedef struct
>>   #define EM_ARC_A5      93              /* ARC Cores Tangent-A5 */
>>   #define EM_XTENSA      94              /* Tensilica Xtensa Architecture */
>>   #define EM_AARCH64     183             /* ARM AARCH64 */
>> -#define EM_NUM         184
>> +#define EM_RISCV       243             /* RISC-V */
>> +#define EM_NUM         244
>>
>>   /* If it is necessary to assign new unofficial EM_* values, please
>>      pick large random numbers (0x8523, 0xa7f2, etc.) to minimize the
>> diff --git a/kexec/Makefile b/kexec/Makefile
>> index e69e309..ca17831 100644
>> --- a/kexec/Makefile
>> +++ b/kexec/Makefile
>> @@ -88,6 +88,7 @@ include $(srcdir)/kexec/arch/mips/Makefile
>>   include $(srcdir)/kexec/arch/cris/Makefile
>>   include $(srcdir)/kexec/arch/ppc/Makefile
>>   include $(srcdir)/kexec/arch/ppc64/Makefile
>> +include $(srcdir)/kexec/arch/riscv/Makefile
>>   include $(srcdir)/kexec/arch/s390/Makefile
>>   include $(srcdir)/kexec/arch/sh/Makefile
>>   include $(srcdir)/kexec/arch/x86_64/Makefile
>> diff --git a/kexec/arch/riscv/Makefile b/kexec/arch/riscv/Makefile
>> new file mode 100644
>> index 0000000..f26cc90
>> --- /dev/null
>> +++ b/kexec/arch/riscv/Makefile
>> @@ -0,0 +1,35 @@
>> +#
>> +# kexec riscv
>> +#
>> +riscv_KEXEC_SRCS =  kexec/arch/riscv/kexec-riscv.c
>> +riscv_KEXEC_SRCS += kexec/arch/riscv/kexec-elf-riscv.c
>> +riscv_KEXEC_SRCS += kexec/arch/riscv/crashdump-riscv.c
>> +
>> +riscv_MEM_REGIONS = kexec/mem_regions.c
>> +
>> +riscv_DT_OPS += kexec/dt-ops.c
>> +
>> +riscv_ARCH_REUSE_INITRD =
>> +
>> +riscv_CPPFLAGS += -I $(srcdir)/kexec/
>> +
>> +dist += kexec/arch/riscv/Makefile $(riscv_KEXEC_SRCS)                  \
>> +       kexec/arch/riscv/kexec-riscv.h                                  \
>> +       kexec/arch/riscv/include/arch/options.h
>> +
>> +ifdef HAVE_LIBFDT
>> +
>> +LIBS += -lfdt
>> +
>> +else
>> +
>> +include $(srcdir)/kexec/libfdt/Makefile.libfdt
>> +
>> +libfdt_SRCS += $(LIBFDT_SRCS:%=kexec/libfdt/%)
>> +
>> +riscv_CPPFLAGS += -I$(srcdir)/kexec/libfdt
>> +
>> +riscv_KEXEC_SRCS += $(libfdt_SRCS)
>> +
>> +endif
>> +
>> diff --git a/kexec/arch/riscv/crashdump-riscv.c b/kexec/arch/riscv/crashdump-riscv.c
>> new file mode 100644
>> index 0000000..3ed4fe3
>> --- /dev/null
>> +++ b/kexec/arch/riscv/crashdump-riscv.c
>> @@ -0,0 +1,140 @@
>> +#include <errno.h>
>> +#include <linux/elf.h>
>> +#include <unistd.h>
>> +
>> +#include "kexec.h"
>> +#include "crashdump.h"
>> +#include "kexec-elf.h"
>> +#include "mem_regions.h"
>> +
>> +static struct crash_elf_info elf_info = {
>> +#if __riscv_xlen == 64
>> +       .class          = ELFCLASS64,
>> +#else
>> +       .class          = ELFCLASS32,
>> +#endif
>> +       .data           = ELFDATA2LSB,
>> +       .machine        = EM_RISCV,
>> +};
>> +
>> +static struct memory_ranges crash_mem_ranges = {0};
>> +struct memory_range elfcorehdr_mem = {0};
>> +
>> +static unsigned long long get_page_offset(struct kexec_info *info)
>> +{
>> +       unsigned long long vaddr_off = 0;
>> +       unsigned long long page_size = sysconf(_SC_PAGESIZE);
>> +       unsigned long long init_start = get_kernel_sym("_sinittext");
>> +
>> +       /*
>> +        * Begining of init section is aligned to page size
>> +        */
>> +       vaddr_off = init_start - page_size;
>> +
>> +       return vaddr_off;
>> +}
>> +
>> +int load_elfcorehdr(struct kexec_info *info)
>> +{
>> +       struct memory_range crashkern_range = {0};
>> +       struct memory_range *ranges = NULL;
>> +       unsigned long start = 0;
>> +       unsigned long end = 0;
>> +       unsigned long buf_size = 0;
>> +       unsigned long elfcorehdr_addr = 0;
>> +       void* buf = NULL;
>> +       int i = 0;
>> +       int ret = 0;
>> +
>> +       ret = parse_iomem_single("Kernel code\n", &start, NULL);
>> +       if (ret) {
>> +               fprintf(stderr, "Cannot determine kernel physical base addr\n");
>> +               return -EINVAL;
>> +       }
>> +       elf_info.kern_paddr_start = start;
>> +
>> +       ret = parse_iomem_single("Kernel bss\n", NULL, &end);
>> +       if (ret) {
>> +               fprintf(stderr, "Cannot determine kernel physical bss addr\n");
>> +               return -EINVAL;
>> +       }
>> +       elf_info.kern_paddr_start = start;
>> +       elf_info.kern_size = end - start;
>> +
>> +       elf_info.kern_vaddr_start = get_kernel_sym("_text");
>> +       if (!elf_info.kern_vaddr_start) {
>> +               elf_info.kern_vaddr_start = UINT64_MAX;
>> +       }
>> +
>> +       elf_info.page_offset = get_page_offset(info);
>> +       dbgprintf("page_offset:   %016llx\n", elf_info.page_offset);
>> +
>> +       ret = parse_iomem_single("Crash kernel\n", &start, &end);
>> +       if (ret) {
>> +               fprintf(stderr, "Cannot determine kernel physical bss addr\n");
>> +               return -EINVAL;
>> +       }
>> +       crashkern_range.start = start;
>> +       crashkern_range.end = end;
>> +       crashkern_range.type = RANGE_RESERVED;
>> +
>> +       ranges = info->memory_range;
>> +       for (i = 0; i < info->memory_ranges; i++) {
>> +               ret = mem_regions_alloc_and_add(&crash_mem_ranges,
>> +                                               ranges[i].start,
>> +                                               ranges[i].end - ranges[i].start + 1,
>> +                                               ranges[i].type);
>> +               if (ret ) {
>> +                       fprintf(stderr, "Could not create crash_mem_ranges\n");
>> +                       return ret;
>> +               }
>> +       }
>> +
>> +       ret = mem_regions_alloc_and_exclude(&crash_mem_ranges,
>> +                                           &crashkern_range);
>> +       if (ret) {
>> +               fprintf(stderr, "Could not exclude crashkern_range\n");
>> +               return ret;
>> +       }
>> +
>> +#if __riscv_xlen == 64
>> +       crash_create_elf64_headers(info, &elf_info, crash_mem_ranges.ranges,
>> +                                  crash_mem_ranges.size, &buf, &buf_size,
>> +                                  ELF_CORE_HEADER_ALIGN);
>> +
>> +#else
>> +       crash_create_elf32_headers(info, &elf_info, crash_mem_ranges.ranges,
>> +                                  crash_mem_ranges.size, &buf, &buf_size,
>> +                                  ELF_CORE_HEADER_ALIGN);
>> +#endif
>> +
>> +
>> +       elfcorehdr_addr = add_buffer_phys_virt(info, buf, buf_size,
>> +                                              buf_size, 0,
>> +                                              crashkern_range.start,
>> +                                              crashkern_range.end,
>> +                                              -1, 0);
>> +
>> +       elfcorehdr_mem.start = elfcorehdr_addr;
>> +       elfcorehdr_mem.end = elfcorehdr_addr + buf_size - 1;
>> +
>> +       dbgprintf("%s: elfcorehdr 0x%llx-0x%llx\n", __func__,
>> +                 elfcorehdr_mem.start, elfcorehdr_mem.end);
>> +
>> +       return 0;
>> +}
>> +
>> +int is_crashkernel_mem_reserved(void)
>> +{
>> +       uint64_t start = 0;
>> +       uint64_t end = 0;
>> +
>> +       return parse_iomem_single("Crash kernel\n", &start, &end) == 0 ?
>> +              (start != end) : 0;
>> +}
>> +
>> +int get_crash_kernel_load_range(uint64_t *start, uint64_t *end)
>> +{
>> +       return parse_iomem_single("Crash kernel\n", start, end);
>> +}
>> +
>> diff --git a/kexec/arch/riscv/include/arch/options.h b/kexec/arch/riscv/include/arch/options.h
>> new file mode 100644
>> index 0000000..7c24184
>> --- /dev/null
>> +++ b/kexec/arch/riscv/include/arch/options.h
>> @@ -0,0 +1,43 @@
>> +#ifndef KEXEC_ARCH_RISCV_OPTIONS_H
>> +#define KEXEC_ARCH_RISCV_OPTIONS_H
>> +
>> +#define OPT_APPEND             ((OPT_MAX)+0)
>> +#define OPT_DTB                        ((OPT_MAX)+1)
>> +#define OPT_INITRD             ((OPT_MAX)+2)
>> +#define OPT_CMDLINE            ((OPT_MAX)+3)
>> +#define OPT_REUSE_CMDLINE      ((OPT_MAX)+4)
>> +#define OPT_ARCH_MAX           ((OPT_MAX)+5)
>> +
>> +/* Options relevant to the architecture (excluding loader-specific ones),
>> + * in this case none:
>> + */
>> +#define KEXEC_ARCH_OPTIONS \
>> +       KEXEC_OPTIONS \
>> +       { "append",             1, 0, OPT_APPEND}, \
>> +       { "dtb",                1, 0, OPT_DTB }, \
>> +       { "initrd",             1, 0, OPT_INITRD }, \
>> +       { "command-line",       1, 0, OPT_CMDLINE}, \
>> +       { "reuse-cmdline",      0, NULL, OPT_REUSE_CMDLINE }, \
>> +
>> +
>> +#define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR ""
>> +
>> +/* The following two #defines list ALL of the options added by all of the
>> + * architecture's loaders.
>> + * o   main() uses this complete list to scan for its options, ignoring
>> + *     arch-specific/loader-specific ones.
>> + * o   Then, arch_process_options() uses this complete list to scan for its
>> + *     options, ignoring general/loader-specific ones.
>> + * o   Then, the file_type[n].load re-scans for options, using
>> + *     KEXEC_ARCH_OPTIONS plus its loader-specific options subset.
>> + *     Any unrecognised options cause an error here.
>> + *
>> + * This is done so that main()'s/arch_process_options()'s getopt_long() calls
>> + * don't choose a kernel filename from random arguments to options they don't
>> + * recognise -- as they now recognise (if not act upon) all possible options.
>> + */
>> +#define KEXEC_ALL_OPTIONS KEXEC_ARCH_OPTIONS
>> +
>> +#define KEXEC_ALL_OPT_STR KEXEC_ARCH_OPT_STR
>> +
>> +#endif /* KEXEC_ARCH_RISCV_OPTIONS_H */
>> diff --git a/kexec/arch/riscv/kexec-elf-riscv.c b/kexec/arch/riscv/kexec-elf-riscv.c
>> new file mode 100644
>> index 0000000..f3c011c
>> --- /dev/null
>> +++ b/kexec/arch/riscv/kexec-elf-riscv.c
>> @@ -0,0 +1,255 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2019 FORTH-ICS/CARV
>> + *              Nick Kossifidis <mick at ics.forth.gr>
>> + */
>> +
>> +#include "kexec.h"
>> +#include "dt-ops.h"            /* For dtb_set/clear_initrd() */
>> +#include <elf.h>               /* For ELF header handling */
>> +#include <errno.h>             /* For EFBIG/EINVAL */
>> +#include <unistd.h>            /* For getpagesize() */
>> +#include "kexec-syscall.h"     /* For KEXEC_ON_CRASH */
>> +#include "kexec-riscv.h"
>> +
>> +
>> +/*********\
>> +* HELPERS *
>> +\*********/
>> +
>> +/*
>> + * Go through the available physical memory regions and
>> + * find one that can hold an image of the specified size.
>> + * Note: This is called after get_memory_ranges so
>> + * info->memory_range[] should be populated. Also note that
>> + * memory ranges are sorted, so we'll return the first region
>> + * that's big enough for holding the image.
>> + */
>> +static int elf_riscv_find_pbase(struct kexec_info *info, off_t *addr,
>> +                               off_t size)
>> +{
>> +       int i = 0;
>> +       off_t start = 0;
>> +       off_t end = 0;
>> +       int ret = 0;
>> +
>> +       /*
>> +        * If this image is for a crash kernel, use the region
>> +        * the primary kernel has already reserved for us.
>> +        */
>> +       if (info->kexec_flags & KEXEC_ON_CRASH) {
>> +               ret = get_crash_kernel_load_range((uint64_t *) &start,
>> +                                                 (uint64_t *) &end);
>> +               if (!ret) {
>> +                       /*
>> +                        * Kernel should be aligned to the nearest
>> +                        * hugepage (2MB for RV64, 4MB for RV32).
>> +                        */
>> +#if __riscv_xlen == 64
>> +                       start = _ALIGN_UP(start, 0x200000);
>> +#else
>> +                       start = _ALIGN_UP(start, 0x400000);
>> +#endif
>> +                       if (end > start && ((end - start) >= size)) {
>> +                               *addr = start;
>> +                               return 0;
>> +                       }
>> +
>> +                       return -EFBIG;
>> +               } else
>> +                       return ENOCRASHKERNEL;
>> +       }
>> +
>> +       for (i = 0; i < info->memory_ranges; i++) {
>> +               if (info->memory_range[i].type != RANGE_RAM)
>> +                       continue;
>> +
>> +               start = info->memory_range[i].start;
>> +               end = info->memory_range[i].end;
>> +
>> +#if __riscv_xlen == 64
>> +               start = _ALIGN_UP(start, 0x200000);
>> +#else
>> +               start = _ALIGN_UP(start, 0x400000);
>> +#endif
>> +
>> +               if (end > start && ((end - start) >= size)) {
>> +                       *addr = start;
>> +                       return 0;
>> +               }
>> +       }
>> +
>> +       return -EFBIG;
>> +}
>> +
>> +/**************\
>> +* ENTRY POINTS *
>> +\**************/
>> +
>> +int elf_riscv_probe(const char *buf, off_t len)
>> +{
>> +       struct mem_ehdr ehdr = {0};
>> +       int ret = 0;
>> +
>> +       ret = build_elf_exec_info(buf, len, &ehdr, 0);
>> +       if (ret < 0)
>> +               goto cleanup;
>> +
>> +       if (ehdr.e_machine != EM_RISCV) {
>> +               fprintf(stderr, "Not for this architecture.\n");
>> +               ret = -EINVAL;
>> +               goto cleanup;
>> +       }
>> +
>> +       ret = 0;
>> +
>> + cleanup:
>> +       free_elf_info(&ehdr);
>> +       return ret;
>> +}
>> +
>> +void elf_riscv_usage(void)
>> +{
>> +}
>> +
>> +int elf_riscv_load(int argc, char **argv, const char *buf, off_t len,
>> +                  struct kexec_info *info)
>> +{
>> +       struct mem_ehdr ehdr = {0};
>> +       struct mem_phdr *phdr = NULL;
>> +       off_t new_base_addr = 0;
>> +       off_t kernel_size = 0;
>> +       off_t page_size = getpagesize();
>> +       off_t max_addr = 0;
>> +       off_t old_base_addr = 0;
>> +       off_t old_start_addr = 0;
>> +       int i = 0;
>> +       int ret = 0;
>> +
>> +       if (info->file_mode) {
>> +               fprintf(stderr, "kexec_file not supported on this "
>> +                               "architecture\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* Parse the ELF file */
>> +       ret = build_elf_exec_info(buf, len, &ehdr, 0);
>> +       if (ret < 0) {
>> +               fprintf(stderr, "ELF exec parse failed\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       max_addr = elf_max_addr(&ehdr);
>> +       old_base_addr = max_addr;
>> +       old_start_addr = max_addr;
>> +
>> +       /*
>> +        * Get the memory footprint, base physical
>> +        * and start address of the ELF image
>> +        */
>> +       for (i = 0; i < ehdr.e_phnum; i++) {
>> +               phdr = &ehdr.e_phdr[i];
>> +               if (phdr->p_type != PT_LOAD)
>> +                       continue;
>> +
>> +               /*
>> +                * Note: According to ELF spec the loadable regions
>> +                * are sorted on p_vaddr, not p_paddr.
>> +                */
>> +               if (old_base_addr > phdr->p_paddr)
>> +                       old_base_addr = phdr->p_paddr;
>> +
>> +               if (phdr->p_vaddr == ehdr.e_entry ||
>> +                   phdr->p_paddr == ehdr.e_entry)
>> +                       old_start_addr = phdr->p_paddr;
>> +
>> +               kernel_size += _ALIGN_UP(phdr->p_memsz, page_size);
>> +       }
>> +
>> +       if (old_base_addr == max_addr || kernel_size == 0) {
>> +               fprintf(stderr, "No loadable segments present on the "
>> +                               "provided ELF image\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       if (old_start_addr == max_addr) {
>> +               fprintf(stderr, "Could not find the entry point address of "
>> +                               "provided ELF image\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       dbgprintf("Got ELF with total memsz %luKB\n"
>> +                 "Base paddr: 0x%lX, start_addr: 0x%lX\n",
>> +                 kernel_size / 1024, old_base_addr, old_start_addr);
>> +
>> +       /* Get a continuous physical region that can hold the kernel */
>> +       ret = elf_riscv_find_pbase(info, &new_base_addr, kernel_size);
>> +       if (ret < 0) {
>> +               fprintf(stderr, "Could not find a memory region for the "
>> +                               "provided ELF image\n");
>> +               return ret;
>> +       }
>> +
>> +       dbgprintf("New base paddr for the ELF: 0x%lX\n", new_base_addr);
>> +
>> +       /* Re-set the base physical address of the ELF */
>> +       for (i = 0; i < ehdr.e_phnum; i++) {
>> +               phdr = &ehdr.e_phdr[i];
>> +               if (phdr->p_type != PT_LOAD)
>> +                       continue;
>> +
>> +               phdr->p_paddr -= old_base_addr;
>> +               phdr->p_paddr += new_base_addr;
>> +       }
>> +
>> +       /* Re-set the entry point address */
>> +       ehdr.e_entry = (old_start_addr - old_base_addr) + new_base_addr;
>> +       info->entry = (void *) ehdr.e_entry;
>> +       dbgprintf("New entry point for the ELF: 0x%llX\n", ehdr.e_entry);
>> +
>> +
>> +       /* Load the ELF executable */
>> +       ret = elf_exec_load(&ehdr, info);
>> +       if (ret < 0) {
>> +               fprintf(stderr, "ELF exec load failed\n");
>> +               return ret;
>> +       }
>> +
>> +       ret = load_extra_segments(info, new_base_addr,
>> +                                 kernel_size, max_addr);
>> +       return ret;
>> +}
>> +
>> +
>> +/*******\
>> +* STUBS *
>> +\*******/
>> +
>> +int machine_verify_elf_rel(struct mem_ehdr *ehdr)
>> +{
>> +       if (ehdr->ei_data != ELFDATA2LSB)
>> +               return 0;
>> +#if __riscv_xlen == 64
>> +       if (ehdr->ei_class != ELFCLASS64)
>> +#else
>> +       if (ehdr->ei_class != ELFCLASS32)
>> +#endif
>> +               return 0;
>> +       if (ehdr->e_machine != EM_RISCV)
>> +               return 0;
>> +       return 1;
>> +}
>> +
>> +void machine_apply_elf_rel(struct mem_ehdr *UNUSED(ehdr),
>> +                          struct mem_sym *UNUSED(sym),
>> +                          unsigned long r_type,
>> +                          void *UNUSED(location),
>> +                          unsigned long UNUSED(address),
>> +                          unsigned long UNUSED(value))
>> +{
>> +       switch (r_type) {
>> +       default:
>> +               die("Unknown rela relocation: %lu\n", r_type);
>> +               break;
>> +       }
>> +}
>> diff --git a/kexec/arch/riscv/kexec-riscv.c b/kexec/arch/riscv/kexec-riscv.c
>> new file mode 100644
>> index 0000000..fe5dd2d
>> --- /dev/null
>> +++ b/kexec/arch/riscv/kexec-riscv.c
>> @@ -0,0 +1,365 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2019 FORTH-ICS/CARV
>> + *              Nick Kossifidis <mick at ics.forth.gr>
>> + */
>> +
>> +#include "kexec-syscall.h"     /* For KEXEC_ARCH_RISCV */
>> +#include "kexec.h"             /* For OPT_MAX and concat_cmdline() */
>> +#include "mem_regions.h"       /* For mem_regions_sort() */
>> +#include "dt-ops.h"            /* For dtb_set_bootargs() */
>> +#include <arch/options.h>      /* For KEXEC_ARCH_OPTIONS */
>> +#include <getopt.h>            /* For struct option */
>> +#include <sys/stat.h>          /* For stat() and struct stat */
>> +#include <stdlib.h>            /* For free() */
>> +#include <errno.h>             /* For EINVAL */
>> +#include <libfdt.h>            /* For DeviceTree handling */
>> +#include "kexec-riscv.h"
>> +
>> +const struct arch_map_entry arches[] = {
>> +       { "riscv32", KEXEC_ARCH_RISCV },
>> +       { "riscv64", KEXEC_ARCH_RISCV },
>> +       { NULL, 0 },
>> +};
>> +
>> +
>> +struct file_type file_type[] = {
>> +       {"elf-riscv", elf_riscv_probe, elf_riscv_load, elf_riscv_usage},
>> +};
>> +int file_types = sizeof(file_type) / sizeof(file_type[0]);
>> +
>> +static const char riscv_opts_usage[] =
>> +"      --append=STRING         Append STRING to the kernel command line.\n"
>> +"      --dtb=FILE              Use FILE as the device tree blob.\n"
>> +"      --initrd=FILE           Use FILE as the kernel initial ramdisk.\n"
>> +"      --cmdline=STRING        Use STRING as the kernel's command line.\n"
>> +"      --reuse-cmdline         Use kernel command line from running system.\n";
>> +
>> +static struct riscv_opts arch_options = {0};
>> +static struct fdt_image provided_fdt = {0};
>> +static struct memory_ranges sysmem_ranges = {0};
>> +
>> +/****************\
>> +* COMMON HELPERS *
>> +\****************/
>> +
>> +int load_extra_segments(struct kexec_info *info, uint64_t kernel_base,
>> +                       uint64_t kernel_size, uint64_t max_addr)
>> +{
>> +       struct fdt_image *fdt = arch_options.fdt;
>> +       char *initrd_buf = NULL;
>> +       off_t initrd_size = 0;
>> +       uint64_t initrd_base = 0;
>> +       uint64_t start = 0;
>> +       uint64_t end = 0;
>> +       uint64_t min_usable = kernel_base + kernel_size;
>> +       uint64_t max_usable = max_addr;
>> +       int ret = 0;
>> +
>> +       /* Prepare the device tree */
>> +       if (info->kexec_flags & KEXEC_ON_CRASH) {
>> +               ret = load_elfcorehdr(info);
>> +               if (ret) {
>> +                       fprintf(stderr, "Couldn't create elfcorehdr\n");
>> +                       return ret;
>> +               }
>> +
>> +               ret = dtb_add_range_property(&fdt->buf, &fdt->size,
>> +                                            elfcorehdr_mem.start, elfcorehdr_mem.end,
>> +                                            "chosen", "linux,elfcorehdr");
>> +               if (ret) {
>> +                       fprintf(stderr, "Couldn't add elfcorehdr to fdt\n");
>> +                       return ret;
>> +               }
>> +
>> +               ret = get_crash_kernel_load_range(&start, &end);
>> +               if (ret) {
>> +                       fprintf(stderr, "Couldn't get crashkenel region\n");
>> +                       return ret;
>> +               }
>> +
>> +               ret = dtb_add_range_property(&fdt->buf, &fdt->size, start, end,
>> +                                            "memory", "linux,usable-memory");
>> +               if (ret) {
>> +                       fprintf(stderr, "Couldn't add usable-memory to fdt\n");
>> +                       return ret;
>> +               }
>> +
>> +               max_usable = end;
>> +       } else {
>> +               /*
>> +                * Make sure we remove elfcorehdr and usable-memory
>> +                * when switching from crash kernel to a normal one.
>> +                */
>> +               dtb_delete_property(fdt->buf, "chosen", "linux,elfcorehdr");
>> +               dtb_delete_property(fdt->buf, "memory", "linux,usable-memory");
>> +       }
>> +
>> +       /* Do we need to include an initrd image ? */
>> +       if (!arch_options.initrd_path && !arch_options.initrd_end)
>> +               dtb_clear_initrd(&fdt->buf, &fdt->size);
>> +       else if (arch_options.initrd_path) {
>> +               if (arch_options.initrd_end)
>> +                       fprintf(stderr, "Warning: An initrd image was provided"
>> +                                       ", will ignore reuseinitrd\n");
>> +
>> +               initrd_buf = slurp_file(arch_options.initrd_path,
>> +                                       &initrd_size);
>> +               if (!initrd_buf) {
>> +                       fprintf(stderr, "Couldn't read provided initrd\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               initrd_base = add_buffer_phys_virt(info, initrd_buf,
>> +                                                  initrd_size,
>> +                                                  initrd_size, 0,
>> +                                                  min_usable,
>> +                                                  max_usable, -1, 0);
>> +
>> +               dtb_set_initrd(&fdt->buf, &fdt->size, initrd_base,
>> +                              initrd_base + initrd_size);
>> +
>> +               dbgprintf("Base addr for initrd image: 0x%lX\n", initrd_base);
>> +               min_usable = initrd_base;
>> +               max_usable = initrd_base;
>> +       }
>> +
>> +       /* Add device tree */
>> +       add_buffer_phys_virt(info, fdt->buf, fdt->size, fdt->size, 0,
>> +                            min_usable, max_usable, -1, 0);
>> +
>> +       return 0;
>> +}
>> +
>> +
>> +/**************\
>> +* ENTRY POINTS *
>> +\**************/
>> +
>> +void arch_usage(void)
>> +{
>> +       printf(riscv_opts_usage);
>> +}
>> +
>> +int arch_process_options(int argc, char **argv)
>> +{
>> +       static const struct option options[] = {
>> +               KEXEC_ARCH_OPTIONS
>> +               { 0 },
>> +       };
>> +       static const char short_options[] = KEXEC_ARCH_OPT_STR;
>> +       struct stat st = {0};
>> +       char *append = NULL;
>> +       char *cmdline = NULL;
>> +       void *tmp = NULL;
>> +       off_t tmp_size = 0;
>> +       int opt = 0;
>> +       int ret = 0;
>> +
>> +       while ((opt = getopt_long(argc, argv, short_options,
>> +                                 options, 0)) != -1) {
>> +               switch (opt) {
>> +               case OPT_APPEND:
>> +                       append = optarg;
>> +                       break;
>> +               case OPT_CMDLINE:
>> +                       if (cmdline)
>> +                               fprintf(stderr,
>> +                                       "Warning: Kernel's cmdline "
>> +                                       "set twice !\n");
>> +                       cmdline = optarg;
>> +                       break;
>> +               case OPT_REUSE_CMDLINE:
>> +                       if (cmdline)
>> +                               fprintf(stderr,
>> +                                       "Warning: Kernel's cmdline "
>> +                                       "set twice !\n");
>> +                       cmdline = get_command_line();
>> +                       break;
>> +               case OPT_DTB:
>> +                       ret = stat(optarg, &st);
>> +                       if (ret) {
>> +                               fprintf(stderr,
>> +                                       "Could not find the provided dtb !\n");
>> +                               return -EINVAL;
>> +                       }
>> +                       arch_options.fdt_path = optarg;
>> +                       break;
>> +               case OPT_INITRD:
>> +                       ret = stat(optarg, &st);
>> +                       if (ret) {
>> +                               fprintf(stderr,
>> +                                       "Could not find the provided "
>> +                                       "initrd image !\n");
>> +                               return -EINVAL;
>> +                       }
>> +                       arch_options.initrd_path = optarg;
>> +                       break;
>> +               default:
>> +                       break;
>> +               }
>> +       }
>> +
>> +       /* Handle Kernel's command line */
>> +       if (append && !cmdline)
>> +               fprintf(stderr, "Warning: No cmdline provided, "
>> +                               "using append string as cmdline\n");
>> +       if (!append && !cmdline)
>> +               fprintf(stderr, "Warning: No cmdline or append string "
>> +                               "provided\n");
>> +
>> +       if (append || cmdline)
>> +               /*
>> +                * Note that this also handles the case where "cmdline"
>> +                * or "append" is NULL.
>> +                */
>> +               arch_options.cmdline = concat_cmdline(cmdline, append);
>> +
>> +       /* Handle FDT image */
>> +       if (!arch_options.fdt_path) {
>> +               ret = stat("/sys/firmware/fdt", &st);
>> +               if (ret) {
>> +                       fprintf(stderr, "No dtb provided and "
>> +                                       "/sys/firmware/fdt is not present\n");
>> +                       return -EINVAL;
>> +               }
>> +               fprintf(stderr, "Warning: No dtb provided, "
>> +                               "using /sys/firmware/fdt\n");
>> +               arch_options.fdt_path = "/sys/firmware/fdt";
>> +       }
>> +
>> +       tmp = slurp_file(arch_options.fdt_path, &tmp_size);
>> +       if (!tmp) {
>> +               fprintf(stderr, "Couldn't read provided fdt\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       ret = fdt_check_header(tmp);
>> +       if (ret) {
>> +               fprintf(stderr, "Got an ivalid fdt image !\n");
>> +               free(tmp);
>> +               return -EINVAL;
>> +       }
>> +       provided_fdt.buf = tmp;
>> +       provided_fdt.size = tmp_size;
>> +
>> +       if (arch_options.cmdline) {
>> +               ret = dtb_set_bootargs(&provided_fdt.buf, &provided_fdt.size,
>> +                                      arch_options.cmdline);
>> +               if (ret < 0) {
>> +                       fprintf(stderr, "Could not set bootargs on "
>> +                                       "the fdt image\n");
>> +                       return ret;
>> +               }
>> +       }
>> +
>> +       arch_options.fdt = &provided_fdt;
>> +
>> +       return 0;
>> +}
>> +
>> +/*
>> + * This one is called after arch_process_options so we already
>> + * have an fdt image in place.
>> + */
>> +void arch_reuse_initrd(void)
>> +{
>> +       const uint32_t *prop32 = NULL;
>> +       uint32_t addr_cells = 0;
>> +       const void *prop = 0;
>> +       int prop_size = 0;
>> +       uint64_t initrd_start = 0;
>> +       uint64_t initrd_end = 0;
>> +       int chosen_offset = 0;
>> +       struct fdt_image *fdt = &provided_fdt;
>> +
>> +       chosen_offset = fdt_subnode_offset(fdt->buf, 0, "chosen");
>> +       if (chosen_offset < 0) {
>> +               fprintf(stderr, "No /chosen node found on fdt image "
>> +                               "unable to reuse initrd\n");
>> +               return;
>> +       }
>> +
>> +       prop32 = fdt_getprop(fdt->buf, 0, "#address-cells", NULL);
>> +       if (!prop32) {
>> +               fprintf(stderr, "No #address-cells property on root node\n");
>> +               return;
>> +       }
>> +       addr_cells = be32_to_cpu(*prop32);
>> +
>> +       prop = fdt_getprop(fdt->buf, chosen_offset,
>> +                          "linux,initrd-start", &prop_size);
>> +       if (!prop) {
>> +               fprintf(stderr, "Could not get linux,initrd-start\n");
>> +               return;
>> +       }
>> +       dtb_extract_int_property(&initrd_start, prop, addr_cells);
>> +
>> +       prop = fdt_getprop(fdt->buf, chosen_offset,
>> +                          "linux,initrd-end", &prop_size);
>> +       if (!prop) {
>> +               fprintf(stderr, "Could not get linux,initrd-end\n");
>> +               return;
>> +       }
>> +       dtb_extract_int_property(&initrd_end, prop, addr_cells);
>> +
>> +       arch_options.initrd_start = initrd_start;
>> +       arch_options.initrd_end = initrd_end;
>> +       dbgprintf("initrd_start: 0x%lX, initrd_end: 0x%lX\n",
>> +                 initrd_start, initrd_end);
>> +
>> +}
>> +
>> +int get_memory_ranges(struct memory_range **range, int *num_ranges,
>> +                     unsigned long kexec_flags)
>> +{
>> +       const struct fdt_image *fdt = &provided_fdt;
>> +       struct memory_ranges *extra_ranges = NULL;
>> +       int i = 0;
>> +       int ret = 0;
>> +
>> +       if (arch_options.initrd_start && arch_options.initrd_end) {
>> +               int initrd_size = arch_options.initrd_end - arch_options.initrd_start;
>> +               dbgprintf("Marking current intird image as reserved\n");
>> +               ret = mem_regions_alloc_and_add(extra_ranges,
>> +                                               arch_options.initrd_start,
>> +                                               initrd_size,
>> +                                               RANGE_RESERVED);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       ret = dtb_get_memory_ranges(fdt->buf, &sysmem_ranges, extra_ranges);
>> +       if (ret) {
>> +               fprintf(stderr, "Could not get memory ranges from device tree (%i) !\n", ret);
>> +               return ret;
>> +       }
>> +
>> +       *range = sysmem_ranges.ranges;
>> +       *num_ranges = sysmem_ranges.size;
>> +
>> +       dbgprintf("Memory regions:\n");
>> +       for (i = 0; i < sysmem_ranges.size; i++) {
>> +               dbgprintf("\t0x%llx - 0x%llx : %s (%i)\n",
>> +                         sysmem_ranges.ranges[i].start,
>> +                         sysmem_ranges.ranges[i].end,
>> +                         sysmem_ranges.ranges[i].type == RANGE_RESERVED ?
>> +                         "RANGE_RESERVED" : "RANGE_RAM",
>> +                         sysmem_ranges.ranges[i].type);
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +/*******\
>> +* STUBS *
>> +\*******/
>> +
>> +int arch_compat_trampoline(struct kexec_info *UNUSED(info))
>> +{
>> +       return 0;
>> +}
>> +
>> +void arch_update_purgatory(struct kexec_info *UNUSED(info))
>> +{
>> +}
>> diff --git a/kexec/arch/riscv/kexec-riscv.h b/kexec/arch/riscv/kexec-riscv.h
>> new file mode 100644
>> index 0000000..c4323a6
>> --- /dev/null
>> +++ b/kexec/arch/riscv/kexec-riscv.h
>> @@ -0,0 +1,32 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2019 FORTH-ICS/CARV
>> + *              Nick Kossifidis <mick at ics.forth.gr>
>> + */
>> +
>> +struct fdt_image {
>> +       char    *buf;
>> +       off_t   size;
>> +};
>> +
>> +struct riscv_opts {
>> +       char *cmdline;
>> +       char *fdt_path;
>> +       char *initrd_path;
>> +       uint64_t initrd_start;
>> +       uint64_t initrd_end;
>> +       struct fdt_image *fdt;
>> +};
>> +
>> +/* crashdump-riscv.c */
>> +extern struct memory_range elfcorehdr_mem;
>> +int load_elfcorehdr(struct kexec_info *info);
>> +
>> +/* kexec-riscv.c */
>> +int load_extra_segments(struct kexec_info *info, uint64_t kernel_base,
>> +                       uint64_t kernel_size, uint64_t max_addr);
>> +
>> +int elf_riscv_probe(const char *buf, off_t len);
>> +void elf_riscv_usage(void);
>> +int elf_riscv_load(int argc, char **argv, const char *buf, off_t len,
>> +                  struct kexec_info *info);
>> diff --git a/kexec/dt-ops.c b/kexec/dt-ops.c
>> index 0a96b75..3e285ab 100644
>> --- a/kexec/dt-ops.c
>> +++ b/kexec/dt-ops.c
>> @@ -4,9 +4,11 @@
>>   #include <libfdt.h>
>>   #include <stdio.h>
>>   #include <stdlib.h>
>> +#include <stdbool.h>
>>
>>   #include "kexec.h"
>>   #include "dt-ops.h"
>> +#include "mem_regions.h"
>>
>>   static const char n_chosen[] = "chosen";
>>
>> @@ -95,7 +97,7 @@ int dtb_set_property(char **dtb, off_t *dtb_size, const char *node,
>>
>>          strcpy(new_node, "/");
>>          strcat(new_node, node);
>> -
>> +
>>          nodeoffset = fdt_path_offset(new_dtb, new_node);
>>
>>          if (nodeoffset == -FDT_ERR_NOTFOUND) {
>> @@ -174,3 +176,441 @@ int dtb_delete_property(char *dtb, const char *node, const char *prop)
>>          free(new_node);
>>          return result;
>>   }
>> +
>> +static int dtb_get_num_cells(char *dtb, int nodeoffset, uint32_t *addr_cells,
>> +                            uint32_t *size_cells, bool recursive)
>> +{
>> +       const uint32_t *prop32 = NULL;
>> +       int curr_offset = nodeoffset;
>> +       int prop_len = 0;
>> +       *addr_cells = 0;
>> +       *size_cells = 0;
>> +
>> +       do {
>> +               prop32 = fdt_getprop(dtb, curr_offset, "#address-cells", &prop_len);
>> +               curr_offset = fdt_parent_offset(dtb, curr_offset);
>> +       } while (!prop32 && prop_len == -FDT_ERR_NOTFOUND && recursive);
>> +
>> +       if (!prop32) {
>> +               dbgprintf("Could not get #address-cells property for %s (%s)\n",
>> +                         fdt_get_name(dtb, nodeoffset, NULL), fdt_strerror(nodeoffset));
>> +               return -EINVAL;
>> +       }
>> +       *addr_cells = fdt32_to_cpu(*prop32);
>> +
>> +       curr_offset = nodeoffset;
>> +       do {
>> +               prop32 = fdt_getprop(dtb, curr_offset, "#size-cells", &prop_len);
>> +               curr_offset = fdt_parent_offset(dtb, curr_offset);
>> +       } while (!prop32 && prop_len == -FDT_ERR_NOTFOUND && recursive);
>> +
>> +       if (!prop32) {
>> +               dbgprintf("Could not get #size-cells property for %s (%s)\n",
>> +                         fdt_get_name(dtb, nodeoffset, NULL), fdt_strerror(nodeoffset));
>> +               return -EINVAL;
>> +       }
>> +       *size_cells = fdt32_to_cpu(*prop32);
>> +
>> +       dbgprintf("%s: #address-cells:%d #size-cells:%d\n",
>> +                fdt_get_name(dtb, nodeoffset, NULL), *addr_cells, *size_cells);
>> +
>> +       return 0;
>> +}
>> +
>> +void dtb_extract_int_property(uint64_t *val, const void *buf, uint32_t cells)
>> +{
>> +       const uint32_t *prop32 = NULL;
>> +       const uint64_t *prop64 = NULL;
>> +
>> +       if (cells == 1) {
>> +               prop32 = (const uint32_t *) buf;
>> +               *val = (uint64_t) be32_to_cpu(*prop32);
>> +       } else {
>> +               /* Skip any leading cells */
>> +               prop64 = (const uint64_t *) (uint32_t *)buf + cells - 2;
>> +               *val = (uint64_t) be64_to_cpu(*prop64);
>> +       }
>> +}
>> +
>> +void dtb_fill_int_property(void *buf, uint64_t val, uint32_t cells)
>> +{
>> +       uint32_t prop32 = 0;
>> +       uint64_t prop64 = 0;
>> +
>> +       if (cells == 1) {
>> +               prop32 = cpu_to_fdt32((uint32_t) val);
>> +               memcpy(buf, &prop32, sizeof(uint32_t));
>> +       } else {
>> +               prop64 = cpu_to_fdt64(val);
>> +               /* Skip any leading cells */
>> +               memcpy((uint64_t *)(uint32_t *)buf + cells - 2,
>> +                      &prop64, sizeof(uint64_t));
>> +       }
>> +}
>> +
>> +int dtb_add_range_property(char **dtb, off_t *dtb_size, uint64_t start, uint64_t end,
>> +                          const char *parent, const char *name)
>> +{
>> +       uint32_t addr_cells = 0;
>> +       uint32_t size_cells = 0;
>> +       char *nodepath = NULL;
>> +       void *prop = NULL;
>> +       int nodeoffset = 0;
>> +       int prop_size = 0;
>> +       int ret = 0;
>> +
>> +       nodepath = malloc(strlen("/") + strlen(parent) + 1);
>> +       if (!nodepath) {
>> +               dbgprintf("%s: malloc failed\n", __func__);
>> +               return -ENOMEM;
>> +       }
>> +
>> +       strcpy(nodepath, "/");
>> +       strcat(nodepath, parent);
>> +
>> +       nodeoffset = fdt_path_offset(*dtb, nodepath);
>> +       if (nodeoffset < 0) {
>> +               dbgprintf("%s: fdt_path_offset(%s) failed: %s\n", __func__,
>> +                         nodepath, fdt_strerror(nodeoffset));
>> +               free(nodepath);
>> +               return nodeoffset;
>> +       }
>> +       free(nodepath);
>> +
>> +       ret = dtb_get_num_cells(*dtb, nodeoffset, &addr_cells, &size_cells, true);
>> +       if (ret < 0)
>> +               return ret;
>> +
>> +       /* Can the range fit with the given address/size cells ? */
>> +       if ((addr_cells == 1) && (start >= (1ULL << 32)))
>> +               return -EINVAL;
>> +
>> +       if ((size_cells == 1) && ((end - start + 1) >= (1ULL << 32)))
>> +               return -EINVAL;
>> +
>> +       prop_size = sizeof(uint32_t) * (addr_cells + size_cells);
>> +       prop = malloc(prop_size);
>> +
>> +       dtb_fill_int_property(prop, start, addr_cells);
>> +       dtb_fill_int_property((void *)((uint32_t *)prop + addr_cells),
>> +                             end - start + 1, size_cells);
>> +
>> +       /* Add by node path name */
>> +       return dtb_set_property(dtb, dtb_size, parent, name, prop, prop_size);
>> +}
>> +
>> +/************************\
>> +* MEMORY RANGES HANDLING *
>> +\************************/
>> +
>> +static int dtb_add_memory_range(struct memory_ranges *mem_ranges, uint64_t start,
>> +                               uint64_t end, unsigned type)
>> +{
>> +       struct memory_range this_region = {0};
>> +       struct memory_range *ranges = mem_ranges->ranges;
>> +       int i = 0;
>> +       int ret = 0;
>> +
>> +       if (start == end) {
>> +               dbgprintf("Ignoring empty region\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* Check if we are adding an existing region */
>> +       for (i = 0; i < mem_ranges->size; i++) {
>> +               if (start == ranges[i].start && end == ranges[i].end) {
>> +                       dbgprintf("Duplicate: 0x%lx - 0x%lx\n", start, end);
>> +
>> +                       if (type == ranges[i].type)
>> +                               return 0;
>> +                       else if (type == RANGE_RESERVED) {
>> +                               ranges[i].type = RANGE_RESERVED;
>> +                               return 0;
>> +                       }
>> +
>> +                       dbgprintf("Conflicting types for region: 0x%lx - 0x%lx\n",
>> +                                 start, end);
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       /*
>> +        * Reserved regions may be part of an existing /memory
>> +        * region and shouldn't overlap according to spec, so
>> +        * since we add /memory regions first, we can exclude
>> +        * reserved regions here from the existing /memory regions
>> +        * included in ranges[], so that we don't have the same
>> +        * region twice.
>> +        */
>> +       if (type == RANGE_RESERVED) {
>> +               this_region.start = start;
>> +               this_region.end = end - 1;
>> +               this_region.type = type;
>> +               ret = mem_regions_exclude(mem_ranges, &this_region);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       ret = mem_regions_alloc_and_add(mem_ranges, start,
>> +                                       end - start, type);
>> +
>> +       return ret;
>> +}
>> +
>> +static int dtb_add_memory_region(char *dtb, int nodeoffset,
>> +                                struct memory_ranges *mem_ranges, int type)
>> +{
>> +       uint32_t root_addr_cells = 0;
>> +       uint32_t root_size_cells = 0;
>> +       uint64_t addr = 0;
>> +       uint64_t size = 0;
>> +       const char *reg = NULL;
>> +       int prop_size = 0;
>> +       int offset = 0;
>> +       int entry_size = 0;
>> +       int num_entries = 0;
>> +       int ret = 0;
>> +
>> +       /*
>> +        * Get address-cells and size-cells properties (according to
>> +        * binding spec these are the same as in the root node)
>> +        */
>> +       ret = dtb_get_num_cells(dtb, 0, &root_addr_cells, &root_size_cells, false);
>> +       if (ret < 0) {
>> +               dbgprintf("No address/size cells on root node !\n");
>> +               return ret;
>> +       }
>> +
>> +       /*
>> +        * Parse the reg array, acording to device tree spec it includes
>> +        * an arbitary number of <address><size> pairs
>> +        */
>> +       entry_size = (root_addr_cells + root_size_cells) * sizeof(uint32_t);
>> +       reg = fdt_getprop(dtb, nodeoffset, "reg", &prop_size);
>> +       if (!reg) {
>> +               dbgprintf("Warning: Malformed memory region with no reg property (%s) !\n",
>> +                         fdt_get_name(dtb, nodeoffset, NULL));
>> +               return -EINVAL;
>> +       }
>> +
>> +       num_entries = prop_size / entry_size;
>> +       dbgprintf("Got region with %i entries: %s\n", num_entries,
>> +                 fdt_get_name(dtb, nodeoffset, NULL));
>> +
>> +       for (num_entries--; num_entries >= 0; num_entries--) {
>> +               offset = num_entries * entry_size;
>> +
>> +               dtb_extract_int_property(&addr, reg + offset,
>> +                                        root_addr_cells);
>> +               offset += root_addr_cells * sizeof(uint32_t);
>> +
>> +               dtb_extract_int_property(&size, reg + offset,
>> +                                        root_size_cells);
>> +
>> +               ret = dtb_add_memory_range(mem_ranges, addr,
>> +                                          addr + size, type);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int dtb_parse_memory_reservations_table(char *dtb, struct memory_ranges *mem_ranges)
>> +{
>> +       int total_memrsrv = 0;
>> +       uint64_t addr = 0;
>> +       uint64_t size = 0;
>> +       int ret = 0;
>> +       int i = 0;
>> +
>> +       total_memrsrv = fdt_num_mem_rsv(dtb);
>> +       for (i = 0; i < total_memrsrv; i++) {
>> +               ret = fdt_get_mem_rsv(dtb, i, &addr, &size);
>> +               if (ret)
>> +                       continue;
>> +               ret = dtb_add_memory_range(mem_ranges, addr, addr + size - 1,
>> +                                          RANGE_RESERVED);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int dtb_get_reserved_memory_node(char *dtb)
>> +{
>> +       uint32_t root_addr_cells = 0;
>> +       uint32_t root_size_cells = 0;
>> +       uint32_t addr_cells = 0;
>> +       uint32_t size_cells = 0;
>> +       int prop_size = 0;
>> +       int nodeoffset = 0;
>> +       int ret = 0;
>> +
>> +       /* Get address / size cells from root node */
>> +       ret = dtb_get_num_cells(dtb, 0, &root_addr_cells, &root_size_cells, false);
>> +       if (ret < 0) {
>> +               dbgprintf("No address/size cells on root node !\n");
>> +               return ret;
>> +       }
>> +
>> +       /* This calls fdt_next_node internaly */
>> +       nodeoffset = fdt_subnode_offset(dtb, 0, "reserved-memory");
>> +       if (nodeoffset == -FDT_ERR_NOTFOUND) {
>> +               return nodeoffset;
>> +       } else if (nodeoffset < 0) {
>> +               dbgprintf("Error while looking for reserved-memory: %s\n",
>> +                       fdt_strerror(nodeoffset));
>> +               return nodeoffset;
>> +       }
>> +
>> +       /* Look for the ranges property */
>> +       fdt_getprop(dtb, nodeoffset, "ranges", &prop_size);
>> +       if (prop_size < 0) {
>> +               fprintf(stderr, "Malformed reserved-memory node (no ranges property) !\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* Verify address-cells / size-cells */
>> +       ret = dtb_get_num_cells(dtb, nodeoffset, &addr_cells, &size_cells, false);
>> +       if (ret < 0) {
>> +               dbgprintf("No address/size cells property on reserved-memory node\n");
>> +               return ret;
>> +       }
>> +
>> +       if (addr_cells != root_addr_cells) {
>> +               fprintf(stderr, "Invalid #address-cells property on reserved-memory node\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       if (size_cells != root_size_cells) {
>> +               fprintf(stderr, "Invalid #size-cells property on reserved-memory node\n");
>> +               return -EINVAL;
>> +
>> +       }
>> +
>> +       return nodeoffset;
>> +}
>> +
>> +static int dtb_parse_reserved_memory_node(char *dtb, struct memory_ranges *mem_ranges)
>> +{
>> +       int nodeoffset = 0;
>> +       int node_depth = 0;
>> +       int parent_depth = 0;
>> +       int ret = 0;
>> +
>> +       nodeoffset = dtb_get_reserved_memory_node(dtb);
>> +       if (nodeoffset == -FDT_ERR_NOTFOUND)
>> +               return 0;
>> +       else if (nodeoffset < 0)
>> +               return nodeoffset;
>> +
>> +       /* Got the parent node, check for sub-nodes */
>> +
>> +       /* fdt_next_node() increases or decreases depth */
>> +       node_depth = parent_depth;
>> +       nodeoffset = fdt_next_node(dtb, nodeoffset, &node_depth);
>> +       if (ret < 0) {
>> +               dbgprintf("Unable to get next node: %s\n",
>> +                         fdt_strerror(ret));
>> +               return -EINVAL;
>> +       }
>> +
>> +       while (node_depth != parent_depth) {
>> +
>> +               ret = dtb_add_memory_region(dtb, nodeoffset,
>> +                                           mem_ranges, RANGE_RESERVED);
>> +               if (ret)
>> +                       return ret;
>> +
>> +               nodeoffset = fdt_next_node(dtb, nodeoffset, &node_depth);
>> +               if (ret < 0) {
>> +                       dbgprintf("Unable to get next node: %s\n",
>> +                                 fdt_strerror(ret));
>> +                       return -EINVAL;
>> +               }
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int dtb_parse_memory_nodes(char *dtb, struct memory_ranges *mem_ranges)
>> +{
>> +       int nodeoffset = 0;
>> +       int num_regions = 0;
>> +       const char* dev_type = 0;
>> +       int prop_size = 0;
>> +       int ret = 0;
>> +
>> +       for (; ; num_regions++) {
>> +               nodeoffset = fdt_subnode_offset(dtb, nodeoffset,
>> +                                                "memory");
>> +               if (nodeoffset < 0)
>> +                       break;
>> +
>> +               dbgprintf("Got memory node at depth: %i\n", fdt_node_depth(dtb, nodeoffset));
>> +
>> +               /* Look for the device_type  property */
>> +               dev_type = fdt_getprop(dtb, nodeoffset, "device_type", &prop_size);
>> +               if (prop_size < 0) {
>> +                       fprintf(stderr, "Malformed /memory node (no device-type property) !\n");
>> +                       return -EINVAL;
>> +               }
>> +
>> +               if (strncmp(dev_type, "memory", prop_size)) {
>> +                       dbgprintf("Got unknown dev_type property: %s\n", dev_type);
>> +                       continue;
>> +               }
>> +
>> +               ret = dtb_add_memory_region(dtb, nodeoffset, mem_ranges, RANGE_RAM);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       if (!num_regions) {
>> +               dbgprintf("Malformed dtb, no /memory nodes present !\n");
>> +               return -EINVAL;
>> +       }
>> +
>> +       dbgprintf("Got %i /memory nodes\n", num_regions);
>> +
>> +       return 0;
>> +}
>> +
>> +int dtb_get_memory_ranges(char *dtb, struct memory_ranges *mem_ranges, struct memory_ranges *extra_ranges)
>> +{
>> +       int i = 0;
>> +       int ret = 0;
>> +
>> +       /* Fill mem_ranges[] by parsing the device tree */
>> +       ret = dtb_parse_memory_nodes(dtb, mem_ranges);
>> +       if (ret)
>> +               return ret;
>> +
>> +       ret = dtb_parse_memory_reservations_table(dtb, mem_ranges);
>> +       if (ret)
>> +               return ret;
>> +
>> +       ret = dtb_parse_reserved_memory_node(dtb, mem_ranges);
>> +       if (ret)
>> +               return ret;
>> +
>> +       /* Append any extra ranges provided by the caller (e.g. initrd) */
>> +       for (i = 0; extra_ranges != NULL && i < extra_ranges->size; i++) {
>> +               dbgprintf("Adding extra range: 0x%llx - 0x%llx (%s)\n",
>> +                         extra_ranges->ranges[i].start,
>> +                         extra_ranges->ranges[i].end,
>> +                         extra_ranges->ranges[i].type == RANGE_RESERVED ?
>> +                          "RANGE_RESERVED" : "RANGE_RAM");
>> +
>> +               ret = dtb_add_memory_range(mem_ranges, extra_ranges->ranges[i].start,
>> +                                extra_ranges->ranges[i].end, extra_ranges->ranges[i].type);
>> +               if (ret)
>> +                       return ret;
>> +       }
>> +
>> +       mem_regions_sort(mem_ranges);
>> +
>> +       return 0;
>> +}
>> diff --git a/kexec/dt-ops.h b/kexec/dt-ops.h
>> index 03659ce..3014205 100644
>> --- a/kexec/dt-ops.h
>> +++ b/kexec/dt-ops.h
>> @@ -11,4 +11,11 @@ int dtb_set_property(char **dtb, off_t *dtb_size, const char *node,
>>
>>   int dtb_delete_property(char *dtb, const char *node, const char *prop);
>>
>> +void dtb_extract_int_property(uint64_t *val, const void *buf, uint32_t cells);
>> +void dtb_fill_int_property(void *buf, uint64_t val, uint32_t cells);
>> +int dtb_add_range_property(char **dtb, off_t *dtb_size, uint64_t start, uint64_t end,
>> +                           const char *node, const char* parent);
>> +int dtb_get_memory_ranges(char *dtb, struct memory_ranges *mem_ranges,
>> +                         struct memory_ranges *extra_ranges);
>> +
>>   #endif
>> diff --git a/kexec/kexec-syscall.h b/kexec/kexec-syscall.h
>> index bea29d4..2e99e2b 100644
>> --- a/kexec/kexec-syscall.h
>> +++ b/kexec/kexec-syscall.h
>> @@ -134,6 +134,7 @@ static inline long kexec_file_load(int kernel_fd, int initrd_fd,
>>   #define KEXEC_ARCH_MIPS_LE (10 << 16)
>>   #define KEXEC_ARCH_MIPS    ( 8 << 16)
>>   #define KEXEC_ARCH_CRIS    (76 << 16)
>> +#define KEXEC_ARCH_RISCV   (243 << 16)
>>
>>   #define KEXEC_MAX_SEGMENTS 16
>>
>> @@ -177,5 +178,8 @@ static inline long kexec_file_load(int kernel_fd, int initrd_fd,
>>   #if defined(__arm64__)
>>   #define KEXEC_ARCH_NATIVE      KEXEC_ARCH_ARM64
>>   #endif
>> +#if defined(__riscv__) || defined(__riscv)
>> +#define KEXEC_ARCH_NATIVE      KEXEC_ARCH_RISCV
>> +#endif
>>
>>   #endif /* KEXEC_SYSCALL_H */
>> diff --git a/purgatory/Makefile b/purgatory/Makefile
>> index 15adb12..11694e5 100644
>> --- a/purgatory/Makefile
>> +++ b/purgatory/Makefile
>> @@ -25,6 +25,7 @@ include $(srcdir)/purgatory/arch/ia64/Makefile
>>   include $(srcdir)/purgatory/arch/mips/Makefile
>>   include $(srcdir)/purgatory/arch/ppc/Makefile
>>   include $(srcdir)/purgatory/arch/ppc64/Makefile
>> +include $(srcdir)/purgatory/arch/riscv/Makefile
>>   include $(srcdir)/purgatory/arch/s390/Makefile
>>   include $(srcdir)/purgatory/arch/sh/Makefile
>>   include $(srcdir)/purgatory/arch/x86_64/Makefile
>> diff --git a/purgatory/arch/riscv/Makefile b/purgatory/arch/riscv/Makefile
>> new file mode 100644
>> index 0000000..8bded71
>> --- /dev/null
>> +++ b/purgatory/arch/riscv/Makefile
>> @@ -0,0 +1,7 @@
>> +#
>> +# Purgatory riscv
>> +#
>> +
>> +riscv_PURGATORY_SRCS =
>> +
>> +dist += purgatory/arch/sh/Makefile $(riscv_PURGATORY_SRCS)
>> --
>> 2.17.1
>>
>



More information about the kexec mailing list