[PATCH] RISC-V: Add kernel image sections to the resource tree

Nick Kossifidis mick at ics.forth.gr
Tue Dec 22 18:58:26 EST 2020


Στις 2020-12-19 01:52, Atish Patra έγραψε:
> On Thu, Nov 5, 2020 at 10:37 AM Palmer Dabbelt <palmer at dabbelt.com> 
> wrote:
>> 
>> On Mon, 12 Oct 2020 07:24:10 PDT (-0700), mick at ics.forth.gr wrote:
>> > This patch (previously part of my kexec/kdump series) populates
>> > /proc/iomem with the various sections of the kernel image. We need
>> > this for kexec-tools to be able to prepare the crashkernel image
>> > for kdump to work. Since resource tree initialization is not
>> > related to memory initialization I added the code to kernel/setup.c
>> > and removed the original code (derived from the arm64 tree) from
>> > mm/init.c.
>> >
>> > Signed-off-by: Nick Kossifidis <mick at ics.forth.gr>
>> > ---
>> >  arch/riscv/kernel/setup.c | 160 ++++++++++++++++++++++++++++++++++++++
>> >  arch/riscv/mm/init.c      |  27 -------
>> >  2 files changed, 160 insertions(+), 27 deletions(-)
>> >
>> > diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
>> > index 2c6dd3293..450f0142f 100644
>> > --- a/arch/riscv/kernel/setup.c
>> > +++ b/arch/riscv/kernel/setup.c
>> > @@ -4,6 +4,8 @@
>> >   *  Chen Liqin <liqin.chen at sunplusct.com>
>> >   *  Lennox Wu <lennox.wu at sunplusct.com>
>> >   * Copyright (C) 2012 Regents of the University of California
>> > + * Copyright (C) 2020 FORTH-ICS/CARV
>> > + *  Nick Kossifidis <mick at ics.forth.gr>
>> >   */
>> >
>> >  #include <linux/init.h>
>> > @@ -48,6 +50,163 @@ atomic_t hart_lottery __section(.sdata);
>> >  unsigned long boot_cpu_hartid;
>> >  static DEFINE_PER_CPU(struct cpu, cpu_devices);
>> >
>> > +/*
>> > + * Place kernel memory regions on the resource tree so that
>> > + * kexec-tools can retrieve them from /proc/iomem. While there
>> > + * also add "System RAM" regions for compatibility with other
>> > + * archs, and the rest of the known regions for completeness.
>> > + */
>> > +static struct resource code_res = { .name = "Kernel code", };
>> > +static struct resource data_res = { .name = "Kernel data", };
>> > +static struct resource rodata_res = { .name = "Kernel rodata", };
>> > +static struct resource bss_res = { .name = "Kernel bss", };
>> > +
>> > +static int __init add_resource(struct resource *parent,
>> > +                             struct resource *res)
>> > +{
>> > +     int ret = 0;
>> > +
>> > +     ret = insert_resource(parent, res);
>> > +     if (ret < 0) {
>> > +             pr_err("Failed to add a %s resource at %llx\n",
>> > +                     res->name, (unsigned long long) res->start);
>> > +             return ret;
>> > +     }
>> > +
>> > +     return 1;
>> > +}
>> > +
>> > +static int __init add_kernel_resources(struct resource *res)
>> > +{
>> > +     int ret = 0;
>> > +
>> > +     /*
>> > +      * The memory region of the kernel image is continuous and
>> > +      * was reserved on setup_bootmem, find it here and register
>> > +      * it as a resource, then register the various segments of
>> > +      * the image as child nodes
>> > +      */
>> > +     if (!(res->start <= code_res.start && res->end >= data_res.end))
>> > +             return 0;
>> > +
>> > +     res->name = "Kernel image";
>> > +     res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
>> > +
>> > +     /*
>> > +      * We removed a part of this region on setup_bootmem so
>> > +      * we need to expand the resource for the bss to fit in.
>> > +      */
>> > +     res->end = bss_res.end;
>> > +
>> > +     ret = add_resource(&iomem_resource, res);
>> > +     if (ret < 0)
>> > +             return ret;
>> > +
>> > +     ret = add_resource(res, &code_res);
>> > +     if (ret < 0)
>> > +             return ret;
>> > +
>> > +     ret = add_resource(res, &rodata_res);
>> > +     if (ret < 0)
>> > +             return ret;
>> > +
>> > +     ret = add_resource(res, &data_res);
>> > +     if (ret < 0)
>> > +             return ret;
>> > +
>> > +     ret = add_resource(res, &bss_res);
>> > +
>> > +     return ret;
>> > +}
>> > +
>> > +static void __init init_resources(void)
>> > +{
>> > +     struct memblock_region *region = NULL;
>> > +     struct resource *res = NULL;
>> > +     int ret = 0;
>> > +
>> > +     code_res.start = __pa_symbol(_text);
>> > +     code_res.end = __pa_symbol(_etext) - 1;
>> > +     code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
>> > +
>> > +     rodata_res.start = __pa_symbol(__start_rodata);
>> > +     rodata_res.end = __pa_symbol(__end_rodata) - 1;
>> > +     rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
>> > +
>> > +     data_res.start = __pa_symbol(_data);
>> > +     data_res.end = __pa_symbol(_edata) - 1;
>> > +     data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
>> > +
>> > +     bss_res.start = __pa_symbol(__bss_start);
>> > +     bss_res.end = __pa_symbol(__bss_stop) - 1;
>> > +     bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
>> > +
>> > +     /*
>> > +      * Start by adding the reserved regions, if they overlap
>> > +      * with /memory regions, insert_resource later on will take
>> > +      * care of it.
>> > +      */
>> > +     for_each_memblock(reserved, region) {
>> > +             res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
> 
> Is there a specific reason to invoke memblock_alloc while iterating
> reserved regions ?
> memblock_alloc also adds calls memblock_reserve. So we are modifying
> the reserved region entries
> while iterating it. It resulted in below warning for rv32.
> 
> [    0.000000] ------------[ cut here ]------------
> [    0.000000] WARNING: CPU: 0 PID: 0 at kernel/resource.c:795
> __insert_resource+0x8e/0xd0
> [    0.000000] Modules linked in:
> [    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted
> 5.10.0-00022-ge20097fb37e2-dirty #549
> [    0.000000] epc: c00125c2 ra : c001262c sp : c1c01f50
> [    0.000000]  gp : c1d456e0 tp : c1c0a980 t0 : ffffcf20
> [    0.000000]  t1 : 00000000 t2 : 00000000 s0 : c1c01f60
> [    0.000000]  s1 : ffffcf00 a0 : ffffff00 a1 : c1c0c0c4
> [    0.000000]  a2 : 80c12b15 a3 : 80402000 a4 : 80402000
> [    0.000000]  a5 : c1c0c0c4 a6 : 80c12b15 a7 : f5faf600
> [    0.000000]  s2 : c1c0c0c4 s3 : c1c0e000 s4 : c1009a80
> [    0.000000]  s5 : c1c0c000 s6 : c1d48000 s7 : c1613b4c
> [    0.000000]  s8 : 00000fff s9 : 80000200 s10: c1613b40
> [    0.000000]  s11: 00000000 t3 : c1d4a000 t4 : ffffffff
> [    0.000000]  t5 : c1008e38 t6 : 00000001
> [    0.000000] status: 00000100 badaddr: 00000000 cause: 00000003
> [    0.000000] irq event stamp: 0
> [    0.000000] hardirqs last  enabled at (0): [<00000000>] 0x0
> [    0.000000] hardirqs last disabled at (0): [<00000000>] 0x0
> [    0.000000] softirqs last  enabled at (0): [<00000000>] 0x0
> [    0.000000] softirqs last disabled at (0): [<00000000>] 0x0
> [    0.000000] random: get_random_bytes called from __warn+0xd8/0x11e
> with crng_init=0
> [    0.000000] ---[ end trace 0000000000000000 ]---
> [    0.000000] Failed to add a Kernel code resource at 80402000
> 
> Here is the fix that works:
> --- a/arch/riscv/kernel/setup.c
> +++ b/arch/riscv/kernel/setup.c
> @@ -127,7 +127,9 @@ static void __init init_resources(void)
>  {
>         struct memblock_region *region = NULL;
>         struct resource *res = NULL;
> -       int ret = 0;
> +       int ret = 0, i = 0;
> +       int num_mem_res;
> +       struct resource *mem_res;
> 
>         code_res.start = __pa_symbol(_text);
>         code_res.end = __pa_symbol(_etext) - 1;
> @@ -145,16 +147,17 @@ static void __init init_resources(void)
>         bss_res.end = __pa_symbol(__bss_stop) - 1;
>         bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
> 
> +        num_mem_res = memblock.memory.cnt + memblock.reserved.cnt;
> +       mem_res = memblock_alloc(num_mem_res * sizeof(*mem_res),
> SMP_CACHE_BYTES);
> +       if (!mem_res)
> +               panic("%s: Failed to allocate %zu bytes\n", __func__,
> num_mem_res * sizeof(*mem_res));
>         /*
>          * Start by adding the reserved regions, if they overlap
>          * with /memory regions, insert_resource later on will take
>          * care of it.
>          */
>         for_each_reserved_mem_region(region) {
> -               res = memblock_alloc(sizeof(struct resource), 
> SMP_CACHE_BYTES);
> -               if (!res)
> -                       panic("%s: Failed to allocate %zu bytes\n", 
> __func__,
> -                             sizeof(struct resource));
> +               res = &mem_res[i++];
> 
>                 res->name = "Reserved";
>                 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
> @@ -181,11 +184,8 @@ static void __init init_resources(void)
> 
>         /* Add /memory regions to the resource tree */
>         for_each_mem_region(region) {
> -               res = memblock_alloc(sizeof(struct resource), 
> SMP_CACHE_BYTES);
> -               if (!res)
> -                       panic("%s: Failed to allocate %zu bytes\n", 
> __func__,
> -                             sizeof(struct resource));
> 
> +               res = &mem_res[i++];
> 
> If this looks okay to you, I will send the patch.
> 

The problem is that we don't want to include all reserved regions within 
/memory such as the device tree or initramfs, since they'll get modified 
and/or freed later on. So pre-allocating resources for all reserved 
regions doesn't seem the right thing to do. My goal here was to allocate 
a resource on each itteration and free it if not needed / on failure, I 
free the failed resource on failure but not if it's not needed as you 
noted.

As for the issue of memblock_alloc() calling memblock_reserve(), we 
don't add any new reserved regions at this point directly through 
memblock_reserve(), and memblock_reserve() will run on neighboring 
regions that memblock_merge_regions() will later on merge together, so 
it'll work on a single growing region instead of creating new ones. The 
issue is that we call memblock_alloc() for the first time inside the 
loop and for_each_mem_region() macro uses pointer arithmetic on the 
initial array of reserved memblocks, we could use 
for_each_memblock_type() macro but it's not on memblock.h anymore OR we 
could call memblock_alloc() before the loop so that the array is not 
expanded while itterating.

How about this, does it work for you ?:

diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 1d85e9bf7..460cfddb7 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -127,8 +127,16 @@ static void __init init_resources(void)
  {
  	struct memblock_region *region = NULL;
  	struct resource *res = NULL;
+	struct resource *mem_resources = NULL;
+	size_t mem_resources_sz = 0;
  	int ret = 0;

+	mem_resources_sz = memblock.memory.cnt * sizeof(struct resource);
+	mem_resources = memblock_alloc(mem_resources_sz, SMP_CACHE_BYTES);
+	if (!mem_resources)
+		panic("%s: Failed to allocate %zu bytes\n", __func__,
+		      sizeof(struct resource));
+
  	code_res.start = __pa_symbol(_text);
  	code_res.end = __pa_symbol(_etext) - 1;
  	code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
@@ -171,20 +179,21 @@ static void __init init_resources(void)
  		 * Ignore any other reserved regions within
  		 * system memory.
  		 */
-		if (memblock_is_memory(res->start))
+		if (memblock_is_memory(res->start)) {
+			memblock_free((phys_addr_t) res, sizeof(struct resource));
  			continue;
+		}

  		ret = add_resource(&iomem_resource, res);
-		if (ret < 0)
+		if (ret < 0) {
+			memblock_free((phys_addr_t) res, sizeof(struct resource));
  			goto error;
+		}
  	}

  	/* Add /memory regions to the resource tree */
  	for_each_mem_region(region) {
-		res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
-		if (!res)
-			panic("%s: Failed to allocate %zu bytes\n", __func__,
-			      sizeof(struct resource));
+		res = mem_resources++;

  		if (unlikely(memblock_is_nomap(region))) {
  			res->name = "Reserved";
@@ -205,9 +214,9 @@ static void __init init_resources(void)
  	return;

   error:
-	memblock_free((phys_addr_t) res, sizeof(struct resource));
  	/* Better an empty resource tree than an inconsistent one */
  	release_child_resources(&iomem_resource);
+	memblock_free((phys_addr_t) mem_resources, mem_resources_sz);
  }

Regards,
Nick



More information about the linux-riscv mailing list