[PATCH v2 09/10] arm64/efi: ignore unusable regions instead of reserving them

Ard Biesheuvel ard.biesheuvel at linaro.org
Sun Nov 9 23:31:44 PST 2014


On 10 November 2014 05:11, Mark Salter <msalter at redhat.com> wrote:
> On Thu, 2014-11-06 at 15:13 +0100, Ard Biesheuvel wrote:
>> This changes the way memblocks are installed based on the contents of
>> the UEFI memory map. Formerly, all regions would be memblock_add()'ed,
>> after which unusable regions would be memblock_reserve()'d as well.
>> To simplify things, but also to allow access to the unusable regions
>> through mmap(/dev/mem), even with CONFIG_STRICT_DEVMEM set, change
>> this so that only usable regions are memblock_add()'ed in the first
>> place.
>
> This patch is crashing 64K pagesize kernels during boot. I'm not exactly
> sure why, though. Here is what I get on an APM Mustang box:
>

Ah, yes, I meant to mention this patch

https://git.kernel.org/cgit/linux/kernel/git/glikely/linux.git/commit/?id=8cccffc52694938fc88f3d90bc7fed8460e27191

in the cover letter, which addresses this issue at least for the DT case.

-- 
Ard.





> [    0.046262] Unhandled fault: alignment fault (0x96000021) at 0xfffffc000004f026
> [    0.053863] Internal error: : 96000021 [#1] SMP
> [    0.058566] Modules linked in:
> [    0.061736] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.18.0-rc3+ #11
> [    0.068418] Hardware name: APM X-Gene Mustang board (DT)
> [    0.073942] task: fffffe0000f46a20 ti: fffffe0000f10000 task.ti: fffffe0000f10000
> [    0.081735] PC is at acpi_ns_lookup+0x520/0x734
> [    0.086448] LR is at acpi_ns_lookup+0x4ac/0x734
> [    0.091151] pc : [<fffffe0000460458>] lr : [<fffffe00004603e4>] pstate: 60000245
> [    0.098843] sp : fffffe0000f13b00
> [    0.102293] x29: fffffe0000f13b10 x28: 000000000000000a
> [    0.107810] x27: 0000000000000008 x26: 0000000000000000
> [    0.113345] x25: fffffe0000f13c38 x24: fffffe0000fae000
> [    0.118862] x23: 0000000000000008 x22: 0000000000000001
> [    0.124406] x21: fffffe0000972000 x20: 000000000000000a
> [    0.129940] x19: fffffc000004f026 x18: 000000000000000b
> [    0.135483] x17: 0000000000000023 x16: 000000000000059f
> [    0.141026] x15: 0000000000007fff x14: 000000000000038e
> [    0.146543] x13: ff00000000000000 x12: ffffffffffffffff
> [    0.152060] x11: 0000000000000010 x10: 00000000fffffff6
> [    0.157585] x9 : 0000000000000050 x8 : fffffe03fa7401b0
> [    0.163111] x7 : fffffe0001115a00 x6 : fffffe0001115a00
> [    0.168620] x5 : fffffe0001115000 x4 : 0000000000000010
> [    0.174146] x3 : 0000000000000010 x2 : 000000000000000b
> [    0.179689] x1 : 00000000fffffff5 x0 : 0000000000000000
> [    0.185215]
> [    0.186765] Process swapper/0 (pid: 0, stack limit = 0xfffffe0000f10058)
> [    0.193724] Stack: (0xfffffe0000f13b00 to 0xfffffe0000f14000)
> [    0.199707] 3b00: 00000000 fffffe03 00000666 00000000 00f13bd0 fffffe00 0044f388 fffffe00
> ...
> [    0.539605] Call trace:
> [    0.542150] [<fffffe0000460458>] acpi_ns_lookup+0x520/0x734
> [    0.547935] [<fffffe000044f384>] acpi_ds_load1_begin_op+0x384/0x4b4
> [    0.554445] [<fffffe0000469594>] acpi_ps_build_named_op+0xfc/0x228
> [    0.560868] [<fffffe00004698c8>] acpi_ps_create_op+0x208/0x340
> [    0.566945] [<fffffe0000468ea8>] acpi_ps_parse_loop+0x208/0x7f8
> [    0.573092] [<fffffe000046a6b8>] acpi_ps_parse_aml+0x1c0/0x434
> [    0.579153] [<fffffe0000463cb0>] acpi_ns_one_complete_parse+0x1a4/0x1ec
> [    0.586017] [<fffffe0000463d88>] acpi_ns_parse_table+0x90/0x130
> [    0.592181] [<fffffe0000462f94>] acpi_ns_load_table+0xc8/0x1b0
> [    0.598243] [<fffffe0000e71848>] acpi_load_tables+0xf4/0x234
> [    0.604122] [<fffffe0000e70a0c>] acpi_early_init+0x78/0xc8
> [    0.609828] [<fffffe0000e40928>] start_kernel+0x334/0x3ac
> [    0.615431] Code: 2a00037b b9009fbc 36180057 321d037b (b9400260)
> [    0.621777] ---[ end trace cb88537fdc8fa200 ]---
> [    0.626586] Kernel panic - not syncing: Attempted to kill the idle task!
>
> I also get a different crash when booting with device tree instead of ACPI.
>
> One thing is that early_init_dt_add_memory_arch() is called with UEFI
> (4K) pagesize alignment and size and it clips the passed in range such
> that what is added is the 64K aligned range which fits completely within
> the given range. This may mean nothing gets added if the given range is
> smaller than 64K. And only a subset of the desired range is added in
> other cases. This could theoretically leave bits of devicetree and/or
> initrd unmapped, but that doesn't seem to be the case here. I'll keep
> poking at it...
>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>>  arch/arm64/kernel/efi.c | 69 +++++++++++++++++++------------------------------
>>  1 file changed, 26 insertions(+), 43 deletions(-)
>>
>> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
>> index 3009c22e2620..af2214c692d3 100644
>> --- a/arch/arm64/kernel/efi.c
>> +++ b/arch/arm64/kernel/efi.c
>> @@ -40,13 +40,6 @@ static int __init uefi_debug_setup(char *str)
>>  }
>>  early_param("uefi_debug", uefi_debug_setup);
>>
>> -static int __init is_normal_ram(efi_memory_desc_t *md)
>> -{
>> -     if (md->attribute & EFI_MEMORY_WB)
>> -             return 1;
>> -     return 0;
>> -}
>> -
>>  static int __init uefi_init(void)
>>  {
>>       efi_char16_t *c16;
>> @@ -105,28 +98,11 @@ out:
>>       return retval;
>>  }
>>
>> -/*
>> - * Return true for RAM regions we want to permanently reserve.
>> - */
>> -static __init int is_reserve_region(efi_memory_desc_t *md)
>> -{
>> -     switch (md->type) {
>> -     case EFI_LOADER_CODE:
>> -     case EFI_LOADER_DATA:
>> -     case EFI_BOOT_SERVICES_CODE:
>> -     case EFI_BOOT_SERVICES_DATA:
>> -     case EFI_CONVENTIONAL_MEMORY:
>> -             return 0;
>> -     default:
>> -             break;
>> -     }
>> -     return is_normal_ram(md);
>> -}
>> -
>> -static __init void reserve_regions(void)
>> +static __init void process_memory_map(void)
>>  {
>>       efi_memory_desc_t *md;
>>       u64 paddr, npages, size;
>> +     u32 lost = 0;
>>
>>       if (uefi_debug)
>>               pr_info("Processing EFI memory map:\n");
>> @@ -134,31 +110,38 @@ static __init void reserve_regions(void)
>>       for_each_efi_memory_desc(&memmap, md) {
>>               paddr = md->phys_addr;
>>               npages = md->num_pages;
>> +             size = npages << EFI_PAGE_SHIFT;
>>
>>               if (uefi_debug) {
>>                       char buf[64];
>>
>> -                     pr_info("  0x%012llx-0x%012llx %s",
>> -                             paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
>> +                     pr_info("  0x%012llx-0x%012llx %s\n",
>> +                             paddr, paddr + size - 1,
>>                               efi_md_typeattr_format(buf, sizeof(buf), md));
>>               }
>>
>> -             memrange_efi_to_native(&paddr, &npages);
>> -             size = npages << PAGE_SHIFT;
>> -
>> -             if (is_normal_ram(md))
>> -                     early_init_dt_add_memory_arch(paddr, size);
>> -
>> -             if (is_reserve_region(md)) {
>> -                     memblock_reserve(paddr, size);
>> -                     if (uefi_debug)
>> -                             pr_cont("*");
>> -             }
>> -
>> -             if (uefi_debug)
>> -                     pr_cont("\n");
>> +             if (!efi_mem_is_usable_region(md))
>> +                     continue;
>> +
>> +             early_init_dt_add_memory_arch(paddr, size);
>> +
>> +             /*
>> +              * Keep a tally of how much memory we are losing due to
>> +              * rounding of regions that are not aligned to the page
>> +              * size. We cannot easily recover this memory without
>> +              * sorting the memory map and attempting to merge adjacent
>> +              * usable regions.
>> +              */
>> +             if (PAGE_SHIFT != EFI_PAGE_SHIFT)
>> +                     lost += (npages << EFI_PAGE_SHIFT) -
>> +                             round_down(max_t(s64, size - PAGE_ALIGN(paddr) +
>> +                                              md->phys_addr, 0),
>> +                                        PAGE_SIZE);
>>       }
>>
>> +     if (lost > SZ_1K)
>> +             pr_warn("efi: lost %u KB of RAM to rounding\n", lost / SZ_1K);
>> +
>>       set_bit(EFI_MEMMAP, &efi.flags);
>>  }
>>
>> @@ -182,7 +165,7 @@ void __init efi_init(void)
>>
>>       WARN_ON(uefi_init() < 0);
>>
>> -     reserve_regions();
>> +     process_memory_map();
>>  }
>>
>>  static int __init arm64_enter_virtual_mode(void)
>
>



More information about the linux-arm-kernel mailing list