[PATCH v4 6/8] arm64/efi: move SetVirtualAddressMap() to UEFI stub
Ard Biesheuvel
ard.biesheuvel at linaro.org
Wed Jan 7 04:16:02 PST 2015
On 7 January 2015 at 12:06, Leif Lindholm <leif.lindholm at linaro.org> wrote:
> On Mon, Dec 22, 2014 at 10:59:02AM +0000, Ard Biesheuvel wrote:
>> In order to support kexec, the kernel needs to be able to deal with the
>> state of the UEFI firmware after SetVirtualAddressMap() has been called.
>> To avoid having separate code paths for non-kexec and kexec, let's move
>> the call to SetVirtualAddressMap() to the stub: this will guarantee us
>> that it will only be called once (since the stub is not executed during
>> kexec), and ensures that the UEFI state is identical between kexec and
>> normal boot.
>>
>> This implies that the layout of the virtual mapping needs to be created
>> by the stub as well. All regions are rounded up to a naturally aligned
>> multiple of 64 KB (for compatibility with 64k page size kernels) and recorded
>> in the UEFI memory map. The kernel proper reads those values and installs
>> the mappings in a dedicated set of page tables that are swapped in during
>> UEFI Runtime Services calls.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>> arch/arm64/include/asm/efi.h | 20 +++-
>> arch/arm64/kernel/efi.c | 223 ++++++++++++++++++++-----------------
>> arch/arm64/kernel/setup.c | 1 +
>> drivers/firmware/efi/libstub/fdt.c | 137 ++++++++++++++++++++++-
>> 4 files changed, 270 insertions(+), 111 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
>> index 71291253114f..6cc668a378c5 100644
>> --- a/arch/arm64/include/asm/efi.h
>> +++ b/arch/arm64/include/asm/efi.h
>> @@ -7,28 +7,36 @@
>> #ifdef CONFIG_EFI
>> extern void efi_init(void);
>> extern void efi_idmap_init(void);
>> +extern void efi_virtmap_init(void);
>> #else
>> #define efi_init()
>> #define efi_idmap_init()
>> +#define efi_virtmap_init
>> #endif
>>
>> #define efi_call_virt(f, ...) \
>> ({ \
>> - efi_##f##_t *__f = efi.systab->runtime->f; \
>> + efi_##f##_t *__f; \
>> efi_status_t __s; \
>> \
>> - kernel_neon_begin(); \
>> + kernel_neon_begin(); /* disables preemption */ \
>
> Nitpick: adding comment to otherwise untouched source line.
>
>> + efi_virtmap_load(); \
>> + __f = efi.systab->runtime->f; \
>> __s = __f(__VA_ARGS__); \
>> + efi_virtmap_unload(); \
>> kernel_neon_end(); \
>> __s; \
>> })
>>
>> #define __efi_call_virt(f, ...) \
>> ({ \
>> - efi_##f##_t *__f = efi.systab->runtime->f; \
>> + efi_##f##_t *__f; \
>> \
>> - kernel_neon_begin(); \
>> + kernel_neon_begin(); /* disables preemption */ \
>
> Same nitpick.
>
Is there anything wrong with that?
Would you prefer the comment to be on a separate line?
>> + efi_virtmap_load(); \
>> + __f = efi.systab->runtime->f; \
>> __f(__VA_ARGS__); \
>> + efi_virtmap_unload(); \
>> kernel_neon_end(); \
>> })
>>
>> @@ -45,5 +53,9 @@ extern void efi_idmap_init(void);
>> #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__)
>>
>> #define EFI_ALLOC_ALIGN SZ_64K
>> +#define EFI_VIRTMAP EFI_ARCH_1
>> +
>> +void efi_virtmap_load(void);
>> +void efi_virtmap_unload(void);
>>
>> #endif /* _ASM_EFI_H */
>> diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
>> index 6fac253bc783..2ebe67ffb629 100644
>> --- a/arch/arm64/kernel/efi.c
>> +++ b/arch/arm64/kernel/efi.c
>> @@ -11,25 +11,30 @@
>> *
>> */
>>
>> +#include <linux/atomic.h>
>> #include <linux/dmi.h>
>> #include <linux/efi.h>
>> #include <linux/export.h>
>> #include <linux/memblock.h>
>> +#include <linux/mm_types.h>
>> #include <linux/bootmem.h>
>> #include <linux/of.h>
>> #include <linux/of_fdt.h>
>> +#include <linux/rbtree.h>
>> +#include <linux/rwsem.h>
>> #include <linux/sched.h>
>> #include <linux/slab.h>
>> +#include <linux/spinlock.h>
>>
>> #include <asm/cacheflush.h>
>> #include <asm/efi.h>
>> #include <asm/tlbflush.h>
>> #include <asm/mmu_context.h>
>> +#include <asm/mmu.h>
>> +#include <asm/pgtable.h>
>>
>> struct efi_memory_map memmap;
>>
>> -static efi_runtime_services_t *runtime;
>> -
>> static u64 efi_system_table;
>>
>> static int uefi_debug __initdata;
>> @@ -69,9 +74,33 @@ static void __init efi_setup_idmap(void)
>> }
>> }
>>
>> +/*
>> + * Translate an EFI virtual address into a physical address: this is necessary,
>> + * as some data members of the EFI system table are virtually remapped after
>> + * SetVirtualAddressMap() has been called.
>> + */
>> +static phys_addr_t efi_to_phys(unsigned long addr)
>> +{
>> + efi_memory_desc_t *md;
>> +
>> + for_each_efi_memory_desc(&memmap, md) {
>> + if (!(md->attribute & EFI_MEMORY_RUNTIME))
>> + continue;
>> + if (md->virt_addr == 0)
>> + /* no virtual mapping has been installed by the stub */
>> + break;
>> + if (md->virt_addr <= addr &&
>> + (addr - md->virt_addr) < (md->num_pages << EFI_PAGE_SHIFT))
>> + return md->phys_addr + addr - md->virt_addr;
>> + }
>> + return addr;
>> +}
>> +
>> static int __init uefi_init(void)
>> {
>> efi_char16_t *c16;
>> + void *config_tables;
>> + u64 table_size;
>> char vendor[100] = "unknown";
>> int i, retval;
>>
>> @@ -99,7 +128,7 @@ static int __init uefi_init(void)
>> efi.systab->hdr.revision & 0xffff);
>>
>> /* Show what we know for posterity */
>> - c16 = early_memremap(efi.systab->fw_vendor,
>> + c16 = early_memremap(efi_to_phys(efi.systab->fw_vendor),
>> sizeof(vendor));
>> if (c16) {
>> for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
>> @@ -112,8 +141,14 @@ static int __init uefi_init(void)
>> efi.systab->hdr.revision >> 16,
>> efi.systab->hdr.revision & 0xffff, vendor);
>>
>> - retval = efi_config_init(NULL);
>> + table_size = sizeof(efi_config_table_64_t) * efi.systab->nr_tables;
>> + config_tables = early_memremap(efi_to_phys(efi.systab->tables),
>> + table_size);
>> +
>> + retval = efi_config_parse_tables(config_tables,
>> + efi.systab->nr_tables, NULL);
>>
>> + early_memunmap(config_tables, table_size);
>> out:
>> early_memunmap(efi.systab, sizeof(efi_system_table_t));
>> return retval;
>> @@ -328,51 +363,9 @@ void __init efi_idmap_init(void)
>> efi_setup_idmap();
>> }
>>
>> -static int __init remap_region(efi_memory_desc_t *md, void **new)
>> -{
>> - u64 paddr, vaddr, npages, size;
>> -
>> - paddr = md->phys_addr;
>> - npages = md->num_pages;
>> - memrange_efi_to_native(&paddr, &npages);
>> - size = npages << PAGE_SHIFT;
>> -
>> - if (is_normal_ram(md))
>> - vaddr = (__force u64)ioremap_cache(paddr, size);
>> - else
>> - vaddr = (__force u64)ioremap(paddr, size);
>> -
>> - if (!vaddr) {
>> - pr_err("Unable to remap 0x%llx pages @ %p\n",
>> - npages, (void *)paddr);
>> - return 0;
>> - }
>> -
>> - /* adjust for any rounding when EFI and system pagesize differs */
>> - md->virt_addr = vaddr + (md->phys_addr - paddr);
>> -
>> - if (uefi_debug)
>> - pr_info(" EFI remap 0x%012llx => %p\n",
>> - md->phys_addr, (void *)md->virt_addr);
>> -
>> - memcpy(*new, md, memmap.desc_size);
>> - *new += memmap.desc_size;
>> -
>> - return 1;
>> -}
>> -
>> -/*
>> - * Switch UEFI from an identity map to a kernel virtual map
>> - */
>
> No function description at all?
Seems I was a bit lazy there.
> Arguably this function could change name now as well, since UEFI will
> already be in virtual mode. arm64_enable_runtime_map()?
>
OK
>> static int __init arm64_enter_virtual_mode(void)
>> {
>> - efi_memory_desc_t *md;
>> - phys_addr_t virtmap_phys;
>> - void *virtmap, *virt_md;
>> - efi_status_t status;
>> u64 mapsize;
>> - int count = 0;
>> - unsigned long flags;
>>
>> if (!efi_enabled(EFI_BOOT)) {
>> pr_info("EFI services will not be available.\n");
>> @@ -395,79 +388,28 @@ static int __init arm64_enter_virtual_mode(void)
>>
>> efi.memmap = &memmap;
>>
>> - /* Map the runtime regions */
>> - virtmap = kmalloc(mapsize, GFP_KERNEL);
>> - if (!virtmap) {
>> - pr_err("Failed to allocate EFI virtual memmap\n");
>> - return -1;
>> - }
>> - virtmap_phys = virt_to_phys(virtmap);
>> - virt_md = virtmap;
>> -
>> - for_each_efi_memory_desc(&memmap, md) {
>> - if (!(md->attribute & EFI_MEMORY_RUNTIME))
>> - continue;
>> - if (!remap_region(md, &virt_md))
>> - goto err_unmap;
>> - ++count;
>> - }
>> -
>> - efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table);
>> + efi.systab = (__force void *)ioremap_cache(efi_system_table,
>> + sizeof(efi_system_table_t));
>> if (!efi.systab) {
>> - /*
>> - * If we have no virtual mapping for the System Table at this
>> - * point, the memory map doesn't cover the physical offset where
>> - * it resides. This means the System Table will be inaccessible
>> - * to Runtime Services themselves once the virtual mapping is
>> - * installed.
>> - */
>> - pr_err("Failed to remap EFI System Table -- buggy firmware?\n");
>> - goto err_unmap;
>> + pr_err("Failed to remap EFI System Table\n");
>> + return -1;
>> }
>> set_bit(EFI_SYSTEM_TABLES, &efi.flags);
>>
>> - local_irq_save(flags);
>> - cpu_switch_mm(idmap_pg_dir, &init_mm);
>> -
>> - /* Call SetVirtualAddressMap with the physical address of the map */
>> - runtime = efi.systab->runtime;
>> - efi.set_virtual_address_map = runtime->set_virtual_address_map;
>> -
>> - status = efi.set_virtual_address_map(count * memmap.desc_size,
>> - memmap.desc_size,
>> - memmap.desc_version,
>> - (efi_memory_desc_t *)virtmap_phys);
>> - cpu_set_reserved_ttbr0();
>> - flush_tlb_all();
>> - local_irq_restore(flags);
>> -
>> - kfree(virtmap);
>> -
>> free_boot_services();
>>
>> - if (status != EFI_SUCCESS) {
>> - pr_err("Failed to set EFI virtual address map! [%lx]\n",
>> - status);
>> + if (!efi_enabled(EFI_VIRTMAP)) {
>> + pr_err("No UEFI virtual mapping was installed -- runtime services will not be available\n");
>> return -1;
>> }
>>
>> /* Set up runtime services function pointers */
>> - runtime = efi.systab->runtime;
>> efi_native_runtime_setup();
>> set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
>>
>> efi.runtime_version = efi.systab->hdr.revision;
>>
>> return 0;
>> -
>> -err_unmap:
>> - /* unmap all mappings that succeeded: there are 'count' of those */
>> - for (virt_md = virtmap; count--; virt_md += memmap.desc_size) {
>> - md = virt_md;
>> - iounmap((__force void __iomem *)md->virt_addr);
>> - }
>> - kfree(virtmap);
>> - return -1;
>> }
>> early_initcall(arm64_enter_virtual_mode);
>>
>> @@ -484,3 +426,78 @@ static int __init arm64_dmi_init(void)
>> return 0;
>> }
>> core_initcall(arm64_dmi_init);
>> +
>> +static pgd_t efi_pgd[PTRS_PER_PGD] __page_aligned_bss;
>> +
>> +static struct mm_struct efi_mm = {
>> + .mm_rb = RB_ROOT,
>> + .pgd = efi_pgd,
>> + .mm_users = ATOMIC_INIT(2),
>> + .mm_count = ATOMIC_INIT(1),
>> + .mmap_sem = __RWSEM_INITIALIZER(efi_mm.mmap_sem),
>> + .page_table_lock = __SPIN_LOCK_UNLOCKED(efi_mm.page_table_lock),
>> + .mmlist = LIST_HEAD_INIT(efi_mm.mmlist),
>> + INIT_MM_CONTEXT(efi_mm)
>> +};
>> +
>> +static void efi_set_pgd(struct mm_struct *mm)
>> +{
>> + cpu_switch_mm(mm->pgd, mm);
>> + flush_tlb_all();
>> + if (icache_is_aivivt())
>> + __flush_icache_all();
>> +}
>> +
>> +void efi_virtmap_load(void)
>> +{
>> + WARN_ON(preemptible());
>> + efi_set_pgd(&efi_mm);
>> +}
>> +
>> +void efi_virtmap_unload(void)
>> +{
>> + efi_set_pgd(current->active_mm);
>> +}
>> +
>> +void __init efi_virtmap_init(void)
>> +{
>> + efi_memory_desc_t *md;
>> +
>> + if (!efi_enabled(EFI_BOOT))
>> + return;
>> +
>> + for_each_efi_memory_desc(&memmap, md) {
>> + u64 paddr, npages, size;
>> + pgprot_t prot;
>> +
>> + if (!(md->attribute & EFI_MEMORY_RUNTIME))
>> + continue;
>> + if (WARN(md->virt_addr == 0,
>> + "UEFI virtual mapping incomplete or missing -- no entry found for 0x%llx\n",
>> + md->phys_addr))
>> + return;
>> +
>> + paddr = md->phys_addr;
>> + npages = md->num_pages;
>> + memrange_efi_to_native(&paddr, &npages);
>> + size = npages << PAGE_SHIFT;
>> +
>> + pr_info(" EFI remap 0x%012llx => %p\n",
>> + md->phys_addr, (void *)md->virt_addr);
>> +
>> + /*
>> + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
>> + * executable, everything else can be mapped with the XN bits
>> + * set.
>> + */
>> + if (!is_normal_ram(md))
>> + prot = __pgprot(PROT_DEVICE_nGnRE);
>> + else if (md->type == EFI_RUNTIME_SERVICES_CODE)
>> + prot = PAGE_KERNEL_EXEC;
>> + else
>> + prot = PAGE_KERNEL;
>> +
>> + create_pgd_mapping(&efi_mm, paddr, md->virt_addr, size, prot);
>> + }
>> + set_bit(EFI_VIRTMAP, &efi.flags);
>> +}
>> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
>> index b80991166754..d8390f507da0 100644
>> --- a/arch/arm64/kernel/setup.c
>> +++ b/arch/arm64/kernel/setup.c
>> @@ -402,6 +402,7 @@ void __init setup_arch(char **cmdline_p)
>> request_standard_resources();
>>
>> efi_idmap_init();
>> + efi_virtmap_init();
>
> Could these two be merged together into one function?
> Say efi_memmap_init()?
>
Well, I decided to do it like this because efi_idmap_init() gets
removed in its entirety (including this invocation) in a subsequent
patch.
>>
>> unflatten_device_tree();
>>
>> diff --git a/drivers/firmware/efi/libstub/fdt.c b/drivers/firmware/efi/libstub/fdt.c
>> index c846a9608cbd..76bc8abf41d1 100644
>> --- a/drivers/firmware/efi/libstub/fdt.c
>> +++ b/drivers/firmware/efi/libstub/fdt.c
>> @@ -167,6 +167,94 @@ fdt_set_fail:
>> #define EFI_FDT_ALIGN EFI_PAGE_SIZE
>> #endif
>>
>> +static efi_status_t get_memory_map(efi_system_table_t *sys_table_arg,
>> + efi_memory_desc_t **map,
>> + unsigned long *map_size,
>> + unsigned long *desc_size,
>> + u32 *desc_ver, unsigned long *key_ptr)
>> +{
>> + efi_status_t status;
>> +
>> + /*
>> + * Call get_memory_map() with 0 size to retrieve the size of the
>> + * required allocation.
>> + */
>> + *map_size = 0;
>> + status = efi_call_early(get_memory_map, map_size, NULL,
>> + key_ptr, desc_size, desc_ver);
>> + if (status != EFI_BUFFER_TOO_SMALL)
>> + return EFI_LOAD_ERROR;
>> +
>> + /*
>> + * Add an additional efi_memory_desc_t to map_size because we're doing
>> + * an allocation which may be in a new descriptor region. Then double it
>> + * to give us some scratch space to prepare the input virtmap to give
>> + * to SetVirtualAddressMap(). Note that this is EFI_LOADER_DATA memory,
>> + * and the kernel memblock_reserve()'s only the size of the actual
>> + * memory map, so the scratch space is freed again automatically.
>> + */
>> + *map_size += *desc_size;
>> + status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
>> + *map_size * 2, (void **)map);
>> + if (status != EFI_SUCCESS)
>> + return status;
>> +
>> + status = efi_call_early(get_memory_map, map_size, *map,
>> + key_ptr, desc_size, desc_ver);
>> + if (status != EFI_SUCCESS)
>> + efi_call_early(free_pool, *map);
>> + return status;
>> +}
>> +
>> +/*
>> + * This is the base address at which to start allocating virtual memory ranges
>> + * for UEFI Runtime Services. This is a userland range so that we can use any
>> + * allocation we choose, and eliminate the risk of a conflict after kexec.
>> + */
>> +#define EFI_RT_VIRTUAL_BASE 0x40000000
>> +
>> +static void update_memory_map(efi_memory_desc_t *memory_map,
>> + unsigned long map_size, unsigned long desc_size,
>> + int *count)
>> +{
>> + u64 efi_virt_base = EFI_RT_VIRTUAL_BASE;
>> + efi_memory_desc_t *out = (void *)memory_map + map_size;
>> + int l;
>> +
>> + for (l = 0; l < map_size; l += desc_size) {
>> + efi_memory_desc_t *in = (void *)memory_map + l;
>> + u64 paddr, size;
>> +
>> + if (!(in->attribute & EFI_MEMORY_RUNTIME))
>> + continue;
>> +
>> + /*
>> + * Make the mapping compatible with 64k pages: this allows
>> + * a 4k page size kernel to kexec a 64k page size kernel and
>> + * vice versa.
>> + */
>> + paddr = round_down(in->phys_addr, SZ_64K);
>> + size = round_up(in->num_pages * EFI_PAGE_SIZE +
>> + in->phys_addr - paddr, SZ_64K);
>> +
>> + /*
>> + * Avoid wasting memory on PTEs by choosing a virtual base that
>> + * is compatible with section mappings if this region has the
>> + * appropriate size and physical alignment. (Sections are 2 MB
>> + * on 4k granule kernels)
>> + */
>> + if (IS_ALIGNED(in->phys_addr, SZ_2M) && size >= SZ_2M)
>> + efi_virt_base = round_up(efi_virt_base, SZ_2M);
>> +
>> + in->virt_addr = efi_virt_base + in->phys_addr - paddr;
>> + efi_virt_base += size;
>> +
>> + memcpy(out, in, desc_size);
>> + out = (void *)out + desc_size;
>> + ++*count;
>> + }
>> +}
>> +
>> /*
>> * Allocate memory for a new FDT, then add EFI, commandline, and
>> * initrd related fields to the FDT. This routine increases the
>> @@ -196,6 +284,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
>> efi_memory_desc_t *memory_map;
>> unsigned long new_fdt_size;
>> efi_status_t status;
>> + int runtime_entry_count = 0;
>>
>> /*
>> * Estimate size of new FDT, and allocate memory for it. We
>> @@ -216,8 +305,8 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
>> * we can get the memory map key needed for
>> * exit_boot_services().
>> */
>> - status = efi_get_memory_map(sys_table, &memory_map, &map_size,
>> - &desc_size, &desc_ver, &mmap_key);
>> + status = get_memory_map(sys_table, &memory_map, &map_size,
>> + &desc_size, &desc_ver, &mmap_key);
>> if (status != EFI_SUCCESS)
>> goto fail_free_new_fdt;
>>
>> @@ -248,12 +337,52 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
>> }
>> }
>>
>> + /*
>> + * Update the memory map with virtual addresses. The function will also
>> + * populate the spare second half of the memory_map allocation with
>> + * copies of just the EFI_MEMORY_RUNTIME entries so that we can pass it
>> + * straight into SetVirtualAddressMap()
>> + */
>> + update_memory_map(memory_map, map_size, desc_size,
>> + &runtime_entry_count);
>> +
>> + pr_efi(sys_table,
>> + "Exiting boot services and installing virtual address map...\n");
>> +
>> /* Now we are ready to exit_boot_services.*/
>> status = sys_table->boottime->exit_boot_services(handle, mmap_key);
>>
>> + if (status == EFI_SUCCESS) {
>> + efi_set_virtual_address_map_t *svam;
>>
>> - if (status == EFI_SUCCESS)
>> - return status;
>> + /* Install the new virtual address map */
>> + svam = sys_table->runtime->set_virtual_address_map;
>> + status = svam(runtime_entry_count * desc_size, desc_size,
>> + desc_ver, (void *)memory_map + map_size);
>> +
>> + /*
>> + * We are beyond the point of no return here, so if the call to
>> + * SetVirtualAddressMap() failed, we need to signal that to the
>> + * incoming kernel but proceed normally otherwise.
>> + */
>> + if (status != EFI_SUCCESS) {
>> + int l;
>> +
>> + /*
>> + * Set the virtual address field of all
>> + * EFI_MEMORY_RUNTIME entries to 0. This will signal
>> + * the incoming kernel that no virtual translation has
>> + * been installed.
>> + */
>> + for (l = 0; l < map_size; l += desc_size) {
>> + efi_memory_desc_t *p = (void *)memory_map + l;
>> +
>> + if (p->attribute & EFI_MEMORY_RUNTIME)
>> + p->virt_addr = 0;
>> + }
>> + }
>> + return EFI_SUCCESS;
>> + }
>>
>> pr_efi_err(sys_table, "Exit boot services failed.\n");
>>
>> --
>> 1.8.3.2
>
> Apart from this, and other comments in the thread, looks good.
>
> /
> Leif
>
More information about the linux-arm-kernel mailing list