[PATCH v6 3/6] arm64: head: record the MMU state at primary entry

Ard Biesheuvel ardb at kernel.org
Thu Dec 1 01:18:50 PST 2022


On Tue, 29 Nov 2022 at 17:14, Ard Biesheuvel <ardb at kernel.org> wrote:
>
> Prepare for being able to deal with primary entry with the MMU and
> caches enabled, by recording whether or not we entered with the MMU on
> in register x19 and in a global variable. (Note that writing to this
> variable does not require cache invalidation, nor is it required for
> storing the bootargs in that case, so omit the cache maintenance).
>
> Since boot with the MMU enabled is not permitted by the bare metal boot
> protocol, ensure that a diagnostic is emitted and a taint bit set if
> the MMU was found to be enabled on a non-EFI boot. We will make an
> exception for EFI boot later, which has strict requirements for the
> mapping of system memory, permitting us to relax the boot protocol and
> hand over from the EFI stub to the core kernel with MMU and caches left
> enabled. To reduce the likelihood that bare metal boot will violate this
> requirement, introduce a separate entry point for EFI boot, which is
> different from the one that is invoked by branching to offset #0 in the
> image.
>

This is actually much more fiddly than I though:
- the existing arm64 stub can link to any symbol so there, it does not
make any difference
- the zboot stub needs to find the right entry point into the image,
as the one pointed to by the header is no longer correct.

I confused myself into thinking that we can just grab it from the
PE/COFF header, but that is the header point *into* the EFI stub, not
the one that the stub calls on its way out.

Grabbing stuff from the PE/COFF header is needed anyway, and can be
done in a generic manner.
Grabbing a special EFI-only entry point means mangling the ELF to find
the relative offset from the start of the binary representation of the
image, and passing that information along with the compressed blob to
the decompressor.

So the options are:
- go back to the old approach (and add a panic() in the right place)
- add a branch to the EFI entrypoint at a known offset (in the image header?)
- other ideas?


> While at it, add 'pre_disable_mmu_workaround' macro invocations to
> init_kernel_el, as its manipulation of SCTLR_ELx may amount to disabling
> of the MMU after subsequent patches.
>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
>  arch/arm64/kernel/head.S       | 28 ++++++++++++++++++--
>  arch/arm64/kernel/image-vars.h |  2 +-
>  arch/arm64/kernel/setup.c      |  9 +++++--
>  3 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index bec97aad092c2b43..c3b97f4ae6d769f7 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -77,6 +77,7 @@
>          * primary lowlevel boot path:
>          *
>          *  Register   Scope                      Purpose
> +        *  x19        primary_entry() .. start_kernel()        whether we entered with the MMU on
>          *  x20        primary_entry() .. __primary_switch()    CPU boot mode
>          *  x21        primary_entry() .. start_kernel()        FDT pointer passed at boot in x0
>          *  x22        create_idmap() .. start_kernel()         ID map VA of the DT blob
> @@ -85,8 +86,13 @@
>          *  x25        primary_entry() .. start_kernel()        supported VA size
>          *  x28        create_idmap()                           callee preserved temp register
>          */
> -SYM_CODE_START(primary_entry)
> -       bl      preserve_boot_args
> +SYM_CODE_START(efi_primary_entry)
> +       bl      record_mmu_state
> +       b       0f
> +
> +SYM_INNER_LABEL(primary_entry, SYM_L_LOCAL)
> +       mov     x19, xzr                        // MMU must be off on bare metal boot
> +0:     bl      preserve_boot_args
>         bl      init_kernel_el                  // w0=cpu_boot_mode
>         mov     x20, x0
>         bl      create_idmap
> @@ -109,6 +115,18 @@ SYM_CODE_START(primary_entry)
>         b       __primary_switch
>  SYM_CODE_END(primary_entry)
>
> +SYM_CODE_START_LOCAL(record_mmu_state)
> +       mrs     x19, CurrentEL
> +       cmp     x19, #CurrentEL_EL2
> +       mrs     x19, sctlr_el1
> +       b.ne    0f
> +       mrs     x19, sctlr_el2
> +0:     tst     x19, #SCTLR_ELx_C               // Z := (C == 0)
> +       and     x19, x19, #SCTLR_ELx_M          // isolate M bit
> +       csel    x19, xzr, x19, eq               // clear x19 if Z
> +       ret
> +SYM_CODE_END(record_mmu_state)
> +
>  /*
>   * Preserve the arguments passed by the bootloader in x0 .. x3
>   */
> @@ -119,11 +137,14 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
>         stp     x21, x1, [x0]                   // x0 .. x3 at kernel entry
>         stp     x2, x3, [x0, #16]
>
> +       cbnz    x19, 0f                         // skip cache invalidation if MMU is on
>         dmb     sy                              // needed before dc ivac with
>                                                 // MMU off
>
>         add     x1, x0, #0x20                   // 4 x 8 bytes
>         b       dcache_inval_poc                // tail call
> +0:     str_l   x19, mmu_enabled_at_boot, x0
> +       ret
>  SYM_CODE_END(preserve_boot_args)
>
>  SYM_FUNC_START_LOCAL(clear_page_tables)
> @@ -497,6 +518,7 @@ SYM_FUNC_START(init_kernel_el)
>
>  SYM_INNER_LABEL(init_el1, SYM_L_LOCAL)
>         mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
> +       pre_disable_mmu_workaround
>         msr     sctlr_el1, x0
>         isb
>         mov_q   x0, INIT_PSTATE_EL1
> @@ -529,11 +551,13 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
>         cbz     x0, 1f
>
>         /* Set a sane SCTLR_EL1, the VHE way */
> +       pre_disable_mmu_workaround
>         msr_s   SYS_SCTLR_EL12, x1
>         mov     x2, #BOOT_CPU_FLAG_E2H
>         b       2f
>
>  1:
> +       pre_disable_mmu_workaround
>         msr     sctlr_el1, x1
>         mov     x2, xzr
>  2:
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 8151412653de209c..b925094692983734 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -11,7 +11,7 @@
>  #endif
>
>  PROVIDE(__efistub_kernel_size          = _edata - _text);
> -PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
> +PROVIDE(__efistub_primary_entry_offset = efi_primary_entry - _text);
>
>  /*
>   * The EFI stub has its own symbol namespace prefixed by __efistub_, to
> diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
> index 12cfe9d0d3fac10d..0ac8605a8efe00a5 100644
> --- a/arch/arm64/kernel/setup.c
> +++ b/arch/arm64/kernel/setup.c
> @@ -58,6 +58,7 @@ static int num_standard_resources;
>  static struct resource *standard_resources;
>
>  phys_addr_t __fdt_pointer __initdata;
> +u64 mmu_enabled_at_boot __initdata;
>
>  /*
>   * Standard memory resources
> @@ -332,8 +333,12 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
>         xen_early_init();
>         efi_init();
>
> -       if (!efi_enabled(EFI_BOOT) && ((u64)_text % MIN_KIMG_ALIGN) != 0)
> -            pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
> +       if (!efi_enabled(EFI_BOOT)) {
> +               if ((u64)_text % MIN_KIMG_ALIGN)
> +                       pr_warn(FW_BUG "Kernel image misaligned at boot, please fix your bootloader!");
> +               WARN_TAINT(mmu_enabled_at_boot, TAINT_FIRMWARE_WORKAROUND,
> +                          FW_BUG "Booted with MMU enabled!");
> +       }
>
>         arm64_memblock_init();
>
> --
> 2.35.1
>



More information about the linux-arm-kernel mailing list