[PATCH] arm64: mm: install KPTI nG mappings with MMU enabled

Ard Biesheuvel ardb at kernel.org
Tue Apr 12 03:07:59 PDT 2022


On Tue, 12 Apr 2022 at 11:29, Ard Biesheuvel <ardb at kernel.org> wrote:
>
> In cases where we unmap the kernel while running in user space, we rely
> on ASIDs to distinguish the minimal trampoline from the full kernel
> mapping, and this means we must use non-global attributes for those
> mappings, to ensure they are scoped by ASID and will not hit in the TLB
> inadvertently.
>
> We only do this when needed, as this is generally more costly in terms
> of TLB pressure, and so we boot without these non-global attributes, and
> apply them to all existing kernel mappings once all CPUs are up and we
> know whether or not the non-global attributes are needed. At this point,
> we cannot simply unmap and remap the entire address space, so we have to
> update all existing block and page descriptors in place.
>
> Currently, we go through a lot of trouble to perform these updates with
> the MMU and caches off, to avoid violating break before make (BBM) rules
> imposed by the architecture. Since we make changes to page tables that
> are not covered by the ID map, we gain access to those descriptors by
> disabling translations altogether. This means that the stores to memory
> are issued with device attributes, and require extra care in terms of
> coherency, which is costly. We also rely on the ID map to access a
> shared flag, which requires the ID map to be executable and writable at
> the same time, which is another thing we'd prefer to avoid.
>
> So let's switch to an approach where we replace the kernel mapping with
> a minimal mapping of a few pages that can be used for the shared flag,
> as well as a minimal, ad-hoc fixmap that we can use to map each page
> table in turn as we traverse the hierarchy. This requires one PTE per
> level, and an associated page worth of VA space in the temporary
> mapping.
>
> Note that table entries do not have a non-global attribute, so avoid
> setting this bit unnecessarily as well.
>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
>  arch/arm64/include/asm/mmu.h   |  4 ++
>  arch/arm64/kernel/cpufeature.c | 51 ++++++++++++++++--
>  arch/arm64/mm/mmu.c            |  8 ++-
>  arch/arm64/mm/proc.S           | 98 +++++++++++++++++++---------------
>  4 files changed, 109 insertions(+), 52 deletions(-)
>
...
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index abc3696bd601..33b1517f2e37 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -14,6 +14,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/asm_pointer_auth.h>
>  #include <asm/hwcap.h>
> +#include <asm/kernel-pgtable.h>
>  #include <asm/pgtable-hwdef.h>
>  #include <asm/cpufeature.h>
>  #include <asm/alternative.h>
> @@ -167,8 +168,7 @@ SYM_FUNC_END(cpu_do_resume)
>
>         .pushsection ".idmap.text", "awx"
>
> -.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
> -       adrp    \tmp1, reserved_pg_dir
> +.macro __idmap_cpu_set_ttbr1, tmp1, tmp2
>         phys_to_ttbr \tmp2, \tmp1
>         offset_ttbr1 \tmp2, \tmp1
>         msr     ttbr1_el1, \tmp2
> @@ -187,7 +187,8 @@ SYM_FUNC_END(cpu_do_resume)
>  SYM_FUNC_START(idmap_cpu_replace_ttbr1)
>         save_and_disable_daif flags=x2
>
> -       __idmap_cpu_set_reserved_ttbr1 x1, x3
> +       adrp    x1, reserved_pg_dir
> +       __idmap_cpu_set_ttbr1 x1, x3
>
>         offset_ttbr1 x0, x3
>         msr     ttbr1_el1, x0
> @@ -200,36 +201,52 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
>         .popsection
>
>  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> +
> +#define KPTI_NG_PTE_FLAGS      (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
> +
>         .pushsection ".idmap.text", "awx"
>
>         .macro  __idmap_kpti_get_pgtable_ent, type
> -       dc      cvac, cur_\()\type\()p          // Ensure any existing dirty
> -       dmb     sy                              // lines are written back before
> -       ldr     \type, [cur_\()\type\()p]       // loading the entry
> -       tbz     \type, #0, skip_\()\type        // Skip invalid and
> -       tbnz    \type, #11, skip_\()\type       // non-global entries
> +       ldr     \type, [cur_\()\type\()p]       // Load the entry
> +       tbz     \type, #0, next_\()\type        // Skip invalid and
> +       tbnz    \type, #11, next_\()\type       // non-global entries
>         .endm
>
>         .macro __idmap_kpti_put_pgtable_ent_ng, type
>         orr     \type, \type, #PTE_NG           // Same bit for blocks and pages
> -       str     \type, [cur_\()\type\()p]       // Update the entry and ensure
> -       dmb     sy                              // that it is visible to all
> -       dc      civac, cur_\()\type\()p         // CPUs.
> +       str     \type, [cur_\()\type\()p]       // Update the entry
> +       .endm
> +
> +       /*
> +        * Dereference the current table entry and map it into the temporary
> +        * page table slot associated with the current level. The ad-hoc fixmap
> +        * is a set of PTEs that is located above the PTEs that cover the level 3
> +        * page table and the scratch page that precedes it.
> +        */
> +       .macro  __idmap_kpti_map_pgtable, type, level
> +       phys_to_pte cur_\type\()p, cur_\type\()p
> +       orr     cur_\type\()p, cur_\type\()p, pte_flags
> +       str     cur_\type\()p, [temp_pte, #8 * (\level + 2)]

Just realised that this needs a break-before-make (BBM) sequence, and that
the first DSB (the `dsb nshst` before the TLBI) probably needs ISHST scope
as well.

> +       add     cur_\type\()p, flag_ptr, #PAGE_SIZE * (\level + 2)
> +       dsb     nshst
> +       tlbi    vaae1is, cur_\type\()p
> +       dsb     nsh
>         .endm
>



More information about the linux-arm-kernel mailing list