[PATCH v5 2/2] arm64: mm: install KPTI nG mappings with MMU enabled
Mark Rutland
mark.rutland at arm.com
Tue Jun 14 02:01:36 PDT 2022
On Thu, Jun 09, 2022 at 07:43:20PM +0200, Ard Biesheuvel wrote:
> In cases where we unmap the kernel while running in user space, we rely
> on ASIDs to distinguish the minimal trampoline from the full kernel
> mapping, and this means we must use non-global attributes for those
> mappings, to ensure they are scoped by ASID and will not hit in the TLB
> inadvertently.
>
> We only do this when needed, as this is generally more costly in terms
> of TLB pressure, and so we boot without these non-global attributes, and
> apply them to all existing kernel mappings once all CPUs are up and we
> know whether or not the non-global attributes are needed. At this point,
> we cannot simply unmap and remap the entire address space, so we have to
> update all existing block and page descriptors in place.
>
> Currently, we go through a lot of trouble to perform these updates with
> the MMU and caches off, to avoid violating break before make (BBM) rules
> imposed by the architecture. Since we make changes to page tables that
> are not covered by the ID map, we gain access to those descriptors by
> disabling translations altogether. This means that the stores to memory
> are issued with device attributes, and require extra care in terms of
> coherency, which is costly. We also rely on the ID map to access a
> shared flag, which requires the ID map to be executable and writable at
> the same time, which is another thing we'd prefer to avoid.
>
> So let's switch to an approach where we replace the kernel mapping with
> a minimal mapping of a few pages that can be used for a minimal, ad-hoc
> fixmap that we can use to map each page table in turn as we traverse the
> hierarchy.
>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
This addresses all my concerns, so FWIW:
Reviewed-by: Mark Rutland <mark.rutland at arm.com>
Thanks for respinning this, and sorry I didn't post the version I promised!
Mark.
> ---
> arch/arm64/kernel/cpufeature.c | 54 ++++++++++++-
> arch/arm64/mm/mmu.c | 7 ++
> arch/arm64/mm/proc.S | 81 +++++++++++++-------
> 3 files changed, 113 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 42ea2bd856c6..c2a64c9e451e 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -1645,14 +1645,34 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
> }
>
> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> +#define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT))
> +
> +extern
> +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
> + phys_addr_t size, pgprot_t prot,
> + phys_addr_t (*pgtable_alloc)(int), int flags);
> +
> +static phys_addr_t kpti_ng_temp_alloc;
> +
> +static phys_addr_t kpti_ng_pgd_alloc(int shift)
> +{
> + kpti_ng_temp_alloc -= PAGE_SIZE;
> + return kpti_ng_temp_alloc;
> +}
> +
> static void __nocfi
> kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
> {
> - typedef void (kpti_remap_fn)(int, int, phys_addr_t);
> + typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long);
> extern kpti_remap_fn idmap_kpti_install_ng_mappings;
> kpti_remap_fn *remap_fn;
>
> int cpu = smp_processor_id();
> + int levels = CONFIG_PGTABLE_LEVELS;
> + int order = order_base_2(levels);
> + u64 kpti_ng_temp_pgd_pa = 0;
> + pgd_t *kpti_ng_temp_pgd;
> + u64 alloc = 0;
>
> if (__this_cpu_read(this_cpu_vector) == vectors) {
> const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI);
> @@ -1670,12 +1690,40 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
>
> remap_fn = (void *)__pa_symbol(function_nocfi(idmap_kpti_install_ng_mappings));
>
> + if (!cpu) {
> + alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order);
> + kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE);
> + kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd);
> +
> + //
> + // Create a minimal page table hierarchy that permits us to map
> + // the swapper page tables temporarily as we traverse them.
> + //
> + // The physical pages are laid out as follows:
> + //
> + // +--------+-/-------+-/------+-\\--------+
> + // : PTE[] : | PMD[] : | PUD[] : || PGD[] :
> + // +--------+-\-------+-\------+-//--------+
> + // ^
> + // The first page is mapped into this hierarchy at a PMD_SHIFT
> + // aligned virtual address, so that we can manipulate the PTE
> + // level entries while the mapping is active. The first entry
> + // covers the PTE[] page itself, the remaining entries are free
> + // to be used as an ad-hoc fixmap.
> + //
> + create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc),
> + KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL,
> + kpti_ng_pgd_alloc, 0);
> + }
> +
> cpu_install_idmap();
> - remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
> + remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA);
> cpu_uninstall_idmap();
>
> - if (!cpu)
> + if (!cpu) {
> + free_pages(alloc, order);
> arm64_use_ng_mappings = true;
> + }
> }
> #else
> static void
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index be4d6c3f5692..c5563ff990da 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -388,6 +388,13 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
> } while (pgdp++, addr = next, addr != end);
> }
>
> +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> +extern __alias(__create_pgd_mapping)
> +void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
> + phys_addr_t size, pgprot_t prot,
> + phys_addr_t (*pgtable_alloc)(int), int flags);
> +#endif
> +
> static phys_addr_t __pgd_pgtable_alloc(int shift)
> {
> void *ptr = (void *)__get_free_page(GFP_PGTABLE_KERNEL);
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 660887152dba..972ce8d7f2c5 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -14,6 +14,7 @@
> #include <asm/asm-offsets.h>
> #include <asm/asm_pointer_auth.h>
> #include <asm/hwcap.h>
> +#include <asm/kernel-pgtable.h>
> #include <asm/pgtable-hwdef.h>
> #include <asm/cpufeature.h>
> #include <asm/alternative.h>
> @@ -200,20 +201,19 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
> .popsection
>
> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> +
> +#define KPTI_NG_PTE_FLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
> +
> .pushsection ".idmap.text", "awx"
>
> .macro kpti_mk_tbl_ng, type, num_entries
> add end_\type\()p, cur_\type\()p, #\num_entries * 8
> .Ldo_\type:
> - dc cvac, cur_\type\()p // Ensure any existing dirty
> - dmb sy // lines are written back before
> - ldr \type, [cur_\type\()p] // loading the entry
> + ldr \type, [cur_\type\()p] // Load the entry
> tbz \type, #0, .Lnext_\type // Skip invalid and
> tbnz \type, #11, .Lnext_\type // non-global entries
> orr \type, \type, #PTE_NG // Same bit for blocks and pages
> - str \type, [cur_\type\()p] // Update the entry and ensure
> - dmb sy // that it is visible to all
> - dc civac, cur_\()\type\()p // CPUs.
> + str \type, [cur_\type\()p] // Update the entry
> .ifnc \type, pte
> tbnz \type, #1, .Lderef_\type
> .endif
> @@ -223,8 +223,29 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
> b.ne .Ldo_\type
> .endm
>
> + /*
> + * Dereference the current table entry and map it into the temporary
> + * fixmap slot associated with the current level.
> + */
> + .macro kpti_map_pgtbl, type, level
> + str xzr, [temp_pte, #8 * (\level + 1)] // break before make
> + dsb nshst
> + add pte, temp_pte, #PAGE_SIZE * (\level + 1)
> + lsr pte, pte, #12
> + tlbi vaae1, pte
> + dsb nsh
> + isb
> +
> + phys_to_pte pte, cur_\type\()p
> + add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1)
> + orr pte, pte, pte_flags
> + str pte, [temp_pte, #8 * (\level + 1)]
> + dsb nshst
> + .endm
> +
> /*
> - * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
> + * void __kpti_install_ng_mappings(int cpu, int num_secondaries, phys_addr_t temp_pgd,
> + * unsigned long temp_pte_va)
> *
> * Called exactly once from stop_machine context by each CPU found during boot.
> */
> @@ -232,8 +253,10 @@ __idmap_kpti_flag:
> .long 1
> SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> cpu .req w0
> + temp_pte .req x0
> num_cpus .req w1
> - swapper_pa .req x2
> + pte_flags .req x1
> + temp_pgd_phys .req x2
> swapper_ttb .req x3
> flag_ptr .req x4
> cur_pgdp .req x5
> @@ -246,9 +269,10 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> cur_ptep .req x14
> end_ptep .req x15
> pte .req x16
> + valid .req x17
>
> + mov x5, x3 // preserve temp_pte arg
> mrs swapper_ttb, ttbr1_el1
> - restore_ttbr1 swapper_ttb
> adr flag_ptr, __idmap_kpti_flag
>
> cbnz cpu, __idmap_kpti_secondary
> @@ -260,28 +284,28 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> eor w17, w17, num_cpus
> cbnz w17, 1b
>
> - /* We need to walk swapper, so turn off the MMU. */
> - pre_disable_mmu_workaround
> - mrs x17, sctlr_el1
> - bic x17, x17, #SCTLR_ELx_M
> - msr sctlr_el1, x17
> + /* Switch to the temporary page tables on this CPU only */
> + __idmap_cpu_set_reserved_ttbr1 x8, x9
> + offset_ttbr1 temp_pgd_phys, x8
> + msr ttbr1_el1, temp_pgd_phys
> isb
>
> + mov temp_pte, x5
> + mov pte_flags, #KPTI_NG_PTE_FLAGS
> +
> /* Everybody is enjoying the idmap, so we can rewrite swapper. */
> /* PGD */
> - mov cur_pgdp, swapper_pa
> + adrp cur_pgdp, swapper_pg_dir
> + kpti_map_pgtbl pgd, 0
> kpti_mk_tbl_ng pgd, PTRS_PER_PGD
>
> - /* Publish the updated tables and nuke all the TLBs */
> - dsb sy
> - tlbi vmalle1is
> - dsb ish
> - isb
> + /* Ensure all the updated entries are visible to secondary CPUs */
> + dsb ishst
>
> - /* We're done: fire up the MMU again */
> - mrs x17, sctlr_el1
> - orr x17, x17, #SCTLR_ELx_M
> - set_sctlr_el1 x17
> + /* We're done: fire up swapper_pg_dir again */
> + __idmap_cpu_set_reserved_ttbr1 x8, x9
> + msr ttbr1_el1, swapper_ttb
> + isb
>
> /* Set the flag to zero to indicate that we're all done */
> str wzr, [flag_ptr]
> @@ -292,6 +316,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> .if CONFIG_PGTABLE_LEVELS > 3
> pud .req x10
> pte_to_phys cur_pudp, pgd
> + kpti_map_pgtbl pud, 1
> kpti_mk_tbl_ng pud, PTRS_PER_PUD
> b .Lnext_pgd
> .else /* CONFIG_PGTABLE_LEVELS <= 3 */
> @@ -304,6 +329,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> .if CONFIG_PGTABLE_LEVELS > 2
> pmd .req x13
> pte_to_phys cur_pmdp, pud
> + kpti_map_pgtbl pmd, 2
> kpti_mk_tbl_ng pmd, PTRS_PER_PMD
> b .Lnext_pud
> .else /* CONFIG_PGTABLE_LEVELS <= 2 */
> @@ -314,12 +340,15 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> .Lderef_pmd:
> /* PTE */
> pte_to_phys cur_ptep, pmd
> + kpti_map_pgtbl pte, 3
> kpti_mk_tbl_ng pte, PTRS_PER_PTE
> b .Lnext_pmd
>
> .unreq cpu
> + .unreq temp_pte
> .unreq num_cpus
> - .unreq swapper_pa
> + .unreq pte_flags
> + .unreq temp_pgd_phys
> .unreq cur_pgdp
> .unreq end_pgdp
> .unreq pgd
> @@ -332,6 +361,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
> .unreq cur_ptep
> .unreq end_ptep
> .unreq pte
> + .unreq valid
>
> /* Secondary CPUs end up here */
> __idmap_kpti_secondary:
> @@ -351,7 +381,6 @@ __idmap_kpti_secondary:
> cbnz w16, 1b
>
> /* All done, act like nothing happened */
> - offset_ttbr1 swapper_ttb, x16
> msr ttbr1_el1, swapper_ttb
> isb
> ret
> --
> 2.30.2
>
More information about the linux-arm-kernel
mailing list