[PATCH 4/7] arm64: kpti: Add ->enable callback to remap swapper using nG mappings
Marc Zyngier
marc.zyngier at arm.com
Fri Jan 26 05:40:50 PST 2018
Hey Will,
On 26/01/18 12:03, Will Deacon wrote:
> Defaulting to global mappings for kernel space is generally good for
> performance and appears to be necessary for Cavium ThunderX. If we
> subsequently decide that we need to enable kpti, then we need to rewrite
> our existing page table entries to be non-global. This is fiddly, and
> made worse by the possible use of contiguous mappings, which require
> a strict break-before-make sequence.
>
> Since the enable callback runs on each online CPU from stop_machine
> context, we can have all CPUs enter the idmap, where secondaries can
> wait for the primary CPU to rewrite swapper with its MMU off. It's all
> fairly horrible, but at least it only runs once.
>
> Signed-off-by: Will Deacon <will.deacon at arm.com>
> ---
> arch/arm64/include/asm/assembler.h | 10 ++
> arch/arm64/kernel/cpufeature.c | 25 +++++
> arch/arm64/mm/proc.S | 204 +++++++++++++++++++++++++++++++++++--
> 3 files changed, 231 insertions(+), 8 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 3873dd7b5a32..23251eae6e8a 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -523,6 +523,16 @@ alternative_endif
> #endif
> .endm
>
> + .macro pte_to_phys, phys, pte
> +#ifdef CONFIG_ARM64_PA_BITS_52
> + ror \phys, \pte, #16
> + bfi \phys, \phys, #(16 + 12), #32
> + lsr \phys, \phys, #12
> +#else
> + and \phys, \pte, #PTE_ADDR_MASK
> +#endif
> + .endm
> +
> /**
> * Errata workaround prior to disable MMU. Insert an ISB immediately prior
> * to executing the MSR that will change SCTLR_ELn[M] from a value of 1 to 0.
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 4b6f9051cf0c..cd9f46952db3 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -880,6 +880,30 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
> ID_AA64PFR0_CSV3_SHIFT);
> }
>
> +static int kpti_install_ng_mappings(void *__unused)
> +{
> + typedef void (kpti_remap_fn)(int, int, phys_addr_t);
> + extern kpti_remap_fn idmap_kpti_install_ng_mappings;
> + kpti_remap_fn *remap_fn;
> +
> + static bool kpti_applied = false;
> + int cpu = smp_processor_id();
> +
> + if (kpti_applied)
> + return 0;
> +
> + remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings);
> +
> + cpu_install_idmap();
> + remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir));
> + cpu_uninstall_idmap();
> +
> + if (!cpu)
> + kpti_applied = true;
> +
> + return 0;
> +}
> +
> static int __init parse_kpti(char *str)
> {
> bool enabled;
> @@ -1003,6 +1027,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
> .capability = ARM64_UNMAP_KERNEL_AT_EL0,
> .def_scope = SCOPE_SYSTEM,
> .matches = unmap_kernel_at_el0,
> + .enable = kpti_install_ng_mappings,
> },
> #endif
> {
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 9f177aac6390..cc7d2389edc8 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -166,6 +166,17 @@ ENTRY(cpu_do_switch_mm)
> ENDPROC(cpu_do_switch_mm)
>
> .pushsection ".idmap.text", "ax"
> +
> +.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
> + adrp \tmp1, empty_zero_page
> + phys_to_ttbr \tmp1, \tmp2
> + msr ttbr1_el1, \tmp2
> + isb
> + tlbi vmalle1
> + dsb nsh
> + isb
> +.endm
> +
> /*
> * void idmap_cpu_replace_ttbr1(phys_addr_t new_pgd)
> *
> @@ -175,14 +186,7 @@ ENDPROC(cpu_do_switch_mm)
> ENTRY(idmap_cpu_replace_ttbr1)
> save_and_disable_daif flags=x2
>
> - adrp x1, empty_zero_page
> - phys_to_ttbr x1, x3
> - msr ttbr1_el1, x3
> - isb
> -
> - tlbi vmalle1
> - dsb nsh
> - isb
> + __idmap_cpu_set_reserved_ttbr1 x1, x3
>
> phys_to_ttbr x0, x3
> msr ttbr1_el1, x3
> @@ -194,6 +198,190 @@ ENTRY(idmap_cpu_replace_ttbr1)
> ENDPROC(idmap_cpu_replace_ttbr1)
> .popsection
>
> +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> + .pushsection ".idmap.text", "ax"
> +
> + .macro __idmap_kpti_get_pgtable_ent, type
> + dc cvac, cur_\()\type\()p // Ensure any existing dirty
> + dmb sy // lines are written back before
> + ldr \type, [cur_\()\type\()p] // loading the entry
> + tbz \type, #0, next_\()\type // Skip invalid entries
> + .endm
> +
> + .macro __idmap_kpti_put_pgtable_ent_ng, type
> + orr \type, \type, #PTE_NG // Same bit for blocks and pages
> + str \type, [cur_\()\type\()p] // Update the entry and ensure it
> + dc civac, cur_\()\type\()p // is visible to all CPUs.
> + .endm
> +
> +/*
> + * void __kpti_install_ng_mappings(int cpu, int num_cpus, phys_addr_t swapper)
> + *
> + * Called exactly once from stop_machine context by each CPU found during boot.
> + */
> +__idmap_kpti_flag:
> + .long 1
I'm a bit concerned that we're placing the counter in a section that is
not writeable (at least in theory). We can either have the idmap
writeable entirely, or have a idmap data section (which means extending
it to be more than a single page).
Not sure if that's a concern, but I thought I'd raise it. The core works
pretty well as is, so it cannot be completely wrong... ;-)
> +ENTRY(idmap_kpti_install_ng_mappings)
> + cpu .req w0
> + num_cpus .req w1
> + swapper_pa .req x2
> + swapper_ttb .req x3
> + flag_ptr .req x4
> + cur_pgdp .req x5
> + end_pgdp .req x6
> + pgd .req x7
> + cur_pudp .req x8
> + end_pudp .req x9
> + pud .req x10
> + cur_pmdp .req x11
> + end_pmdp .req x12
> + pmd .req x13
> + cur_ptep .req x14
> + end_ptep .req x15
> + pte .req x16
> +
> + mrs swapper_ttb, ttbr1_el1
> + adr flag_ptr, __idmap_kpti_flag
> +
> + cbnz cpu, __idmap_kpti_secondary
> +
> + /* We're the boot CPU. Wait for the others to catch up */
> + sevl
> +1: wfe
> + ldaxr w18, [flag_ptr]
> + eor w18, w18, num_cpus
> + cbnz w18, 1b
> +
> + /* We need to walk swapper, so turn off the MMU. */
> + pre_disable_mmu_workaround
> + mrs x18, sctlr_el1
> + bic x18, x18, #1
nit: You should be able to use SCTLR_ELx_M, as we already include
sysreg.h (indirectly).
> + msr sctlr_el1, x18
> + isb
> +
> + /* Everybody is enjoying the idmap, so we can rewrite swapper. */
> + /* PGD */
> + mov cur_pgdp, swapper_pa
> + add end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
> +do_pgd: __idmap_kpti_get_pgtable_ent pgd
> + tbnz pgd, #1, walk_puds
> + __idmap_kpti_put_pgtable_ent_ng pgd
> +next_pgd:
> + add cur_pgdp, cur_pgdp, #8
> + cmp cur_pgdp, end_pgdp
> + b.ne do_pgd
> +
> + /* Publish the updated tables and nuke all the TLBs */
> + dsb sy
> + tlbi vmalle1is
> + dsb ish
> + isb
> +
> + /* We're done: fire up the MMU again */
> + mrs x18, sctlr_el1
> + orr x18, x18, #1
Same here.
> + msr sctlr_el1, x18
> + isb
> +
> + /* Set the flag to zero to indicate that we're all done */
> + str wzr, [flag_ptr]
> + ret
> +
> + /* PUD */
> +walk_puds:
> + .if CONFIG_PGTABLE_LEVELS > 3
> + pte_to_phys cur_pudp, pgd
> + add end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
> +do_pud: __idmap_kpti_get_pgtable_ent pud
> + tbnz pud, #1, walk_pmds
> + __idmap_kpti_put_pgtable_ent_ng pud
> +next_pud:
> + add cur_pudp, cur_pudp, 8
> + cmp cur_pudp, end_pudp
> + b.ne do_pud
> + b next_pgd
> + .else /* CONFIG_PGTABLE_LEVELS <= 3 */
> + mov pud, pgd
> + b walk_pmds
> +next_pud:
> + b next_pgd
> + .endif
> +
> + /* PMD */
> +walk_pmds:
> + .if CONFIG_PGTABLE_LEVELS > 2
> + pte_to_phys cur_pmdp, pud
> + add end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
> +do_pmd: __idmap_kpti_get_pgtable_ent pmd
> + tbnz pmd, #1, walk_ptes
> + __idmap_kpti_put_pgtable_ent_ng pmd
> +next_pmd:
> + add cur_pmdp, cur_pmdp, #8
> + cmp cur_pmdp, end_pmdp
> + b.ne do_pmd
> + b next_pud
> + .else /* CONFIG_PGTABLE_LEVELS <= 2 */
> + mov pmd, pud
> + b walk_ptes
> +next_pmd:
> + b next_pud
> + .endif
> +
> + /* PTE */
> +walk_ptes:
> + pte_to_phys cur_ptep, pmd
> + add end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
> +do_pte: __idmap_kpti_get_pgtable_ent pte
> + __idmap_kpti_put_pgtable_ent_ng pte
> +next_pte:
> + add cur_ptep, cur_ptep, #8
> + cmp cur_ptep, end_ptep
> + b.ne do_pte
> + b next_pmd
> +
> + /* Secondary CPUs end up here */
> +__idmap_kpti_secondary:
> + /* Uninstall swapper before surgery begins */
> + __idmap_cpu_set_reserved_ttbr1 x18, x17
> +
> + /* Increment the flag to let the boot CPU we're ready */
> +1: ldxr w18, [flag_ptr]
> + add w18, w18, #1
> + stxr w17, w18, [flag_ptr]
> + cbnz w17, 1b
> +
> + /* Wait for the boot CPU to finish messing around with swapper */
> + sevl
> +1: wfe
> + ldxr w18, [flag_ptr]
> + cbnz w18, 1b
> +
> + /* All done, act like nothing happened */
> + msr ttbr1_el1, swapper_ttb
> + isb
> + ret
> +
> + .unreq cpu
> + .unreq num_cpus
> + .unreq swapper_pa
> + .unreq swapper_ttb
> + .unreq flag_ptr
> + .unreq cur_pgdp
> + .unreq end_pgdp
> + .unreq pgd
> + .unreq cur_pudp
> + .unreq end_pudp
> + .unreq pud
> + .unreq cur_pmdp
> + .unreq end_pmdp
> + .unreq pmd
> + .unreq cur_ptep
> + .unreq end_ptep
> + .unreq pte
> +ENDPROC(idmap_kpti_install_ng_mappings)
> + .popsection
> +#endif
> +
> /*
> * __cpu_setup
> *
>
Thanks,
M.
--
Jazz is not dead. It just smells funny...
More information about the linux-arm-kernel
mailing list