[PATCH v2 14/19] arm64: Enable LPA2 at boot if supported by the system
Ryan Roberts
ryan.roberts at arm.com
Mon Nov 28 06:54:21 PST 2022
On 24/11/2022 12:39, Ard Biesheuvel wrote:
> Update the early kernel mapping code to take 52-bit virtual addressing
> into account based on the LPA2 feature. This is a bit more involved than
> LVA (which is supported with 64k pages only), given that some page table
> descriptor bits change meaning in this case.
>
> To keep the handling in asm to a minimum, the initial ID map is still
> created with 48-bit virtual addressing, which implies that the kernel
> image must be loaded into 48-bit addressable physical memory. This is
> currently required by the boot protocol, even though we happen to
> support placement outside of that for LVA/64k based configurations.
>
> Enabling LPA2 involves more than setting TCR.T1SZ to a lower value,
> there is also a DS bit in TCR that needs to be set, and which changes
> the meaning of bits [9:8] in all page table descriptors. Since we cannot
> enable DS and update every live page table descriptor at the same time, let's
> pivot through another temporary mapping. This avoids the need to
> reintroduce manipulations of the page tables with the MMU and caches
> disabled.
>
> To permit the LPA2 feature to be overridden on the kernel command line,
> which may be necessary to work around silicon errata, or to deal with
> mismatched features on heterogeneous SoC designs, test for CPU feature
> overrides first, and only then enable LPA2.
>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
> arch/arm64/include/asm/assembler.h | 7 +-
> arch/arm64/include/asm/kernel-pgtable.h | 25 +++--
> arch/arm64/include/asm/memory.h | 4 +
> arch/arm64/kernel/head.S | 9 +-
> arch/arm64/kernel/image-vars.h | 2 +
> arch/arm64/kernel/pi/map_kernel.c | 103 +++++++++++++++++++-
> arch/arm64/mm/init.c | 2 +-
> arch/arm64/mm/mmu.c | 8 +-
> arch/arm64/mm/proc.S | 4 +
> 9 files changed, 151 insertions(+), 13 deletions(-)
>
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 786bf62826a8..30eee6473cf0 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -609,11 +609,16 @@ alternative_endif
> * but we have to add an offset so that the TTBR1 address corresponds with the
> * pgdir entry that covers the lowest 48-bit addressable VA.
> *
> + * Note that this trick only works for LVA/64k pages - LPA2/4k pages use an
> + * additional paging level, and on LPA2/16k pages, we would end up with a TTBR
> + * address that is not 64-byte aligned, so there we reduce the number of paging
> + * levels for the non-LPA2 case.
> + *
> * orr is used as it can cover the immediate value (and is idempotent).
> * ttbr: Value of ttbr to set, modified.
> */
> .macro offset_ttbr1, ttbr, tmp
> -#ifdef CONFIG_ARM64_VA_BITS_52
> +#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
> mrs \tmp, tcr_el1
> and \tmp, \tmp, #TCR_T1SZ_MASK
> cmp \tmp, #TCR_T1SZ(VA_BITS_MIN)
> diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
> index faa11e8b4a0e..2359b2af0c4c 100644
> --- a/arch/arm64/include/asm/kernel-pgtable.h
> +++ b/arch/arm64/include/asm/kernel-pgtable.h
> @@ -20,12 +20,16 @@
> */
> #ifdef CONFIG_ARM64_4K_PAGES
> #define INIT_IDMAP_USES_PMD_MAPS 1
> -#define INIT_IDMAP_TABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
> #else
> #define INIT_IDMAP_USES_PMD_MAPS 0
> -#define INIT_IDMAP_TABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
> #endif
>
> +/* how many levels of translation are required to cover 'x' bits of VA space */
> +#define VA_LEVELS(x) (((x) - 4) / (PAGE_SHIFT - 3))
> +#define INIT_IDMAP_TABLE_LEVELS (VA_LEVELS(VA_BITS_MIN) - INIT_IDMAP_USES_PMD_MAPS)
> +
> +#define INIT_IDMAP_ROOT_SHIFT (VA_LEVELS(VA_BITS_MIN) * (PAGE_SHIFT - 3) + 3)
> +
> /*
> * If KASLR is enabled, then an offset K is added to the kernel address
> * space. The bottom 21 bits of this offset are zero to guarantee 2MB
> @@ -52,7 +56,14 @@
> #define EARLY_ENTRIES(vstart, vend, shift, add) \
> ((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add)
>
> -#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
> +#if CONFIG_PGTABLE_LEVELS > 4
> +/* the kernel is covered entirely by the pgd_t at the top of the VA space */
> +#define EARLY_PGDS 1
> +#else
> +#define EARLY_PGDS 0
> +#endif
> +
> +#define EARLY_P4DS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, INIT_IDMAP_ROOT_SHIFT, add))
>
> #if INIT_IDMAP_TABLE_LEVELS > 3
> #define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
> @@ -66,11 +77,13 @@
> #define EARLY_PMDS(vstart, vend, add) (0)
> #endif
>
> -#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR page */ \
> - + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \
> +#define EARLY_PAGES(vstart, vend, add) ( 1 /* PGDIR/P4D page */ \
> + + EARLY_P4DS((vstart), (vend), add) /* each P4D needs a next level page table */ \
> + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \
> + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */
> -#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR) + EARLY_SEGMENT_EXTRA_PAGES))
> +
> +#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR) + \
> + EARLY_SEGMENT_EXTRA_PAGES + EARLY_PGDS))
>
> /* the initial ID map may need two extra pages if it needs to be extended */
> #if VA_BITS_MIN < 48
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index b3826ff2e52b..4f617e271008 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -54,7 +54,11 @@
> #define FIXADDR_TOP (VMEMMAP_START - SZ_32M)
>
> #if VA_BITS > 48
> +#ifdef CONFIG_ARM64_16K_PAGES
> +#define VA_BITS_MIN (47)
> +#else
> #define VA_BITS_MIN (48)
> +#endif
> #else
> #define VA_BITS_MIN (VA_BITS)
> #endif
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 94de42dfe97d..6be121949c06 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -198,7 +198,7 @@ SYM_CODE_END(preserve_boot_args)
> mov \tbl, \sv
> .endif
> .L_\@:
> - compute_indices \vstart, \vend, #PGDIR_SHIFT, \istart, \iend, \count
> + compute_indices \vstart, \vend, #INIT_IDMAP_ROOT_SHIFT, \istart, \iend, \count
> mov \sv, \rtbl
> populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
> mov \tbl, \sv
> @@ -610,9 +610,16 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
> alternative_if_not ARM64_HAS_LVA
> ret
> alternative_else_nop_endif
> +#ifndef CONFIG_ARM64_LPA2
> mrs_s x0, SYS_ID_AA64MMFR2_EL1
> and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
> cbnz x0, 2f
> +#else
> + mrs x0, id_aa64mmfr0_el1
> + sbfx x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
> + cmp x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
> + b.ge 2f
> +#endif
>
> update_early_cpu_boot_status \
> CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 82bafa1f869c..f48b6f09d278 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -56,6 +56,8 @@ PROVIDE(__pi_arm64_sw_feature_override = arm64_sw_feature_override);
> PROVIDE(__pi_arm64_use_ng_mappings = arm64_use_ng_mappings);
> PROVIDE(__pi__ctype = _ctype);
>
> +PROVIDE(__pi_init_idmap_pg_dir = init_idmap_pg_dir);
> +PROVIDE(__pi_init_idmap_pg_end = init_idmap_pg_end);
> PROVIDE(__pi_init_pg_dir = init_pg_dir);
> PROVIDE(__pi_init_pg_end = init_pg_end);
> PROVIDE(__pi_swapper_pg_dir = swapper_pg_dir);
> diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
> index a9472ab8d901..75d643da56c8 100644
> --- a/arch/arm64/kernel/pi/map_kernel.c
> +++ b/arch/arm64/kernel/pi/map_kernel.c
> @@ -133,6 +133,20 @@ static bool __init arm64_early_this_cpu_has_lva(void)
> ID_AA64MMFR2_EL1_VARange_SHIFT);
> }
>
> +static bool __init arm64_early_this_cpu_has_lpa2(void)
> +{
> + u64 mmfr0;
> + int feat;
> +
> + mmfr0 = read_sysreg(id_aa64mmfr0_el1);
> + mmfr0 &= ~id_aa64mmfr0_override.mask;
> + mmfr0 |= id_aa64mmfr0_override.val;
> + feat = cpuid_feature_extract_signed_field(mmfr0,
> + ID_AA64MMFR0_EL1_TGRAN_SHIFT);
> +
> + return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
> +}
This fails to compile when configured for 64KB pages, since
ID_AA64MMFR0_EL1_TGRAN_LPA2 is only defined for 4KB and 16KB granules (see
sysreg.h).
Suggest:
static bool __init arm64_early_this_cpu_has_lpa2(void)
{
#ifdef ID_AA64MMFR0_EL1_TGRAN_LPA2
u64 mmfr0;
int feat;
mmfr0 = read_sysreg(id_aa64mmfr0_el1);
mmfr0 &= ~id_aa64mmfr0_override.mask;
mmfr0 |= id_aa64mmfr0_override.val;
feat = cpuid_feature_extract_signed_field(mmfr0,
ID_AA64MMFR0_EL1_TGRAN_SHIFT);
return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
#else
return false;
#endif
}
> +
> static bool __init arm64_early_this_cpu_has_pac(void)
> {
> u64 isar1, isar2;
> @@ -254,11 +268,85 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
> }
>
> /* Copy the root page table to its final location */
> - memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PGD_SIZE);
> + memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
> dsb(ishst);
> idmap_cpu_replace_ttbr1(swapper_pg_dir);
> }
>
> +static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
> +{
> + u64 sctlr = read_sysreg(sctlr_el1);
> + u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
> +
> + /* Update TCR.T0SZ in case we entered with a 47-bit ID map */
> + tcr &= ~TCR_T0SZ_MASK;
> + tcr |= TCR_T0SZ(48);
> +
> + asm(" msr sctlr_el1, %0 ;"
> + " isb ;"
> + " msr ttbr0_el1, %1 ;"
> + " msr tcr_el1, %2 ;"
> + " isb ;"
> + " tlbi vmalle1 ;"
> + " dsb nsh ;"
> + " isb ;"
> + " msr sctlr_el1, %3 ;"
> + " isb ;"
> + :: "r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
> +}
> +
> +static void remap_idmap_for_lpa2(void)
> +{
> + extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
> + pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
> + pgprot_t text_prot = PAGE_KERNEL_ROX;
> + pgprot_t data_prot = PAGE_KERNEL;
> +
> + /* clear the bits that change meaning once LPA2 is turned on */
> + pgprot_val(text_prot) &= ~PTE_SHARED;
> + pgprot_val(data_prot) &= ~PTE_SHARED;
> +
> + /*
> + * We have to clear bits [9:8] in all block or page descriptors in the
> + * initial ID map, as otherwise they will be (mis)interpreted as
> + * physical address bits once we flick the LPA2 switch (TCR.DS). Since
> + * we cannot manipulate live descriptors in that way without creating
> + * potential TLB conflicts, let's create another temporary ID map in a
> + * LPA2 compatible fashion, and update the initial ID map while running
> + * from that.
> + */
> + map_segment(init_pg_dir, &pgdp, 0, _stext, __inittext_end, text_prot,
> + false, 0);
> + map_segment(init_pg_dir, &pgdp, 0, __initdata_begin, _end, data_prot,
> + false, 0);
> + dsb(ishst);
> + set_ttbr0_for_lpa2((u64)init_pg_dir);
> +
> + /*
> + * Recreate the initial ID map with the same granularity as before.
> + * Don't bother with the FDT, we no longer need it after this.
> + */
> + memset(init_idmap_pg_dir, 0,
> + (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
> +
> + pgdp = (void *)init_idmap_pg_dir + PAGE_SIZE;
> + map_segment(init_idmap_pg_dir, &pgdp, 0,
> + PTR_ALIGN_DOWN(&_stext[0], INIT_IDMAP_BLOCK_SIZE),
> + PTR_ALIGN_DOWN(&__bss_start[0], INIT_IDMAP_BLOCK_SIZE),
> + text_prot, false, 0);
> + map_segment(init_idmap_pg_dir, &pgdp, 0,
> + PTR_ALIGN_DOWN(&__bss_start[0], INIT_IDMAP_BLOCK_SIZE),
> + PTR_ALIGN(&_end[0], INIT_IDMAP_BLOCK_SIZE),
> + data_prot, false, 0);
> + dsb(ishst);
> +
> + /* switch back to the updated initial ID map */
> + set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
> +
> + /* wipe the temporary ID map from memory */
> + memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
> +}
> +
> asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
> {
> static char const chosen_str[] __initconst = "/chosen";
> @@ -266,6 +354,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
> u64 va_base, pa_base = (u64)&_text;
> u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
> int root_level = 4 - CONFIG_PGTABLE_LEVELS;
> + bool va52 = (VA_BITS == 52);
>
> /* Clear BSS and the initial page tables */
> memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
> @@ -295,7 +384,17 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
> arm64_use_ng_mappings = true;
> }
>
> - if (VA_BITS == 52 && arm64_early_this_cpu_has_lva())
> + if (IS_ENABLED(CONFIG_ARM64_LPA2)) {
> + if (arm64_early_this_cpu_has_lpa2()) {
> + remap_idmap_for_lpa2();
> + } else {
> + va52 = false;
> + root_level++;
> + }
> + } else if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
> + va52 &= arm64_early_this_cpu_has_lva();
> + }
> + if (va52)
> sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(VA_BITS));
>
> va_base = KIMAGE_VADDR + kaslr_offset;
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 4b4651ee47f2..498d327341b4 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -315,7 +315,7 @@ void __init arm64_memblock_init(void)
> * physical address of PAGE_OFFSET, we have to *subtract* from it.
> */
> if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
> - memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
> + memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);
>
> /*
> * Apply the memory limit if it was set. Since the kernel may be loaded
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index d089bc78e592..ba5423ff7039 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -541,8 +541,12 @@ static void __init map_mem(pgd_t *pgdp)
> * entries at any level are being shared between the linear region and
> * the vmalloc region. Check whether this is true for the PGD level, in
> * which case it is guaranteed to be true for all other levels as well.
> + * (Unless we are running with support for LPA2, in which case the
> + * entire reduced VA space is covered by a single pgd_t which will have
> + * been populated without the PXNTable attribute by the time we get here.)
> */
> - BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
> + BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
> + pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
>
> if (can_set_direct_map())
> flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> @@ -726,7 +730,7 @@ static void __init create_idmap(void)
>
> void __init paging_init(void)
> {
> - idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
> + idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(vabits_actual - 1, 0));
>
> map_mem(swapper_pg_dir);
>
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 179e213bbe2d..d95df732b672 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -489,7 +489,11 @@ SYM_FUNC_START(__cpu_setup)
> #if VA_BITS > VA_BITS_MIN
> mov x9, #64 - VA_BITS
> alternative_if ARM64_HAS_LVA
> + tcr_set_t0sz tcr, x9
> tcr_set_t1sz tcr, x9
> +#ifdef CONFIG_ARM64_LPA2
> + orr tcr, tcr, #TCR_DS
> +#endif
> alternative_else_nop_endif
> #endif
>
More information about the linux-arm-kernel
mailing list