[PATCH v2 14/19] arm64: Enable LPA2 at boot if supported by the system

Ryan Roberts ryan.roberts at arm.com
Mon Nov 28 06:54:21 PST 2022


On 24/11/2022 12:39, Ard Biesheuvel wrote:
> Update the early kernel mapping code to take 52-bit virtual addressing
> into account based on the LPA2 feature. This is a bit more involved than
> LVA (which is supported with 64k pages only), given that some page table
> descriptor bits change meaning in this case.
> 
> To keep the handling in asm to a minimum, the initial ID map is still
> created with 48-bit virtual addressing, which implies that the kernel
> image must be loaded into 48-bit addressable physical memory. This is
> currently required by the boot protocol, even though we happen to
> support placement outside of that for LVA/64k based configurations.
> 
> Enabling LPA2 involves more than setting TCR.T1SZ to a lower value,
> there is also a DS bit in TCR that needs to be set, and which changes
> the meaning of bits [9:8] in all page table descriptors. Since we cannot
> enable DS and every live page table descriptor at the same time, let's
> pivot through another temporary mapping. This avoids the need to
> reintroduce manipulations of the page tables with the MMU and caches
> disabled.
> 
> To permit the LPA2 feature to be overridden on the kernel command line,
> which may be necessary to work around silicon errata, or to deal with
> mismatched features on heterogeneous SoC designs, test for CPU feature
> overrides first, and only then enable LPA2.
> 
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
>  arch/arm64/include/asm/assembler.h      |   7 +-
>  arch/arm64/include/asm/kernel-pgtable.h |  25 +++--
>  arch/arm64/include/asm/memory.h         |   4 +
>  arch/arm64/kernel/head.S                |   9 +-
>  arch/arm64/kernel/image-vars.h          |   2 +
>  arch/arm64/kernel/pi/map_kernel.c       | 103 +++++++++++++++++++-
>  arch/arm64/mm/init.c                    |   2 +-
>  arch/arm64/mm/mmu.c                     |   8 +-
>  arch/arm64/mm/proc.S                    |   4 +
>  9 files changed, 151 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 786bf62826a8..30eee6473cf0 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -609,11 +609,16 @@ alternative_endif
>   * but we have to add an offset so that the TTBR1 address corresponds with the
>   * pgdir entry that covers the lowest 48-bit addressable VA.
>   *
> + * Note that this trick only works for LVA/64k pages: LPA2/4k pages use an
> + * additional paging level, and with LPA2/16k pages, we would end up with a
> + * TTBR address that is not 64-byte aligned, so there we reduce the number
> + * of paging levels for the non-LPA2 case.
> + *
>   * orr is used as it can cover the immediate value (and is idempotent).
>   * 	ttbr: Value of ttbr to set, modified.
>   */
>  	.macro	offset_ttbr1, ttbr, tmp
> -#ifdef CONFIG_ARM64_VA_BITS_52
> +#if defined(CONFIG_ARM64_VA_BITS_52) && !defined(CONFIG_ARM64_LPA2)
>  	mrs	\tmp, tcr_el1
>  	and	\tmp, \tmp, #TCR_T1SZ_MASK
>  	cmp	\tmp, #TCR_T1SZ(VA_BITS_MIN)
> diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
> index faa11e8b4a0e..2359b2af0c4c 100644
> --- a/arch/arm64/include/asm/kernel-pgtable.h
> +++ b/arch/arm64/include/asm/kernel-pgtable.h
> @@ -20,12 +20,16 @@
>   */
>  #ifdef CONFIG_ARM64_4K_PAGES
>  #define INIT_IDMAP_USES_PMD_MAPS	1
> -#define INIT_IDMAP_TABLE_LEVELS		(CONFIG_PGTABLE_LEVELS - 1)
>  #else
>  #define INIT_IDMAP_USES_PMD_MAPS	0
> -#define INIT_IDMAP_TABLE_LEVELS		(CONFIG_PGTABLE_LEVELS)
>  #endif
>  
> +/* how many levels of translation are required to cover 'x' bits of VA space */
> +#define VA_LEVELS(x)		(((x) - 4) / (PAGE_SHIFT - 3))
> +#define INIT_IDMAP_TABLE_LEVELS	(VA_LEVELS(VA_BITS_MIN) - INIT_IDMAP_USES_PMD_MAPS)
> +
> +#define INIT_IDMAP_ROOT_SHIFT	(VA_LEVELS(VA_BITS_MIN) * (PAGE_SHIFT - 3) + 3)
> +
>  /*
>   * If KASLR is enabled, then an offset K is added to the kernel address
>   * space. The bottom 21 bits of this offset are zero to guarantee 2MB
> @@ -52,7 +56,14 @@
>  #define EARLY_ENTRIES(vstart, vend, shift, add) \
>  	((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add)
>  
> -#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
> +#if CONFIG_PGTABLE_LEVELS > 4
> +/* the kernel is covered entirely by the pgd_t at the top of the VA space */
> +#define EARLY_PGDS	1
> +#else
> +#define EARLY_PGDS	0
> +#endif
> +
> +#define EARLY_P4DS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, INIT_IDMAP_ROOT_SHIFT, add))
>  
>  #if INIT_IDMAP_TABLE_LEVELS > 3
>  #define EARLY_PUDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT, add))
> @@ -66,11 +77,13 @@
>  #define EARLY_PMDS(vstart, vend, add) (0)
>  #endif
>  
> -#define EARLY_PAGES(vstart, vend, add) ( 1 			/* PGDIR page */				\
> -			+ EARLY_PGDS((vstart), (vend), add) 	/* each PGDIR needs a next level page table */	\
> +#define EARLY_PAGES(vstart, vend, add) ( 1 			/* PGDIR/P4D page */				\
> +			+ EARLY_P4DS((vstart), (vend), add) 	/* each P4D needs a next level page table */	\
>  			+ EARLY_PUDS((vstart), (vend), add)	/* each PUD needs a next level page table */	\
>  			+ EARLY_PMDS((vstart), (vend), add))	/* each PMD needs a next level page table */
> -#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR) + EARLY_SEGMENT_EXTRA_PAGES))
> +
> +#define INIT_DIR_SIZE	(PAGE_SIZE * (EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR) + \
> +			 EARLY_SEGMENT_EXTRA_PAGES + EARLY_PGDS))
>  
>  /* the initial ID map may need two extra pages if it needs to be extended */
>  #if VA_BITS_MIN < 48
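
(Not a review comment, just spelling out the VA_LEVELS()/INIT_IDMAP_ROOT_SHIFT
arithmetic for other readers, assuming I've followed the macros correctly; for
the 52-bit VA configurations:

  4k  (PAGE_SHIFT 12, VA_BITS_MIN 48): VA_LEVELS(48) = 44/9  = 4, root shift = 4*9  + 3 = 39
  16k (PAGE_SHIFT 14, VA_BITS_MIN 47): VA_LEVELS(47) = 43/11 = 3, root shift = 3*11 + 3 = 36
  64k (PAGE_SHIFT 16, VA_BITS_MIN 48): VA_LEVELS(48) = 44/13 = 3, root shift = 3*13 + 3 = 42

i.e. the early page tables keep the root level of a 48-bit VA space (47-bit
for 16k), which matches the commit message's point that the initial ID map is
still created with 48-bit virtual addressing even when LPA2 is enabled later.)
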
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index b3826ff2e52b..4f617e271008 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -54,7 +54,11 @@
>  #define FIXADDR_TOP		(VMEMMAP_START - SZ_32M)
>  
>  #if VA_BITS > 48
> +#ifdef CONFIG_ARM64_16K_PAGES
> +#define VA_BITS_MIN		(47)
> +#else
>  #define VA_BITS_MIN		(48)
> +#endif
>  #else
>  #define VA_BITS_MIN		(VA_BITS)
>  #endif
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 94de42dfe97d..6be121949c06 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -198,7 +198,7 @@ SYM_CODE_END(preserve_boot_args)
>  	mov \tbl, \sv
>  	.endif
>  .L_\@:
> -	compute_indices \vstart, \vend, #PGDIR_SHIFT, \istart, \iend, \count
> +	compute_indices \vstart, \vend, #INIT_IDMAP_ROOT_SHIFT, \istart, \iend, \count
>  	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
> @@ -610,9 +610,16 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
>  alternative_if_not ARM64_HAS_LVA
>  	ret
>  alternative_else_nop_endif
> +#ifndef CONFIG_ARM64_LPA2
>  	mrs_s	x0, SYS_ID_AA64MMFR2_EL1
>  	and	x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
>  	cbnz	x0, 2f
> +#else
> +	mrs	x0, id_aa64mmfr0_el1
> +	sbfx	x0, x0, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4
> +	cmp	x0, #ID_AA64MMFR0_EL1_TGRAN_LPA2
> +	b.ge	2f
> +#endif
>  
>  	update_early_cpu_boot_status \
>  		CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
> diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
> index 82bafa1f869c..f48b6f09d278 100644
> --- a/arch/arm64/kernel/image-vars.h
> +++ b/arch/arm64/kernel/image-vars.h
> @@ -56,6 +56,8 @@ PROVIDE(__pi_arm64_sw_feature_override	= arm64_sw_feature_override);
>  PROVIDE(__pi_arm64_use_ng_mappings	= arm64_use_ng_mappings);
>  PROVIDE(__pi__ctype			= _ctype);
>  
> +PROVIDE(__pi_init_idmap_pg_dir		= init_idmap_pg_dir);
> +PROVIDE(__pi_init_idmap_pg_end		= init_idmap_pg_end);
>  PROVIDE(__pi_init_pg_dir		= init_pg_dir);
>  PROVIDE(__pi_init_pg_end		= init_pg_end);
>  PROVIDE(__pi_swapper_pg_dir		= swapper_pg_dir);
> diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
> index a9472ab8d901..75d643da56c8 100644
> --- a/arch/arm64/kernel/pi/map_kernel.c
> +++ b/arch/arm64/kernel/pi/map_kernel.c
> @@ -133,6 +133,20 @@ static bool __init arm64_early_this_cpu_has_lva(void)
>  						    ID_AA64MMFR2_EL1_VARange_SHIFT);
>  }
>  
> +static bool __init arm64_early_this_cpu_has_lpa2(void)
> +{
> +	u64 mmfr0;
> +	int feat;
> +
> +	mmfr0 = read_sysreg(id_aa64mmfr0_el1);
> +	mmfr0 &= ~id_aa64mmfr0_override.mask;
> +	mmfr0 |= id_aa64mmfr0_override.val;
> +	feat = cpuid_feature_extract_signed_field(mmfr0,
> +						  ID_AA64MMFR0_EL1_TGRAN_SHIFT);
> +
> +	return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
> +}

This fails to compile when configured for 64KB pages, since
ID_AA64MMFR0_EL1_TGRAN_LPA2 is only defined for 4KB and 16KB granules (see
sysreg.h).

Suggest:

static bool __init arm64_early_this_cpu_has_lpa2(void)
{
#ifdef ID_AA64MMFR0_EL1_TGRAN_LPA2
	u64 mmfr0;
	int feat;

	mmfr0 = read_sysreg(id_aa64mmfr0_el1);
	mmfr0 &= ~id_aa64mmfr0_override.mask;
	mmfr0 |= id_aa64mmfr0_override.val;
	feat = cpuid_feature_extract_signed_field(mmfr0,
						  ID_AA64MMFR0_EL1_TGRAN_SHIFT);

	return feat >= ID_AA64MMFR0_EL1_TGRAN_LPA2;
#else
	return false;
#endif
}
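
FWIW, the reason the constant is granule specific (and hence missing for 64K)
is that ID_AA64MMFR0_EL1 describes each granule in its own signed 4-bit field,
and only the 4KB and 16KB fields have a "supported with LPA2/52-bit" encoding.
Very roughly -- an illustrative kernel-style sketch, with the field positions
and values taken from the Arm ARM rather than the kernel's sysreg.h:

static inline int extract_signed_field(u64 reg, int shift)
{
	/* sign-extend the 4-bit field starting at bit 'shift' */
	return (s64)(reg << (64 - shift - 4)) >> 60;
}

static bool lpa2_capable_4k(u64 mmfr0)
{
	/* TGran4 at [31:28]: 0b0001 means 4KB granule with 52-bit/LPA2 */
	return extract_signed_field(mmfr0, 28) >= 1;
}

static bool lpa2_capable_16k(u64 mmfr0)
{
	/* TGran16 at [23:20]: 0b0010 means 16KB granule with 52-bit/LPA2 */
	return extract_signed_field(mmfr0, 20) >= 2;
}

If I'm reading sysreg.h right, ID_AA64MMFR0_EL1_TGRAN_LPA2 resolves to the 4KB
or 16KB variant of the above depending on the configured granule, and has no
64K counterpart to resolve to, so the #ifdef fallback to false matches what
the hardware can report anyway.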


> +
>  static bool __init arm64_early_this_cpu_has_pac(void)
>  {
>  	u64 isar1, isar2;
> @@ -254,11 +268,85 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
>  	}
>  
>  	/* Copy the root page table to its final location */
> -	memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PGD_SIZE);
> +	memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
>  	dsb(ishst);
>  	idmap_cpu_replace_ttbr1(swapper_pg_dir);
>  }
>  
> +static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
> +{
> +	u64 sctlr = read_sysreg(sctlr_el1);
> +	u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
> +
> +	/* Update TCR.T0SZ in case we entered with a 47-bit ID map */
> +	tcr &= ~TCR_T0SZ_MASK;
> +	tcr |= TCR_T0SZ(48);
> +
> +	asm("	msr	sctlr_el1, %0		;"
> +	    "	isb				;"
> +	    "   msr     ttbr0_el1, %1		;"
> +	    "   msr     tcr_el1, %2		;"
> +	    "	isb				;"
> +	    "	tlbi    vmalle1			;"
> +	    "	dsb     nsh			;"
> +	    "	isb				;"
> +	    "	msr     sctlr_el1, %3		;"
> +	    "	isb				;"
> +	    ::	"r"(sctlr & ~SCTLR_ELx_M), "r"(ttbr), "r"(tcr), "r"(sctlr));
> +}
> +
> +static void remap_idmap_for_lpa2(void)
> +{
> +	extern pgd_t init_idmap_pg_dir[], init_idmap_pg_end[];
> +	pgd_t *pgdp = (void *)init_pg_dir + PAGE_SIZE;
> +	pgprot_t text_prot = PAGE_KERNEL_ROX;
> +	pgprot_t data_prot = PAGE_KERNEL;
> +
> +	/* clear the bits that change meaning once LPA2 is turned on */
> +	pgprot_val(text_prot) &= ~PTE_SHARED;
> +	pgprot_val(data_prot) &= ~PTE_SHARED;
> +
> +	/*
> +	 * We have to clear bits [9:8] in all block or page descriptors in the
> +	 * initial ID map, as otherwise they will be (mis)interpreted as
> +	 * physical address bits once we flick the LPA2 switch (TCR.DS). Since
> +	 * we cannot manipulate live descriptors in that way without creating
> +	 * potential TLB conflicts, let's create another temporary ID map in a
> +	 * LPA2 compatible fashion, and update the initial ID map while running
> +	 * from that.
> +	 */
> +	map_segment(init_pg_dir, &pgdp, 0, _stext, __inittext_end, text_prot,
> +		    false, 0);
> +	map_segment(init_pg_dir, &pgdp, 0, __initdata_begin, _end, data_prot,
> +		    false, 0);
> +	dsb(ishst);
> +	set_ttbr0_for_lpa2((u64)init_pg_dir);
> +
> +	/*
> +	 * Recreate the initial ID map with the same granularity as before.
> +	 * Don't bother with the FDT, we no longer need it after this.
> +	 */
> +	memset(init_idmap_pg_dir, 0,
> +	       (u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
> +
> +	pgdp = (void *)init_idmap_pg_dir + PAGE_SIZE;
> +	map_segment(init_idmap_pg_dir, &pgdp, 0,
> +		    PTR_ALIGN_DOWN(&_stext[0], INIT_IDMAP_BLOCK_SIZE),
> +		    PTR_ALIGN_DOWN(&__bss_start[0], INIT_IDMAP_BLOCK_SIZE),
> +		    text_prot, false, 0);
> +	map_segment(init_idmap_pg_dir, &pgdp, 0,
> +		    PTR_ALIGN_DOWN(&__bss_start[0], INIT_IDMAP_BLOCK_SIZE),
> +		    PTR_ALIGN(&_end[0], INIT_IDMAP_BLOCK_SIZE),
> +		    data_prot, false, 0);
> +	dsb(ishst);
> +
> +	/* switch back to the updated initial ID map */
> +	set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
> +
> +	/* wipe the temporary ID map from memory */
> +	memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
> +}
> +
>  asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
>  {
>  	static char const chosen_str[] __initconst = "/chosen";
> @@ -266,6 +354,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
>  	u64 va_base, pa_base = (u64)&_text;
>  	u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
>  	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
> +	bool va52 = (VA_BITS == 52);
>  
>  	/* Clear BSS and the initial page tables */
>  	memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
> @@ -295,7 +384,17 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
>  			arm64_use_ng_mappings = true;
>  	}
>  
> -	if (VA_BITS == 52 && arm64_early_this_cpu_has_lva())
> +	if (IS_ENABLED(CONFIG_ARM64_LPA2)) {
> +		if (arm64_early_this_cpu_has_lpa2()) {
> +			remap_idmap_for_lpa2();
> +		} else {
> +			va52 = false;
> +			root_level++;
> +		}
> +	} else if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) {
> +		va52 &= arm64_early_this_cpu_has_lva();
> +	}
> +	if (va52)
>  		sysreg_clear_set(tcr_el1, TCR_T1SZ_MASK, TCR_T1SZ(VA_BITS));
>  
>  	va_base = KIMAGE_VADDR + kaslr_offset;
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 4b4651ee47f2..498d327341b4 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -315,7 +315,7 @@ void __init arm64_memblock_init(void)
>  	 * physical address of PAGE_OFFSET, we have to *subtract* from it.
>  	 */
>  	if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
> -		memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
> +		memstart_addr -= _PAGE_OFFSET(vabits_actual) - _PAGE_OFFSET(52);
>  
>  	/*
>  	 * Apply the memory limit if it was set. Since the kernel may be loaded
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index d089bc78e592..ba5423ff7039 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -541,8 +541,12 @@ static void __init map_mem(pgd_t *pgdp)
>  	 * entries at any level are being shared between the linear region and
>  	 * the vmalloc region. Check whether this is true for the PGD level, in
>  	 * which case it is guaranteed to be true for all other levels as well.
> +	 * (Unless we are running with support for LPA2, in which case the
> +	 * entire reduced VA space is covered by a single pgd_t which will have
> +	 * been populated without the PXNTable attribute by the time we get here.)
>  	 */
> -	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end));
> +	BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end) &&
> +		     pgd_index(_PAGE_OFFSET(VA_BITS_MIN)) != PTRS_PER_PGD - 1);
>  
>  	if (can_set_direct_map())
>  		flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
> @@ -726,7 +730,7 @@ static void __init create_idmap(void)
>  
>  void __init paging_init(void)
>  {
> -	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
> +	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(vabits_actual - 1, 0));
>  
>  	map_mem(swapper_pg_dir);
>  
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 179e213bbe2d..d95df732b672 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -489,7 +489,11 @@ SYM_FUNC_START(__cpu_setup)
>  #if VA_BITS > VA_BITS_MIN
>  	mov		x9, #64 - VA_BITS
>  alternative_if ARM64_HAS_LVA
> +	tcr_set_t0sz	tcr, x9
>  	tcr_set_t1sz	tcr, x9
> +#ifdef CONFIG_ARM64_LPA2
> +	orr		tcr, tcr, #TCR_DS
> +#endif
>  alternative_else_nop_endif
>  #endif
>  



