[PATCH v5 05/21] arm64: head: simplify page table mapping macros (slightly)

From: Mark Rutland <mark.rutland at arm.com>
Date: Sun Jun 26 03:07:20 PDT 2022


On Fri, Jun 24, 2022 at 05:06:35PM +0200, Ard Biesheuvel wrote:
> Simplify the macros in head.S that are used to set up the early page
> tables, by switching to immediates for the number of bits that are
> interpreted as the table index at each level. This makes it much
> easier to infer from the instruction stream what is going on, and
> reduces the number of instructions emitted substantially.

Nice!
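
To make the win concrete: each

	ubfx	\istart, \vstart, \shift, \order

is the usual shift+mask index extraction, i.e. in C terms:

	istart = (vstart >> shift) & ((1UL << order) - 1);

so the index computation is now visible in a single instruction.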

> Note that the extended ID map for cases where no additional level needs
> to be configured now uses a compile time size as well, which means that
> we interpret up to 10 bits as the table index at the root level (for
> 52-bit physical addressing), without taking into account whether or not
> this is supported on the current system.  However, those bits can only
> be set if we are executing the image from an address that exceeds the
> 48-bit PA range, and are guaranteed to be cleared otherwise, and given
> that we are dealing with a mapping in the lower TTBR0 range of the
> address space, the result is therefore the same as if we'd mask off only
> 6 bits.
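
To spell out the arithmetic for the 64K pages + 48-bit VA
configuration (IIUC the only one where 52-bit PA applies):
PGDIR_SHIFT is 42, so the root-level index is normally
VA_BITS - PGDIR_SHIFT = 6 bits, and extending it to
PHYS_MASK_SHIFT - PGDIR_SHIFT gives 52 - 42 = 10 bits. Bits [51:48]
of the idmapped address can only be set when the image itself lives
above the 48-bit PA limit, so masking 10 bits rather than 6 changes
nothing otherwise.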
> 
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>

Aside from one trivial comment below, this looks good to me, so either way:

Acked-by: Mark Rutland <mark.rutland at arm.com>

> ---
>  arch/arm64/kernel/head.S | 55 ++++++++------------
>  1 file changed, 22 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 53126a35d73c..9fdde2f9cc0f 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -179,31 +179,20 @@ SYM_CODE_END(preserve_boot_args)
>   *	vstart:	virtual address of start of range
>   *	vend:	virtual address of end of range - we map [vstart, vend]
>   *	shift:	shift used to transform virtual address into index
> - *	ptrs:	number of entries in page table
> + *	order:  #imm 2log(number of entries in page table)
>   *	istart:	index in table corresponding to vstart
>   *	iend:	index in table corresponding to vend
>   *	count:	On entry: how many extra entries were required in previous level, scales
>   *			  our end index.
>   *		On exit: returns how many extra entries required for next page table level
>   *
> - * Preserves:	vstart, vend, shift, ptrs
> + * Preserves:	vstart, vend
>   * Returns:	istart, iend, count
>   */
> -	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
> -	lsr	\iend, \vend, \shift
> -	mov	\istart, \ptrs
> -	sub	\istart, \istart, #1
> -	and	\iend, \iend, \istart	// iend = (vend >> shift) & (ptrs - 1)
> -	mov	\istart, \ptrs
> -	mul	\istart, \istart, \count
> -	add	\iend, \iend, \istart	// iend += count * ptrs
> -					// our entries span multiple tables
> -
> -	lsr	\istart, \vstart, \shift
> -	mov	\count, \ptrs
> -	sub	\count, \count, #1
> -	and	\istart, \istart, \count
> -
> +	.macro compute_indices, vstart, vend, shift, order, istart, iend, count
> +	ubfx	\istart, \vstart, \shift, \order
> +	ubfx	\iend, \vend, \shift, \order
> +	add	\iend, \iend, \count, lsl \order
>  	sub	\count, \iend, \istart
>  	.endm
>  
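
And the `add \iend, \iend, \count, lsl \order` is the old
"iend += count * ptrs" with the multiply folded into a
shifted-register add, since the old ptrs == 1 << order; i.e. roughly:

	iend += count << order;	/* entries span multiple tables */
	count = iend - istart;
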
> @@ -218,38 +207,39 @@ SYM_CODE_END(preserve_boot_args)
>   *	vend:	virtual address of end of range - we map [vstart, vend - 1]
>   *	flags:	flags to use to map last level entries
>   *	phys:	physical address corresponding to vstart - physical memory is contiguous
> - *	pgds:	the number of pgd entries
> + *	order:  #imm 2log(number of entries in PGD table)

For clarity, perhaps: s/2log/ilog2/ ? The latter is used much more commonly
throughout the kernel.

>   *
>   * Temporaries:	istart, iend, tmp, count, sv - these need to be different registers
>   * Preserves:	vstart, flags
>   * Corrupts:	tbl, rtbl, vend, istart, iend, tmp, count, sv
>   */
> -	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
> +	.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv
>  	sub \vend, \vend, #1
>  	add \rtbl, \tbl, #PAGE_SIZE
> -	mov \sv, \rtbl
>  	mov \count, #0
> -	compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
> +
> +	compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
> -	mov \sv, \rtbl

FWIW, moving the temporary save of (r)tbl immediately around populate_entries
is *much* clearer!

>  
>  #if SWAPPER_PGTABLE_LEVELS > 3
> -	compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
> +	compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
> -	mov \sv, \rtbl
>  #endif
>  
>  #if SWAPPER_PGTABLE_LEVELS > 2
> -	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
> +	compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	mov \sv, \rtbl
>  	populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
>  	mov \tbl, \sv
>  #endif
>  
> -	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
> -	bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
> -	populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
> +	compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
> +	bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
> +	populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
>  	.endm
>  
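
The (PAGE_SHIFT - 3) immediates also make the structure obvious: each
table is a single page of 8-byte descriptors, i.e. 2^(PAGE_SHIFT - 3)
entries, so e.g. with 4K pages that's 2^9 == 512, matching the old
PTRS_PER_PUD/PMD/PTE values.
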
>  /*
> @@ -300,12 +290,12 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	 * range in that case, and configure an additional translation level
>  	 * if needed.
>  	 */
> -	mov	x4, #PTRS_PER_PGD
>  	idmap_get_t0sz x5
>  	cmp	x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
>  	b.ge	1f			// .. then skip VA range extension
>  
>  #if (VA_BITS < 48)
> +#define IDMAP_PGD_ORDER	(VA_BITS - PGDIR_SHIFT)
>  #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
>  #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
>  
> @@ -323,16 +313,16 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	mov	x2, EXTRA_PTRS
>  	create_table_entry x0, x3, EXTRA_SHIFT, x2, x5, x6
>  #else
> +#define IDMAP_PGD_ORDER	(PHYS_MASK_SHIFT - PGDIR_SHIFT)
>  	/*
>  	 * If VA_BITS == 48, we don't have to configure an additional
>  	 * translation level, but the top-level table has more entries.
>  	 */
> -	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
>  #endif
>  1:
>  	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
>  
> -	map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
> +	map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14
>  
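
With that, both IDMAP_PGD_ORDER definitions mirror the removed mov x4
values: #PTRS_PER_PGD is 1 << (VA_BITS - PGDIR_SHIFT), and the other
branch was already written as 1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT).
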
>  	/*
>  	 * Map the kernel image (starting with PHYS_OFFSET).
> @@ -340,13 +330,12 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  	adrp	x0, init_pg_dir
>  	mov_q	x5, KIMAGE_VADDR		// compile time __va(_text)
>  	add	x5, x5, x23			// add KASLR displacement
> -	mov	x4, PTRS_PER_PGD
>  	adrp	x6, _end			// runtime __pa(_end)
>  	adrp	x3, _text			// runtime __pa(_text)
>  	sub	x6, x6, x3			// _end - _text
>  	add	x6, x6, x5			// runtime __va(_end)
>  
> -	map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
> +	map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
>  
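
Same pattern here: (VA_BITS - PGDIR_SHIFT) is just ilog2(PTRS_PER_PGD),
replacing the removed "mov x4, PTRS_PER_PGD".
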
>  	/*
>  	 * Since the page tables have been populated with non-cacheable
> -- 
> 2.35.1
> 

Mark.