[PATCH v3 1/2] arm64: kpti-ng: simplify page table traversal logic

Mark Rutland mark.rutland at arm.com
Thu May 19 10:09:03 PDT 2022


Hi Ard,

This is a really nice cleanup!

On Thu, Apr 21, 2022 at 04:03:38PM +0200, Ard Biesheuvel wrote:
> Simplify the KPTI G-to-nG asm helper code by:
> - pulling the 'table bit' test into the get/put macros so we can combine
>   them and incorporate the entire loop;
> - moving the 'table bit' test after the update of bit #11 so we no
>   longer need separate next_xxx and skip_xxx labels;
> - redefining the pmd/pud register aliases and the next_pmd/next_pud
>   labels instead of branching to them if the number of configured page
>   table levels is less than 3 or 4, respectively;

All of this looks good to me.

> - folding the descriptor pointer increment into the LDR instructions.

I think this leads to issuing a CMO to the wrong address; explained below with
a proposed fixup.

> No functional change intended, except for the fact that we now descend
> into a next level table after setting bit #11 on its descriptor but this
> should make no difference in practice.

Since we don't play games with recursive tables I agree that should fine.

> While at it, switch to .L prefixed local labels so they don't clutter up
> the symbol tables, kallsyms, etc, and clean up the indentation for
> legibility.

This also sounds good to me.

> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>

If you're happy with my proposed fixup below:

Reviewed-by: Mark Rutland <mark.rutland at arm.com>

With that applied, I've tested this in VMs on ThunderX2 (with KPTI forced on an
kaslr_requires_kpti() hacked to false) in the following configurations: 

* 4K  / 39-bit VAs / 3 levels
* 4K  / 48-bit VAs / 4 levels
* 4K  / 48-bit VAs / 4 levels + KASAN (to test shadow skipping)
* 64K / 42-bit VAs / 2 levels
* 64K / 48-bit VAs / 3 levels
* 64K / 52-bit VAs / 3 levels

... in each case checking the resulting kernel tables with ptudmp.

In all cases that looked good, so (with the fixup):

Tested-by: Mark Rutland <mark.rutland at arm.com>

Beware when looking at the ptdump output that we have 3 KPTI trampoline pages
in the fixmap region which are (legitimately) executable and global; I got very
confused when I first spotted that!

> ---
>  arch/arm64/mm/proc.S | 97 +++++++-------------
>  1 file changed, 34 insertions(+), 63 deletions(-)
> 
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 50bbed947bec..5619c00f8cd4 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -202,19 +202,24 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
>  #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
>  	.pushsection ".idmap.text", "awx"
>  
> -	.macro	__idmap_kpti_get_pgtable_ent, type
> +	.macro	kpti_mk_tbl_ng, type, num_entries
> +	add	end_\type\()p, cur_\type\()p, #\num_entries * 8
> +.Ldo_\type:
>  	dc	cvac, cur_\()\type\()p		// Ensure any existing dirty

Could clean up the (existing) extraneous `\()` here too? That'd make it easier
to spot the `cur_\type\()p` patter consistently.

>  	dmb	sy				// lines are written back before
> -	ldr	\type, [cur_\()\type\()p]	// loading the entry
> -	tbz	\type, #0, skip_\()\type	// Skip invalid and
> -	tbnz	\type, #11, skip_\()\type	// non-global entries
> -	.endm
> -
> -	.macro __idmap_kpti_put_pgtable_ent_ng, type
> +	ldr	\type, [cur_\type\()p], #8	// loading the entry

So now `cur_`\type()p` points at the *next* entry, which is a bit confusing ...

> +	tbz	\type, #0, .Lnext_\type		// Skip invalid and
> +	tbnz	\type, #11, .Lnext_\type	// non-global entries
>  	orr	\type, \type, #PTE_NG		// Same bit for blocks and pages
> -	str	\type, [cur_\()\type\()p]	// Update the entry and ensure
> +	str	\type, [cur_\type\()p, #-8]	// Update the entry and ensure

... though we handle the offset correctly here ...

>  	dmb	sy				// that it is visible to all
>  	dc	civac, cur_\()\type\()p		// CPUs.

... but here we don't, and perform the CMO for the *next* entry. So for the
last entry in each cacheline we're missing an invalidate.

> +	.ifnc	\type, pte
> +	tbnz	\type, #1, .Lderef_\type
> +	.endif
> +.Lnext_\type:
> +	cmp	cur_\type\()p, end_\type\()p
> +	b.ne	.Ldo_\type
>  	.endm

I reckon it's be slightly clearer and safer to have an explicit `ADD` at the
start of `.Lnext_\type`, i.e.

|         .macro  kpti_mk_tbl_ng, type, num_entries
|         add     end_\type\()p, cur_\type\()p, #\num_entries * 8
| .Ldo_\type:
|         dc      cvac, cur_\type\()p             // Ensure any existing dirty
|         dmb     sy                              // lines are written back before
|         ldr     \type, [cur_\type\()p]          // loading the entry
|         tbz     \type, #0, .Lnext_\type         // Skip invalid and
|         tbnz    \type, #11, .Lnext_\type        // non-global entries
|         orr     \type, \type, #PTE_NG           // Same bit for blocks and pages
|         str     \type, [cur_\type\()p]          // Update the entry and ensure
|         dmb     sy                              // that it is visible to all
|         dc      civac, cur_\()\type\()p         // CPUs.
|         .ifnc   \type, pte 
|         tbnz    \type, #1, .Lderef_\type
|         .endif
| .Lnext_\type:
|         add     cur_\type\()p, cur_\type\()p, #8
|         cmp     cur_\type\()p, end_\type\()p
|         b.ne    .Ldo_\type
|         .endm

Other than that, this looks good to me.

Thanks,
Mark.

>  /*
> @@ -235,10 +240,8 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
>  	pgd		.req	x7
>  	cur_pudp	.req	x8
>  	end_pudp	.req	x9
> -	pud		.req	x10
>  	cur_pmdp	.req	x11
>  	end_pmdp	.req	x12
> -	pmd		.req	x13
>  	cur_ptep	.req	x14
>  	end_ptep	.req	x15
>  	pte		.req	x16
> @@ -265,16 +268,8 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
>  
>  	/* Everybody is enjoying the idmap, so we can rewrite swapper. */
>  	/* PGD */
> -	mov	cur_pgdp, swapper_pa
> -	add	end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
> -do_pgd:	__idmap_kpti_get_pgtable_ent	pgd
> -	tbnz	pgd, #1, walk_puds
> -next_pgd:
> -	__idmap_kpti_put_pgtable_ent_ng	pgd
> -skip_pgd:
> -	add	cur_pgdp, cur_pgdp, #8
> -	cmp	cur_pgdp, end_pgdp
> -	b.ne	do_pgd
> +	mov		cur_pgdp, swapper_pa
> +	kpti_mk_tbl_ng	pgd, PTRS_PER_PGD
>  
>  	/* Publish the updated tables and nuke all the TLBs */
>  	dsb	sy
> @@ -291,59 +286,35 @@ skip_pgd:
>  	str	wzr, [flag_ptr]
>  	ret
>  
> +.Lderef_pgd:
>  	/* PUD */
> -walk_puds:
> -	.if CONFIG_PGTABLE_LEVELS > 3
> +	.if		CONFIG_PGTABLE_LEVELS > 3
> +	pud		.req	x10
>  	pte_to_phys	cur_pudp, pgd
> -	add	end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
> -do_pud:	__idmap_kpti_get_pgtable_ent	pud
> -	tbnz	pud, #1, walk_pmds
> -next_pud:
> -	__idmap_kpti_put_pgtable_ent_ng	pud
> -skip_pud:
> -	add	cur_pudp, cur_pudp, 8
> -	cmp	cur_pudp, end_pudp
> -	b.ne	do_pud
> -	b	next_pgd
> -	.else /* CONFIG_PGTABLE_LEVELS <= 3 */
> -	mov	pud, pgd
> -	b	walk_pmds
> -next_pud:
> -	b	next_pgd
> +	kpti_mk_tbl_ng	pud, PTRS_PER_PUD
> +	b		.Lnext_pgd
> +	.else		/* CONFIG_PGTABLE_LEVELS <= 3 */
> +	pud		.req	pgd
> +	.set		.Lnext_pud, .Lnext_pgd
>  	.endif
>  
> +.Lderef_pud:
>  	/* PMD */
> -walk_pmds:
> -	.if CONFIG_PGTABLE_LEVELS > 2
> +	.if		CONFIG_PGTABLE_LEVELS > 2
> +	pmd		.req	x13
>  	pte_to_phys	cur_pmdp, pud
> -	add	end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
> -do_pmd:	__idmap_kpti_get_pgtable_ent	pmd
> -	tbnz	pmd, #1, walk_ptes
> -next_pmd:
> -	__idmap_kpti_put_pgtable_ent_ng	pmd
> -skip_pmd:
> -	add	cur_pmdp, cur_pmdp, #8
> -	cmp	cur_pmdp, end_pmdp
> -	b.ne	do_pmd
> -	b	next_pud
> -	.else /* CONFIG_PGTABLE_LEVELS <= 2 */
> -	mov	pmd, pud
> -	b	walk_ptes
> -next_pmd:
> -	b	next_pud
> +	kpti_mk_tbl_ng	pmd, PTRS_PER_PMD
> +	b		.Lnext_pud
> +	.else		/* CONFIG_PGTABLE_LEVELS <= 2 */
> +	pmd		.req	pgd
> +	.set		.Lnext_pmd, .Lnext_pgd
>  	.endif
>  
> +.Lderef_pmd:
>  	/* PTE */
> -walk_ptes:
>  	pte_to_phys	cur_ptep, pmd
> -	add	end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
> -do_pte:	__idmap_kpti_get_pgtable_ent	pte
> -	__idmap_kpti_put_pgtable_ent_ng	pte
> -skip_pte:
> -	add	cur_ptep, cur_ptep, #8
> -	cmp	cur_ptep, end_ptep
> -	b.ne	do_pte
> -	b	next_pmd
> +	kpti_mk_tbl_ng	pte, PTRS_PER_PTE
> +	b		.Lnext_pmd
>  
>  	.unreq	cpu
>  	.unreq	num_cpus
> -- 
> 2.30.2
> 



More information about the linux-arm-kernel mailing list