[RFC V2 14/14] arm64/mm: Add initial support for FEAT_D128 page tables

Ryan Roberts ryan.roberts at arm.com
Wed May 27 07:54:03 PDT 2026


On 13/05/2026 05:45, Anshuman Khandual wrote:
> Add build time support for FEAT_D128 page tables with a new Kconfig option,
> i.e. CONFIG_ARM64_D128. When selected, PTE types become 128 bits wide and
> PTE bits are mapped to their new locations. In addition, the basic page
> table geometry is updated, since each table page now holds half the number
> of entries (aka PTRS_PER_PXX) that it did previously.
> 
> Since FEAT_D128 exclusively supports the permission indirection style for
> page table entry permission management, a kernel compiled for FEAT_D128
> requires both FEAT_S1PIE and FEAT_D128. If these architecture features are
> not present at boot, the kernel panics just like it does when there is a
> granule size mismatch.
> 
> TTBR0/1_EL1 and PAR_EL1 registers become 128 bits wide when D128 is enabled,
> thus requiring MSRR/MRRS instructions for their updates. Because PA_BITS is
> still capped at 52 bits, MRS/MSR instructions are currently sufficient for
> the register accesses, which basically operate on the lower 64 bits. However,
> the entire 128 bits of these registers are cleared during boot via MSRR.
> 
> Add support for the TLBIP instruction in the TLB flush macros with level hint
> and address range operations. The existing TLBI based TLB flush would have
> been sufficient given PA_BITS is still capped at 52, but it would have
> lacked both level hint and range support.
> 
> This enables support for all granule size, VA_BITS and PA_BITS combinations.
> 
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Ryan Roberts <ryan.roberts at arm.com>
> Cc: Mark Rutland <mark.rutland at arm.com>
> Cc: linux-arm-kernel at lists.infradead.org
> Cc: linux-kernel at vger.kernel.org
> Signed-off-by: Linu Cherian <linu.cherian at arm.com> (TLBIP instructions)
> Signed-off-by: Anshuman Khandual <anshuman.khandual at arm.com>
> ---
> Changes in RFC V2:
> 
> - Updated ARM64_CONT_[PTE|PMD]_SHIFT both for 16K and 64K base pages
> - Adopted TLBIP implementation to recent TLB flush changes
> - Renamed __PRIpte as __PRIpxx per David
> - Renamed all ptdesc_ instances as pxxval_ instead
> 
>  arch/arm64/Kconfig                     |  51 ++++++++-
>  arch/arm64/Makefile                    |   4 +
>  arch/arm64/include/asm/assembler.h     |   4 +-
>  arch/arm64/include/asm/el2_setup.h     |   9 ++
>  arch/arm64/include/asm/pgtable-hwdef.h | 137 +++++++++++++++++++++++++
>  arch/arm64/include/asm/pgtable-prot.h  |  18 +++-
>  arch/arm64/include/asm/pgtable-types.h |   9 ++
>  arch/arm64/include/asm/pgtable.h       |  56 +++++++++-
>  arch/arm64/include/asm/smp.h           |   1 +
>  arch/arm64/include/asm/tlbflush.h      |  68 ++++++++++--
>  arch/arm64/kernel/head.S               |  12 +++
>  arch/arm64/mm/proc.S                   |  25 ++++-
>  12 files changed, 374 insertions(+), 20 deletions(-)
> 

[...]

Some comments on tlbflush.h only:

> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 361d74ef8016..7831759b98e1 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -41,6 +41,25 @@
>  
>  #define __tlbi(op, ...)		__TLBI_N(op, ##__VA_ARGS__, 1, 0)
>  
> +#ifdef CONFIG_ARM64_D128
> +#define __tlbip(op, arg) do {		\
> +	asm (ARM64_ASM_PREAMBLE		\
> +	".arch_extension d128\n\t"	\
> +	"tlbip " #op ", %0, %H0\n"	\
> +	: : "r" (arg.full));		\
> +} while (0)
> +
> +#define __tlbip_user(op, arg) do {		\
> +	if (arm64_kernel_unmapped_at_el0()) {	\
> +		arg.low |= USER_ASID_FLAG;	\
> +		__tlbip(op, (arg));		\
> +	}					\
> +} while (0)
> +
> +#endif
> +
> +#define TLBI_ASID_MASK		GENMASK_ULL(63, 48)
> +
>  #define __tlbi_user(op, arg) do {						\
>  	if (arm64_kernel_unmapped_at_el0())					\
>  		__tlbi(op, (arg) | USER_ASID_FLAG);				\
> @@ -162,9 +181,15 @@ static inline void sme_dvmsync_batch(struct arch_tlbflush_unmap_batch *batch)
>  
>  #define TLBI_TTL_UNKNOWN	INT_MAX
>  
> +#ifdef CONFIG_ARM64_D128
> +typedef union __u128_halves tlbi_args_t;

I wonder if you could just define this as u128? That would make things a bit
neater, and you should be able to do normal bit twiddling on it, I think.

> +#define __tlbi_wrapper(op, arg)		__tlbip(op, arg)
> +#define __tlbi_user_wrapper(op, arg)	__tlbip_user(op, arg)
> +#else
>  typedef u64 tlbi_args_t;
>  #define __tlbi_wrapper(op, arg)		__tlbi(op, arg)
>  #define __tlbi_user_wrapper(op, arg)	__tlbi_user(op, arg)
> +#endif
>  
>  typedef void (*tlbi_op)(tlbi_args_t arg);
>  
> @@ -211,17 +236,28 @@ static __always_inline void ipas2e1is(tlbi_args_t arg)
>  	__tlbi_wrapper(ipas2e1is, arg);
>  }
>  
> -static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level,
> -					      u16 asid)
> +static __always_inline void __tlbi_update_level(u32 level, u64 *arg)
>  {
> -	u64 arg = __TLBI_VADDR(addr, asid);
> -
>  	if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && level <= 3) {
>  		u64 ttl = level | (get_trans_granule() << 2);
>  
> -		FIELD_MODIFY(TLBI_TTL_MASK, &arg, ttl);
> +		FIELD_MODIFY(TLBI_TTL_MASK, arg, ttl);
>  	}
> +}
> +
> +static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level, u16 asid)
> +{
> +#ifdef CONFIG_ARM64_D128
> +	union __u128_halves arg;
> +
> +	arg.low = FIELD_PREP(TLBI_ASID_MASK, asid);
> +	__tlbi_update_level(level, &arg.low);
> +	arg.high = addr >> 12;
> +#else
> +	u64 arg = __TLBI_VADDR(addr, asid);
>  
> +	__tlbi_update_level(level, &arg);
> +#endif
>  	op(arg);
>  }

It would be a neater change if you could get away with something like this. If
you typedef tlbi_args_t as u128, will FIELD_MODIFY() work? Not sure...


/*
 * Build the operand for a by-address TLB invalidate and hand it to @op.
 *
 * @op:    invalidate primitive to call with the encoded operand
 * @addr:  virtual address to invalidate
 * @level: translation table level hint; only encoded when the CPU has
 *         ARM64_HAS_ARMv8_4_TTL and level <= 3
 * @asid:  ASID to encode alongside the address
 *
 * With CONFIG_ARM64_D128 the ASID (and TTL hint) live in the low 64 bits and
 * the page-shifted address in the high 64 bits; this sketch assumes
 * tlbi_args_t is typedef'd to u128 in that configuration. Otherwise the
 * operand is the usual 64-bit __TLBI_VADDR() encoding.
 *
 * The TTL hint is applied to a u64 holding the low half *before* the halves
 * are merged, so FIELD_MODIFY() is only ever used on a 64-bit target and the
 * result does not depend on how that macro treats a 128-bit one.
 */
static __always_inline void __tlbi_level_asid(tlbi_op op, u64 addr, u32 level,
					      u16 asid)
{
	tlbi_args_t arg;
	u64 low;

#ifdef CONFIG_ARM64_D128
	low = FIELD_PREP(TLBI_ASID_MASK, asid);
	/* Widen first: shifting a 64-bit value by 64 is undefined. */
	arg = (tlbi_args_t)(addr >> 12) << 64;
#else
	low = __TLBI_VADDR(addr, asid);
	arg = 0;
#endif

	if (alternative_has_cap_unlikely(ARM64_HAS_ARMv8_4_TTL) && level <= 3) {
		u64 ttl = level | (get_trans_granule() << 2);

		FIELD_MODIFY(TLBI_TTL_MASK, &low, ttl);
	}

	op(arg | low);
}


>  
> @@ -507,19 +543,33 @@ static __always_inline void ripas2e1is(tlbi_args_t arg)
>  	__tlbi_wrapper(ripas2e1is, arg);
>  }
>  
> -static __always_inline void __tlbi_range(tlbi_op op, u64 addr,
> -					 u16 asid, int scale, int num,
> -					 u32 level, bool lpa2)
> +static __always_inline u64 __tlbi_range_args_encode_comm(u16 asid, int scale, int num, u32 level)
>  {
>  	u64 arg = 0;
>  
> -	arg |= FIELD_PREP(TLBIR_BADDR_MASK, addr >> (lpa2 ? 16 : PAGE_SHIFT));
>  	arg |= FIELD_PREP(TLBIR_TTL_MASK, level > 3 ? 0 : level);
>  	arg |= FIELD_PREP(TLBIR_NUM_MASK, num);
>  	arg |= FIELD_PREP(TLBIR_SCALE_MASK, scale);
>  	arg |= FIELD_PREP(TLBIR_TG_MASK, get_trans_granule());
>  	arg |= FIELD_PREP(TLBIR_ASID_MASK, asid);
>  
> +	return arg;
> +}
> +
> +static __always_inline void __tlbi_range(tlbi_op op, u64 addr,
> +					 u16 asid, int scale, int num,
> +					 u32 level, bool lpa2)
> +{
> +#ifdef CONFIG_ARM64_D128
> +	union __u128_halves arg;
> +
> +	arg.low = __tlbi_range_args_encode_comm(asid, scale, num, level);
> +	arg.high = addr >> 12;
> +#else
> +	u64 arg = __tlbi_range_args_encode_comm(asid, scale, num, level);
> +
> +	arg |= FIELD_PREP(TLBIR_BADDR_MASK, addr >> (lpa2 ? 16 : PAGE_SHIFT));
> +#endif
>  	op(arg);
>  }

And you could do the same thing here, keeping a single function with only a
minimal diff.

[...]




More information about the linux-arm-kernel mailing list