[PATCH 03/10] arm64: mm: Implicitly invalidate user ASID based on TLBI operation

Ryan Roberts ryan.roberts at arm.com
Mon Jul 14 02:46:42 PDT 2025


On 14/07/2025 09:44, Ryan Roberts wrote:
> On 11/07/2025 17:17, Will Deacon wrote:
>> When kpti is enabled, separate ASIDs are used for userspace and
>> kernelspace, requiring ASID-qualified TLB invalidation by virtual
>> address to invalidate both of them.
>>
>> Push the logic for invalidating the two ASIDs down into the low-level
>> __tlbi_level_op() function based on the TLBI operation and remove the
>> burden from the caller to handle the kpti-specific behaviour.
>>
>> Signed-off-by: Will Deacon <will at kernel.org>
>> ---
>>  arch/arm64/include/asm/tlbflush.h | 45 ++++++++++++++++++-------------
>>  1 file changed, 26 insertions(+), 19 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
>> index 4408aeebf4d5..08e509f37b28 100644
>> --- a/arch/arm64/include/asm/tlbflush.h
>> +++ b/arch/arm64/include/asm/tlbflush.h
>> @@ -115,17 +115,25 @@ enum tlbi_op {
>>  
>>  #define TLBI_TTL_UNKNOWN	INT_MAX
>>  
>> -#define __GEN_TLBI_OP_CASE(op)						\
>> +#define ___GEN_TLBI_OP_CASE(op)						\
>>  	case op:							\
>> -		__tlbi(op, arg);					\
>> +		__tlbi(op, arg)
>> +
>> +#define __GEN_TLBI_OP_ASID_CASE(op)					\
>> +	___GEN_TLBI_OP_CASE(op);					\
>> +		__tlbi_user(op, arg);					\
>> +		break
>> +
>> +#define __GEN_TLBI_OP_CASE(op)						\
>> +	___GEN_TLBI_OP_CASE(op);					\
>>  		break
>>  
>>  static __always_inline void __tlbi_level_op(const enum tlbi_op op, u64 arg)
>>  {
>>  	switch (op) {
>> -	__GEN_TLBI_OP_CASE(vae1is);
>> +	__GEN_TLBI_OP_ASID_CASE(vae1is);
>>  	__GEN_TLBI_OP_CASE(vae2is);
>> -	__GEN_TLBI_OP_CASE(vale1is);
>> +	__GEN_TLBI_OP_ASID_CASE(vale1is);
>>  	__GEN_TLBI_OP_CASE(vale2is);
>>  	__GEN_TLBI_OP_CASE(vaale1is);
>>  	__GEN_TLBI_OP_CASE(ipas2e1);
>> @@ -134,7 +142,8 @@ static __always_inline void __tlbi_level_op(const enum tlbi_op op, u64 arg)
>>  		BUILD_BUG();
>>  	}
>>  }
>> -#undef __GEN_TLBI_OP_CASE
>> +#undef __GEN_TLBI_OP_ASID_CASE
>> +#undef ___GEN_TLBI_OP_CASE
>>  
>>  #define __tlbi_level(op, addr, level) do {				\
>>  	u64 arg = addr;							\
>> @@ -150,11 +159,6 @@ static __always_inline void __tlbi_level_op(const enum tlbi_op op, u64 arg)
>>  	__tlbi_level_op(op, arg);					\
>>  } while(0)
>>  
>> -#define __tlbi_user_level(op, arg, level) do {				\
>> -	if (arm64_kernel_unmapped_at_el0())				\
>> -		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
>> -} while (0)
>> -
>>  /*
>>   * This macro creates a properly formatted VA operand for the TLB RANGE. The
>>   * value bit assignments are:
>> @@ -418,22 +422,28 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>>   *    operations can only span an even number of pages. We save this for last to
>>   *    ensure 64KB start alignment is maintained for the LPA2 case.
>>   */
>> -#define __GEN_TLBI_OP_CASE(op)						\
>> +#define ___GEN_TLBI_OP_CASE(op)						\
>>  	case op:							\
>> -		__tlbi(r ## op, arg);					\
>> +		__tlbi(r ## op, arg)
>> +
>> +#define __GEN_TLBI_OP_ASID_CASE(op)					\
>> +	___GEN_TLBI_OP_CASE(op);					\
>> +		__tlbi_user(r ## op, arg);				\
>>  		break
>>  
>>  static __always_inline void __tlbi_range(const enum tlbi_op op, u64 arg)
>>  {
>>  	switch (op) {
>> -	__GEN_TLBI_OP_CASE(vae1is);
>> -	__GEN_TLBI_OP_CASE(vale1is);
>> +	__GEN_TLBI_OP_ASID_CASE(vae1is);
>> +	__GEN_TLBI_OP_ASID_CASE(vale1is);
>>  	__GEN_TLBI_OP_CASE(vaale1is);
>>  	__GEN_TLBI_OP_CASE(ipas2e1is);
> 
> Bug? This two-underscore version is still defined from the level case above,
> so this is no longer issuing a range-based tlbi? (i.e. you're no longer
> prepending the "r" here.)
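
To make that concrete: for the non-ASID ops in the range switch, the intent
is presumably a range-based expansion along the lines of (hand-written
illustration, not preprocessor output):

	case vaale1is:
		__tlbi(rvaale1is, arg);		/* TLBI RVAALE1IS */
		break;

whereas the concern above is that reusing the level-time
__GEN_TLBI_OP_CASE() leaves you with the non-range form:

	case vaale1is:
		__tlbi(vaale1is, arg);		/* TLBI VAALE1IS, not a range op */
		break;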

Do these __GEN_TLBI_*() macros really help that much? I think I'd prefer to
see the case statements just written out longhand. It would make things much
clearer for not that many more lines and, if I'm right about that bug, it
would have prevented it.
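
For illustration, a longhand version of the range helper might look
something like the sketch below (derived from the ops in the hunk above;
untested, so a sketch rather than a concrete proposal). It at least makes
the "r" prefix, and hence the range vs non-range distinction, explicit in
each case:

static __always_inline void __tlbi_range(const enum tlbi_op op, u64 arg)
{
	switch (op) {
	case vae1is:
		__tlbi(rvae1is, arg);
		__tlbi_user(rvae1is, arg);
		break;
	case vale1is:
		__tlbi(rvale1is, arg);
		__tlbi_user(rvale1is, arg);
		break;
	case vaale1is:
		__tlbi(rvaale1is, arg);
		break;
	case ipas2e1is:
		__tlbi(ripas2e1is, arg);
		break;
	default:
		BUILD_BUG();
	}
}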

Thanks,
Ryan


> 
>>  	default:
>>  		BUILD_BUG();
>>  	}
>>  }
>> +#undef __GEN_TLBI_OP_ASID_CASE
>> +#undef ___GEN_TLBI_OP_CASE
>>  #undef __GEN_TLBI_OP_CASE
>>  
>>  #define __flush_tlb_range_op(op, start, pages, stride,			\
>> @@ -452,8 +462,6 @@ do {									\
>>  		    (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) {	\
>>  			addr = __TLBI_VADDR(__flush_start, asid);	\
>>  			__tlbi_level(op, addr, tlb_level);		\
>> -			if (tlbi_user)					\
>> -				__tlbi_user_level(op, addr, tlb_level);	\
>>  			__flush_start += stride;			\
>>  			__flush_pages -= stride >> PAGE_SHIFT;		\
>>  			continue;					\
>> @@ -464,8 +472,6 @@ do {									\
>>  			addr = __TLBI_VADDR_RANGE(__flush_start >> shift, asid, \
>>  						scale, num, tlb_level);	\
>>  			__tlbi_range(op, addr);				\
>> -			if (tlbi_user)					\
>> -				__tlbi_user(r##op, addr);		\
>>  			__flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
>>  			__flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
>>  		}							\
>> @@ -584,6 +590,7 @@ static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *b
>>  {
>>  	__flush_tlb_range_nosync(mm, start, end, PAGE_SIZE, true, 3);
>>  }
>> -#endif
>>  
>> +#undef __tlbi_user
>> +#endif
>>  #endif
> 