[PATCH 10/10] arm64: mm: Re-implement the __flush_tlb_range_op macro in C

Ryan Roberts ryan.roberts at arm.com
Mon Jul 14 02:44:10 PDT 2025


On 11/07/2025 17:17, Will Deacon wrote:
> The __flush_tlb_range_op() macro is horrible and has been a previous

Amen to that!

> source of bugs thanks to multiple expansions of its arguments (see
> commit f7edb07ad7c6 ("Fix mmu notifiers for range-based invalidates")).
> 
> Rewrite the thing in C.

This looks much better! Do we know it's definitely valuable to have all these
functions inline though? They have grown a lot over the years, and I wonder how
much code size they cost vs the performance they actually buy us.

Perhaps it's worth considering whether at least these two should move to a C
file (rough sketch below)?

__flush_tlb_range_nosync
flush_tlb_kernel_range
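
Roughly what I have in mind, using flush_tlb_kernel_range() as the example.
This is only a sketch: the new file name and the renamed inline helper are
made up, and the body would simply be whatever lives in tlbflush.h today:

/* arch/arm64/include/asm/tlbflush.h: keep only a prototype */
void flush_tlb_kernel_range(unsigned long start, unsigned long end);

/* arch/arm64/mm/tlbflush.c (hypothetical new file) */
#include <asm/tlbflush.h>

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/*
	 * Same flushing logic as the current header version; it just
	 * stops being inlined into every caller.
	 */
	__flush_tlb_kernel_range(start, end);	/* hypothetical renamed inline */
}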


FYI, I've got a patch that uses local TLBI when we can prove that only the
local CPU has seen the old pgtable entries we are trying to flush. These
changes to use enum tlbi_op make that patch quite a bit neater. I'll post it
as an RFC at some point, as I expect it will need quite a bit of discussion.
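
To give a flavour of why the enum helps (the names below are made up, not
from my patch or from this series): with the op carried as an enum value
rather than a token pasted into the asm mnemonic, choosing the non-broadcast
variant becomes a simple data decision.

/*
 * Sketch only: 'TLBI_VALE1' / 'TLBI_VALE1IS' and 'mm_is_local()' are
 * stand-ins for whatever the real enumerators and predicate end up
 * being called.
 */
static inline enum tlbi_op tlbi_op_for(struct mm_struct *mm)
{
	return mm_is_local(mm) ? TLBI_VALE1 : TLBI_VALE1IS;
}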

Thanks,
Ryan


> 
> Suggested-by: Linus Torvalds <torvalds at linux-foundation.org>
> Signed-off-by: Will Deacon <will at kernel.org>

Reviewed-by: Ryan Roberts <ryan.roberts at arm.com>

> ---
>  arch/arm64/include/asm/tlbflush.h | 63 +++++++++++++++++--------------
>  1 file changed, 34 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 2541863721af..ee69efdc12ab 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -376,12 +376,12 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>  /*
>   * __flush_tlb_range_op - Perform TLBI operation upon a range
>   *
> - * @op:	TLBI instruction that operates on a range (has 'r' prefix)
> + * @op:		TLBI instruction that operates on a range
>   * @start:	The start address of the range
>   * @pages:	Range as the number of pages from 'start'
>   * @stride:	Flush granularity
>   * @asid:	The ASID of the task (0 for IPA instructions)
> - * @tlb_level:	Translation Table level hint, if known
> + * @level:	Translation Table level hint, if known
>   * @lpa2:	If 'true', the lpa2 scheme is used as set out below
>   *
>   * When the CPU does not support TLB range operations, flush the TLB
> @@ -439,33 +439,38 @@ static __always_inline void __tlbi_range(const enum tlbi_op op, u64 addr,
>  #undef ___GEN_TLBI_OP_CASE
>  #undef __GEN_TLBI_OP_CASE
>  
> -#define __flush_tlb_range_op(op, start, pages, stride,			\
> -				asid, tlb_level, lpa2)			\
> -do {									\
> -	typeof(start) __flush_start = start;				\
> -	typeof(pages) __flush_pages = pages;				\
> -	int num = 0;							\
> -	int scale = 3;							\
> -									\
> -	while (__flush_pages > 0) {					\
> -		if (!system_supports_tlb_range() ||			\
> -		    __flush_pages == 1 ||				\
> -		    (lpa2 && __flush_start != ALIGN(__flush_start, SZ_64K))) {	\
> -			__tlbi_level_asid(op, __flush_start, tlb_level, asid);	\
> -			__flush_start += stride;			\
> -			__flush_pages -= stride >> PAGE_SHIFT;		\
> -			continue;					\
> -		}							\
> -									\
> -		num = __TLBI_RANGE_NUM(__flush_pages, scale);		\
> -		if (num >= 0) {						\
> -			__tlbi_range(op, __flush_start, asid, scale, num, tlb_level, lpa2); \
> -			__flush_start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
> -			__flush_pages -= __TLBI_RANGE_PAGES(num, scale);\
> -		}							\
> -		scale--;						\
> -	}								\
> -} while (0)
> +static __always_inline void __flush_tlb_range_op(const enum tlbi_op op,
> +						 u64 start, size_t pages,
> +						 u64 stride, u16 asid,
> +						 u32 level, bool lpa2)
> +{
> +	u64 addr = start, end = start + pages * PAGE_SIZE;
> +	int scale = 3;
> +
> +	while (addr != end) {
> +		int num;
> +
> +		pages = (end - addr) >> PAGE_SHIFT;
> +
> +		if (!system_supports_tlb_range() || pages == 1)
> +			goto invalidate_one;
> +
> +		if (lpa2 && !IS_ALIGNED(addr, SZ_64K))
> +			goto invalidate_one;
> +
> +		num = __TLBI_RANGE_NUM(pages, scale);
> +		if (num >= 0) {
> +			__tlbi_range(op, addr, asid, scale, num, level, lpa2);
> +			addr += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
> +		}
> +
> +		scale--;
> +		continue;
> +invalidate_one:
> +		__tlbi_level_asid(op, addr, level, asid);
> +		addr += stride;
> +	}
> +}
>  
>  #define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
>  	__flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, kvm_lpa2_is_enabled());
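
One thing that helped me convince myself the new loop matches the old macro
was writing the scale/num arithmetic out as a standalone toy (userspace, not
kernel code). RANGE_PAGES below mirrors what I understand __TLBI_RANGE_PAGES
to be, i.e. (num + 1) << (5 * scale + 1), and the range is assumed to already
be capped at MAX_TLBI_RANGE_PAGES as the callers do today:

#include <stdio.h>

/* pages covered by one range op: (num + 1) << (5 * scale + 1) */
#define RANGE_PAGES(num, scale)	(((num) + 1ULL) << (5 * (scale) + 1))

int main(void)
{
	unsigned long long pages = 2051;	/* arbitrary example range */
	int scale = 3;

	while (pages > 0) {
		long long num;

		if (pages == 1) {
			printf("single-page invalidate\n");
			pages--;
			continue;
		}

		/* like __TLBI_RANGE_NUM(): largest num (<= 31) at this scale */
		num = (long long)(pages >> (5 * scale + 1)) - 1;
		if (num > 31)
			num = 31;
		if (num >= 0) {
			printf("range op: scale=%d num=%lld -> %llu pages\n",
			       scale, num, RANGE_PAGES(num, scale));
			pages -= RANGE_PAGES(num, scale);
		}
		scale--;
	}
	return 0;
}

For 2051 pages that prints one scale=2 op covering 2048 pages, one scale=0 op
covering 2 pages, and a final single-page invalidate, which is what I'd expect
from the old macro too.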



