[PATCH v6 05/11] arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range
Shaoqin Huang
shahuang at redhat.com
Mon Jul 17 23:21:30 PDT 2023
On 7/15/23 08:53, Raghavendra Rao Ananta wrote:
> Currently, the core TLB flush functionality of __flush_tlb_range()
> hardcodes vae1is (and variants) for the flush operation. In the
> upcoming patches, the KVM code reuses this core algorithm with
> ipas2e1is for range based TLB invalidations based on the IPA.
> Hence, extract the core flush functionality of __flush_tlb_range()
> into its own macro that accepts an 'op' argument to pass any
> TLBI operation, such that other callers (KVM) can benefit.
>
> No functional changes intended.
>
> Signed-off-by: Raghavendra Rao Ananta <rananta at google.com>
> Reviewed-by: Catalin Marinas <catalin.marinas at arm.com>
> Reviewed-by: Gavin Shan <gshan at redhat.com>
Reviewed-by: Shaoqin Huang <shahuang at redhat.com>
> ---
> arch/arm64/include/asm/tlbflush.h | 109 +++++++++++++++---------------
> 1 file changed, 56 insertions(+), 53 deletions(-)
>
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 412a3b9a3c25..f7fafba25add 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -278,14 +278,62 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
> */
> #define MAX_TLBI_OPS PTRS_PER_PTE
>
> +/* When the CPU does not support TLB range operations, flush the TLB
> + * entries one by one at the granularity of 'stride'. If the TLB
> + * range ops are supported, then:
> + *
> + * 1. If 'pages' is odd, flush the first page through non-range
> + * operations;
> + *
> + * 2. For remaining pages: the minimum range granularity is decided
> + * by 'scale', so multiple range TLBI operations may be required.
> + * Start from scale = 0, flush the corresponding number of pages
> + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> + * until no pages left.
> + *
> + * Note that certain ranges can be represented by either num = 31 and
> + * scale or num = 0 and scale + 1. The loop below favours the latter
> + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> + */
> +#define __flush_tlb_range_op(op, start, pages, stride, \
> + asid, tlb_level, tlbi_user) \
> +do { \
> + int num = 0; \
> + int scale = 0; \
> + unsigned long addr; \
> + \
> + while (pages > 0) { \
> + if (!system_supports_tlb_range() || \
> + pages % 2 == 1) { \
> + addr = __TLBI_VADDR(start, asid); \
> + __tlbi_level(op, addr, tlb_level); \
> + if (tlbi_user) \
> + __tlbi_user_level(op, addr, tlb_level); \
> + start += stride; \
> + pages -= stride >> PAGE_SHIFT; \
> + continue; \
> + } \
> + \
> + num = __TLBI_RANGE_NUM(pages, scale); \
> + if (num >= 0) { \
> + addr = __TLBI_VADDR_RANGE(start, asid, scale, \
> + num, tlb_level); \
> + __tlbi(r##op, addr); \
> + if (tlbi_user) \
> + __tlbi_user(r##op, addr); \
> + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
> + pages -= __TLBI_RANGE_PAGES(num, scale); \
> + } \
> + scale++; \
> + } \
> +} while (0)
> +
> static inline void __flush_tlb_range(struct vm_area_struct *vma,
> unsigned long start, unsigned long end,
> unsigned long stride, bool last_level,
> int tlb_level)
> {
> - int num = 0;
> - int scale = 0;
> - unsigned long asid, addr, pages;
> + unsigned long asid, pages;
>
> start = round_down(start, stride);
> end = round_up(end, stride);
> @@ -307,56 +355,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
> dsb(ishst);
> asid = ASID(vma->vm_mm);
>
> - /*
> - * When the CPU does not support TLB range operations, flush the TLB
> - * entries one by one at the granularity of 'stride'. If the TLB
> - * range ops are supported, then:
> - *
> - * 1. If 'pages' is odd, flush the first page through non-range
> - * operations;
> - *
> - * 2. For remaining pages: the minimum range granularity is decided
> - * by 'scale', so multiple range TLBI operations may be required.
> - * Start from scale = 0, flush the corresponding number of pages
> - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
> - * until no pages left.
> - *
> - * Note that certain ranges can be represented by either num = 31 and
> - * scale or num = 0 and scale + 1. The loop below favours the latter
> - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
> - */
> - while (pages > 0) {
> - if (!system_supports_tlb_range() ||
> - pages % 2 == 1) {
> - addr = __TLBI_VADDR(start, asid);
> - if (last_level) {
> - __tlbi_level(vale1is, addr, tlb_level);
> - __tlbi_user_level(vale1is, addr, tlb_level);
> - } else {
> - __tlbi_level(vae1is, addr, tlb_level);
> - __tlbi_user_level(vae1is, addr, tlb_level);
> - }
> - start += stride;
> - pages -= stride >> PAGE_SHIFT;
> - continue;
> - }
> -
> - num = __TLBI_RANGE_NUM(pages, scale);
> - if (num >= 0) {
> - addr = __TLBI_VADDR_RANGE(start, asid, scale,
> - num, tlb_level);
> - if (last_level) {
> - __tlbi(rvale1is, addr);
> - __tlbi_user(rvale1is, addr);
> - } else {
> - __tlbi(rvae1is, addr);
> - __tlbi_user(rvae1is, addr);
> - }
> - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
> - pages -= __TLBI_RANGE_PAGES(num, scale);
> - }
> - scale++;
> - }
> + if (last_level)
> + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
> + else
> + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
> +
> dsb(ish);
> }
>
--
Shaoqin
More information about the linux-arm-kernel mailing list