[PATCH] RISC-V: add uniprocessor flush_tlb_range() support

Yunhui Cui cuiyunhui at bytedance.com
Wed Jan 24 22:20:44 PST 2024


Add flush_tlb_range() support for uniprocessor (UP) systems, so that only
the requested range is flushed instead of the whole TLB, improving TLB
performance. To avoid a circular inclusion between tlbflush.h and
hugetlb.h, the UP variant is implemented in tlbflush.c as well.
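
For illustration, a minimal user-space sketch of the behavioural difference
on UP: the old !CONFIG_SMP stub flushed the entire TLB for any range, while
with this patch the range is walked at the chosen stride, as
local_flush_tlb_range_asid() does. The sfence.vma instructions are replaced
by prints so the sketch compiles anywhere, a 4 KiB page size is assumed,
and the kernel's fall-back to a full flush above a threshold is omitted:

  #include <stdio.h>

  #define PAGE_SIZE 4096UL

  /* Stand-ins for sfence.vma; the real code uses inline assembly. */
  static void sfence_vma_page(unsigned long addr)
  {
          printf("sfence.vma %#lx\n", addr);
  }

  static void sfence_vma_all(void)
  {
          printf("sfence.vma (flush everything)\n");
  }

  /* Old UP stub: any flush_tlb_range() request flushed the whole TLB. */
  static void up_flush_range_old(unsigned long start, unsigned long end)
  {
          (void)start;
          (void)end;
          sfence_vma_all();
  }

  /* New UP path: only the pages covering [start, end) are flushed. */
  static void up_flush_range_new(unsigned long start, unsigned long end,
                                 unsigned long stride)
  {
          unsigned long addr;

          for (addr = start; addr < end; addr += stride)
                  sfence_vma_page(addr);
  }

  int main(void)
  {
          up_flush_range_old(0x10000, 0x14000);            /* one global flush */
          up_flush_range_new(0x10000, 0x14000, PAGE_SIZE); /* four page flushes */
          return 0;
  }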

Signed-off-by: Yunhui Cui <cuiyunhui at bytedance.com>
---
 arch/riscv/include/asm/tlbflush.h |  61 ++++++----
 arch/riscv/mm/Makefile            |   2 +-
 arch/riscv/mm/tlbflush.c          | 195 ++++++++++++++++++------------
 3 files changed, 156 insertions(+), 102 deletions(-)
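
Note: the stride selection factored out of flush_tlb_range() into
get_stride_size() rounds a huge-page stride down to the containing
page-table level when Svnapot is present, because every PTE of a NAPOT
region must be invalidated. A standalone sketch of that rounding, using
illustrative Sv57-style level sizes rather than the kernel's definitions:

  #include <stdio.h>

  /* Illustrative level sizes (Sv57-style); the kernel derives the real ones. */
  #define PAGE_SIZE  (1UL << 12)
  #define PMD_SIZE   (1UL << 21)
  #define PUD_SIZE   (1UL << 30)
  #define P4D_SIZE   (1UL << 39)
  #define PGDIR_SIZE (1UL << 48)

  /* Mirrors the Svnapot branch of get_stride_size(). */
  static unsigned long napot_stride(unsigned long huge_page_size)
  {
          if (huge_page_size >= PGDIR_SIZE)
                  return PGDIR_SIZE;
          if (huge_page_size >= P4D_SIZE)
                  return P4D_SIZE;
          if (huge_page_size >= PUD_SIZE)
                  return PUD_SIZE;
          if (huge_page_size >= PMD_SIZE)
                  return PMD_SIZE;
          return PAGE_SIZE;
  }

  int main(void)
  {
          /* A 64 KiB NAPOT mapping is flushed with a 4 KiB stride. */
          printf("%lu\n", napot_stride(64UL << 10));
          /* A 2 MiB huge page keeps a 2 MiB stride. */
          printf("%lu\n", napot_stride(2UL << 20));
          return 0;
  }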

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 928f096dca21..426f043fb450 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,12 +10,21 @@
 #include <linux/mm_types.h>
 #include <asm/smp.h>
 #include <asm/errata_list.h>
+#include <asm/tlbbatch.h>
 
 #define FLUSH_TLB_MAX_SIZE      ((unsigned long)-1)
 #define FLUSH_TLB_NO_ASID       ((unsigned long)-1)
 
 #ifdef CONFIG_MMU
 extern unsigned long asid_mask;
+DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
+
+struct flush_tlb_range_data {
+	unsigned long asid;
+	unsigned long start;
+	unsigned long size;
+	unsigned long stride;
+};
 
 static inline void local_flush_tlb_all(void)
 {
@@ -27,12 +36,40 @@ static inline void local_flush_tlb_page(unsigned long addr)
 {
 	ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
 }
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+	if (asid != FLUSH_TLB_NO_ASID)
+		__asm__ __volatile__ ("sfence.vma x0, %0"
+				:
+				: "r" (asid)
+				: "memory");
+	else
+		local_flush_tlb_all();
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+		unsigned long asid)
+{
+	if (asid != FLUSH_TLB_NO_ASID)
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				:
+				: "r" (addr), "r" (asid)
+				: "memory");
+	else
+		local_flush_tlb_page(addr);
+}
+
+static inline unsigned long get_mm_asid(struct mm_struct *mm)
+{
+	return static_branch_unlikely(&use_asid_allocator) ?
+			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+}
 #else /* CONFIG_MMU */
 #define local_flush_tlb_all()			do { } while (0)
 #define local_flush_tlb_page(addr)		do { } while (0)
 #endif /* CONFIG_MMU */
 
-#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
 void flush_tlb_all(void);
 void flush_tlb_mm(struct mm_struct *mm);
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -55,26 +92,4 @@ void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
 void arch_flush_tlb_batched_pending(struct mm_struct *mm);
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
-#else /* CONFIG_SMP && CONFIG_MMU */
-
-#define flush_tlb_all() local_flush_tlb_all()
-#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end)
-{
-	local_flush_tlb_all();
-}
-
-/* Flush a range of kernel pages */
-static inline void flush_tlb_kernel_range(unsigned long start,
-	unsigned long end)
-{
-	local_flush_tlb_all();
-}
-
-#define flush_tlb_mm(mm) flush_tlb_all()
-#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
-#endif /* !CONFIG_SMP || !CONFIG_MMU */
-
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 2c869f8026a8..7c6c4c858a6b 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -19,7 +19,7 @@ obj-y += context.o
 obj-y += pmem.o
 
 ifeq ($(CONFIG_MMU),y)
-obj-$(CONFIG_SMP) += tlbflush.o
+obj-y += tlbflush.o
 endif
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 8d12b26f5ac3..4765603fa08a 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -6,28 +6,36 @@
 #include <linux/hugetlb.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
 
-static inline void local_flush_tlb_all_asid(unsigned long asid)
+static unsigned long get_stride_size(struct vm_area_struct *vma)
 {
-	if (asid != FLUSH_TLB_NO_ASID)
-		__asm__ __volatile__ ("sfence.vma x0, %0"
-				:
-				: "r" (asid)
-				: "memory");
-	else
-		local_flush_tlb_all();
-}
+	unsigned long stride_size;
 
-static inline void local_flush_tlb_page_asid(unsigned long addr,
-		unsigned long asid)
-{
-	if (asid != FLUSH_TLB_NO_ASID)
-		__asm__ __volatile__ ("sfence.vma %0, %1"
-				:
-				: "r" (addr), "r" (asid)
-				: "memory");
-	else
-		local_flush_tlb_page(addr);
+	if (!is_vm_hugetlb_page(vma))
+		return PAGE_SIZE;
+
+	stride_size = huge_page_size(hstate_vma(vma));
+
+	/*
+	 * As stated in the privileged specification, every PTE in a
+	 * NAPOT region must be invalidated, so reset the stride in that
+	 * case.
+	 */
+	if (has_svnapot()) {
+		if (stride_size >= PGDIR_SIZE)
+			stride_size = PGDIR_SIZE;
+		else if (stride_size >= P4D_SIZE)
+			stride_size = P4D_SIZE;
+		else if (stride_size >= PUD_SIZE)
+			stride_size = PUD_SIZE;
+		else if (stride_size >= PMD_SIZE)
+			stride_size = PMD_SIZE;
+		else
+			stride_size = PAGE_SIZE;
+	}
+
+	return stride_size;
 }
 
 /*
@@ -66,31 +74,12 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
 		local_flush_tlb_range_threshold_asid(start, size, stride, asid);
 }
 
-void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-	local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID);
-}
-
+#ifdef CONFIG_SMP
 static void __ipi_flush_tlb_all(void *info)
 {
 	local_flush_tlb_all();
 }
 
-void flush_tlb_all(void)
-{
-	if (riscv_use_ipi_for_rfence())
-		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
-	else
-		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
-}
-
-struct flush_tlb_range_data {
-	unsigned long asid;
-	unsigned long start;
-	unsigned long size;
-	unsigned long stride;
-};
-
 static void __ipi_flush_tlb_range_asid(void *info)
 {
 	struct flush_tlb_range_data *d = info;
@@ -138,10 +127,18 @@ static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid,
 		put_cpu();
 }
 
-static inline unsigned long get_mm_asid(struct mm_struct *mm)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
-	return static_branch_unlikely(&use_asid_allocator) ?
-			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+			  addr, PAGE_SIZE, PAGE_SIZE);
+}
+
+void flush_tlb_all(void)
+{
+	if (riscv_use_ipi_for_rfence())
+		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
+	else
+		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
@@ -158,41 +155,12 @@ void flush_tlb_mm_range(struct mm_struct *mm,
 			  start, end - start, page_size);
 }
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
-			  addr, PAGE_SIZE, PAGE_SIZE);
-}
-
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
 	unsigned long stride_size;
 
-	if (!is_vm_hugetlb_page(vma)) {
-		stride_size = PAGE_SIZE;
-	} else {
-		stride_size = huge_page_size(hstate_vma(vma));
-
-		/*
-		 * As stated in the privileged specification, every PTE in a
-		 * NAPOT region must be invalidated, so reset the stride in that
-		 * case.
-		 */
-		if (has_svnapot()) {
-			if (stride_size >= PGDIR_SIZE)
-				stride_size = PGDIR_SIZE;
-			else if (stride_size >= P4D_SIZE)
-				stride_size = P4D_SIZE;
-			else if (stride_size >= PUD_SIZE)
-				stride_size = PUD_SIZE;
-			else if (stride_size >= PMD_SIZE)
-				stride_size = PMD_SIZE;
-			else
-				stride_size = PAGE_SIZE;
-		}
-	}
-
+	stride_size = get_stride_size(vma);
 	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
 			  start, end - start, stride_size);
 }
@@ -203,6 +171,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 			  start, end - start, PAGE_SIZE);
 }
 
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
+			  FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end)
@@ -212,6 +186,77 @@ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 }
 #endif
 
+#else
+static void __flush_tlb_range_up(struct mm_struct *mm, unsigned long start,
+				 unsigned long size, unsigned long stride)
+{
+	unsigned long asid = FLUSH_TLB_NO_ASID;
+
+	if (mm)
+		asid = get_mm_asid(mm);
+
+	local_flush_tlb_range_asid(start, size, stride, asid);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	local_flush_tlb_page(addr);
+}
+
+void flush_tlb_all(void)
+{
+	local_flush_tlb_all();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	__flush_tlb_range_up(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
+void flush_tlb_mm_range(struct mm_struct *mm,
+			unsigned long start, unsigned long end,
+			unsigned int page_size)
+{
+	__flush_tlb_range_up(mm, start, end - start, page_size);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	unsigned long stride_size;
+
+	stride_size = get_stride_size(vma);
+	__flush_tlb_range_up(vma->vm_mm, start, end - start, stride_size);
+}
+
+/* Flush a range of kernel pages */
+void flush_tlb_kernel_range(unsigned long start,
+	unsigned long end)
+{
+	__flush_tlb_range_up(NULL, start, end - start, PAGE_SIZE);
+}
+
+void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	__flush_tlb_range_up(NULL, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end)
+{
+	__flush_tlb_range_up(vma->vm_mm, start, end - start, PMD_SIZE);
+}
+#endif
+
+#endif
+
+void local_flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	local_flush_tlb_range_asid(start, end - start, PAGE_SIZE,
+				   FLUSH_TLB_NO_ASID);
+}
+
 bool arch_tlbbatch_should_defer(struct mm_struct *mm)
 {
 	return true;
@@ -228,9 +273,3 @@ void arch_flush_tlb_batched_pending(struct mm_struct *mm)
 {
 	flush_tlb_mm(mm);
 }
-
-void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
-{
-	__flush_tlb_range(&batch->cpumask, FLUSH_TLB_NO_ASID, 0,
-			  FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
-}
-- 
2.20.1



