[RFC PATCH v2 12/21] riscv: mm: Reimplement tlb flush function

Xu Lu luxu.kernel at bytedance.com
Thu Dec 5 02:37:20 PST 2024


When flushing the TLB for a page corresponding to a certain address, the
CPU actually only flushes the TLB entries of the first 4K hardware page.
This commit reimplements the TLB flushing functions to flush the TLB
entries of all hardware pages in the same software page.

Signed-off-by: Xu Lu <luxu.kernel at bytedance.com>
---
 arch/riscv/include/asm/pgtable.h  |  9 ++++++---
 arch/riscv/include/asm/tlbflush.h | 26 ++++++++++++++++++++------
 arch/riscv/mm/fault.c             | 13 +++++++++----
 arch/riscv/mm/init.c              |  2 +-
 arch/riscv/mm/tlbflush.c          | 31 +++++++++++++++++++++----------
 5 files changed, 57 insertions(+), 24 deletions(-)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index c0f7442c8a9e..9fa16c0c20aa 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -701,7 +701,7 @@ static inline void update_mmu_cache_range(struct vm_fault *vmf,
 	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 	 */
 	while (nr--)
-		local_flush_tlb_page(address + nr * PAGE_SIZE);
+		local_flush_tlb_page(address + nr * PAGE_SIZE, PAGE_SIZE);
 
 svvptc:;
 	/*
@@ -719,9 +719,12 @@ svvptc:;
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmdp)
 {
-	pte_t *ptep = (pte_t *)pmdp;
+	asm goto(ALTERNATIVE("nop", "j %l[svvptc]", 0, RISCV_ISA_EXT_SVVPTC, 1)
+		 : : : : svvptc);
 
-	update_mmu_cache(vma, address, ptep);
+	local_flush_tlb_page(address, PMD_SIZE);
+
+svvptc:;
 }
 
 #define __HAVE_ARCH_PTE_SAME
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 72e559934952..25cc39ab84d5 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -29,18 +29,32 @@ static inline void local_flush_tlb_all_asid(unsigned long asid)
 }
 
 /* Flush one page from local TLB */
-static inline void local_flush_tlb_page(unsigned long addr)
+static inline void local_flush_tlb_page(unsigned long addr,
+					unsigned long page_size)
 {
-	ALT_SFENCE_VMA_ADDR(addr);
+	unsigned int i;
+	unsigned long hw_page_num = 1 << (PAGE_SHIFT - HW_PAGE_SHIFT);
+	unsigned long hw_page_size = page_size >> (PAGE_SHIFT - HW_PAGE_SHIFT);
+
+	for (i = 0; i < hw_page_num; i++, addr += hw_page_size)
+		ALT_SFENCE_VMA_ADDR(addr);
 }
 
 static inline void local_flush_tlb_page_asid(unsigned long addr,
+					     unsigned long page_size,
 					     unsigned long asid)
 {
-	if (asid != FLUSH_TLB_NO_ASID)
-		ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
-	else
-		local_flush_tlb_page(addr);
+	unsigned int i;
+	unsigned long hw_page_num, hw_page_size;
+
+	if (asid != FLUSH_TLB_NO_ASID) {
+		hw_page_num = 1 << (PAGE_SHIFT - HW_PAGE_SHIFT);
+		hw_page_size = page_size >> (PAGE_SHIFT - HW_PAGE_SHIFT);
+
+		for (i = 0; i < hw_page_num; i++, addr += hw_page_size)
+			ALT_SFENCE_VMA_ADDR_ASID(addr, asid);
+	} else
+		local_flush_tlb_page(addr, page_size);
 }
 
 void flush_tlb_all(void);
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 4772152be0f9..94524e5adc0b 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -118,7 +118,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 	pmd_t *pmd_k;
 	pte_t *pte_k;
 	int index;
-	unsigned long pfn;
+	unsigned long pfn, page_size;
 
 	/* User mode accesses just cause a SIGSEGV */
 	if (user_mode(regs))
@@ -154,8 +154,10 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 		no_context(regs, addr);
 		return;
 	}
-	if (pud_leaf(pudp_get(pud_k)))
+	if (pud_leaf(pudp_get(pud_k))) {
+		page_size = PUD_SIZE;
 		goto flush_tlb;
+	}
 
 	/*
 	 * Since the vmalloc area is global, it is unnecessary
@@ -166,8 +168,10 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 		no_context(regs, addr);
 		return;
 	}
-	if (pmd_leaf(pmdp_get(pmd_k)))
+	if (pmd_leaf(pmdp_get(pmd_k))) {
+		page_size = PMD_SIZE;
 		goto flush_tlb;
+	}
 
 	/*
 	 * Make sure the actual PTE exists as well to
@@ -180,6 +184,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 		no_context(regs, addr);
 		return;
 	}
+	page_size = PAGE_SIZE;
 
 	/*
 	 * The kernel assumes that TLBs don't cache invalid
@@ -188,7 +193,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 	 * necessary even after writing invalid entries.
 	 */
 flush_tlb:
-	local_flush_tlb_page(addr);
+	local_flush_tlb_page(addr, page_size);
 }
 
 static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f9334aab45a6..678b892b4ed8 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -356,7 +356,7 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
 		set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
 	else
 		pte_clear(&init_mm, addr, ptep);
-	local_flush_tlb_page(addr);
+	local_flush_tlb_page(addr, PAGE_SIZE);
 }
 
 static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 9b6e86ce3867..d5036f2a8244 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -27,7 +27,7 @@ static void local_flush_tlb_range_threshold_asid(unsigned long start,
 	}
 
 	for (i = 0; i < nr_ptes_in_range; ++i) {
-		local_flush_tlb_page_asid(start, asid);
+		local_flush_tlb_page_asid(start, stride, asid);
 		start += stride;
 	}
 }
@@ -36,7 +36,7 @@ static inline void local_flush_tlb_range_asid(unsigned long start,
 		unsigned long size, unsigned long stride, unsigned long asid)
 {
 	if (size <= stride)
-		local_flush_tlb_page_asid(start, asid);
+		local_flush_tlb_page_asid(start, stride, asid);
 	else if (size == FLUSH_TLB_MAX_SIZE)
 		local_flush_tlb_all_asid(asid);
 	else
@@ -126,14 +126,7 @@ void flush_tlb_mm_range(struct mm_struct *mm,
 			  start, end - start, page_size);
 }
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
-{
-	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
-			  addr, PAGE_SIZE, PAGE_SIZE);
-}
-
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-		     unsigned long end)
+static inline unsigned long local_flush_tlb_page_size(struct vm_area_struct *vma)
 {
 	unsigned long stride_size;
 
@@ -161,6 +154,24 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		}
 	}
 
+	return stride_size;
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+	unsigned long page_size;
+
+	page_size = local_flush_tlb_page_size(vma);
+	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
+			  addr, page_size, page_size);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+		     unsigned long end)
+{
+	unsigned long stride_size;
+
+	stride_size = local_flush_tlb_page_size(vma);
 	__flush_tlb_range(mm_cpumask(vma->vm_mm), get_mm_asid(vma->vm_mm),
 			  start, end - start, stride_size);
 }
-- 
2.20.1




More information about the linux-riscv mailing list