[PATCH -v2 1/2] mm: add spurious fault fixing support for huge pmd

Huang Ying ying.huang at linux.alibaba.com
Mon Oct 13 02:20:37 PDT 2025


The kernel currently supports spurious fault fixing for PTEs, but not
for huge PMDs, because no architecture has needed it so far. The next
patch in this series changes the write protection fault handling logic
on arm64 such that stale huge PMD entries may remain in the TLB. These
entries need to be flushed via a huge PMD spurious fault fixing
mechanism, which this patch adds.

Signed-off-by: Huang Ying <ying.huang at linux.alibaba.com>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Will Deacon <will at kernel.org>
Cc: Andrew Morton <akpm at linux-foundation.org>
Cc: David Hildenbrand <david at redhat.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes at oracle.com>
Cc: Vlastimil Babka <vbabka at suse.cz>
Cc: Zi Yan <ziy at nvidia.com>
Cc: Baolin Wang <baolin.wang at linux.alibaba.com>
Cc: Ryan Roberts <ryan.roberts at arm.com>
Cc: Yang Shi <yang at os.amperecomputing.com>
Cc: "Christoph Lameter (Ampere)" <cl at gentwo.org>
Cc: Dev Jain <dev.jain at arm.com>
Cc: Barry Song <baohua at kernel.org>
Cc: Anshuman Khandual <anshuman.khandual at arm.com>
Cc: Yicong Yang <yangyicong at hisilicon.com>
Cc: Kefeng Wang <wangkefeng.wang at huawei.com>
Cc: Kevin Brodsky <kevin.brodsky at arm.com>
Cc: Yin Fengwei <fengwei_yin at linux.alibaba.com>
Cc: linux-arm-kernel at lists.infradead.org
Cc: linux-kernel at vger.kernel.org
Cc: linux-mm at kvack.org
---
 include/linux/pgtable.h |  4 ++++
 mm/huge_memory.c        | 22 +++++++++++++++++-----
 mm/internal.h           |  4 ++--
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 32e8457ad535..341622ec80e4 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1232,6 +1232,10 @@ static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 #define flush_tlb_fix_spurious_fault(vma, address, ptep) flush_tlb_page(vma, address)
 #endif
 
+#ifndef flush_tlb_fix_spurious_fault_pmd
+#define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) do { } while (0)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1b81680b4225..8533457c52b7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1641,17 +1641,22 @@ vm_fault_t vmf_insert_folio_pud(struct vm_fault *vmf, struct folio *folio,
 EXPORT_SYMBOL_GPL(vmf_insert_folio_pud);
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 
-void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
-	       pmd_t *pmd, bool write)
+/* Returns non-zero if the PMD entry was changed, zero otherwise. */
+int touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+	      pmd_t *pmd, bool write)
 {
+	int changed;
 	pmd_t _pmd;
 
 	_pmd = pmd_mkyoung(*pmd);
 	if (write)
 		_pmd = pmd_mkdirty(_pmd);
-	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
-				  pmd, _pmd, write))
+	changed = pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
+					pmd, _pmd, write);
+	if (changed)
 		update_mmu_cache_pmd(vma, addr, pmd);
+
+	return changed;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1849,7 +1854,14 @@ void huge_pmd_set_accessed(struct vm_fault *vmf)
 	if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd)))
 		goto unlock;
 
-	touch_pmd(vmf->vma, vmf->address, vmf->pmd, write);
+	if (!touch_pmd(vmf->vma, vmf->address, vmf->pmd, write)) {
+		/* See corresponding comments in handle_pte_fault(). */
+		if (vmf->flags & FAULT_FLAG_TRIED)
+			goto unlock;
+		if (vmf->flags & FAULT_FLAG_WRITE)
+			flush_tlb_fix_spurious_fault_pmd(vmf->vma, vmf->address,
+							 vmf->pmd);
+	}
 
 unlock:
 	spin_unlock(vmf->ptl);
diff --git a/mm/internal.h b/mm/internal.h
index 1561fc2ff5b8..8b58ab00a7cd 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1402,8 +1402,8 @@ int __must_check try_grab_folio(struct folio *folio, int refs,
  */
 void touch_pud(struct vm_area_struct *vma, unsigned long addr,
 	       pud_t *pud, bool write);
-void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
-	       pmd_t *pmd, bool write);
+int touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+	      pmd_t *pmd, bool write);
 
 /*
  * Parses a string with mem suffixes into its order. Useful to parse kernel
-- 
2.39.5




More information about the linux-arm-kernel mailing list