[PATCH 5/7] riscv: add contiguous PTE range clearing helpers
Yunhui Cui
cuiyunhui at bytedance.com
Tue Apr 21 02:24:55 PDT 2026
Add Svnapot-aware implementations of clear_full_ptes() and
get_and_clear_full_ptes() so full PTE batches can be cleared without
losing the required unfold semantics for NAPOT mappings.
Signed-off-by: Yunhui Cui <cuiyunhui at bytedance.com>
---
arch/riscv/include/asm/pgtable.h | 75 ++++++++++++++++++++++++-
arch/riscv/mm/contpte.c | 96 ++++++++++++++++++++++++++++++++
2 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 722483d4df37f..3e6516b5a4587 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -657,7 +657,6 @@ static inline void __set_pte_at(struct mm_struct *mm, pte_t *ptep, pte_t pteval)
}
#define PFN_PTE_SHIFT _PAGE_PFN_SHIFT
-
static inline void __set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval, unsigned int nr)
{
@@ -764,6 +763,47 @@ __ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep)
#define __ptep_get_and_clear __ptep_get_and_clear
+static inline void __clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ (void)full;
+
+ for (;;) {
+ __ptep_get_and_clear(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+#define __clear_full_ptes __clear_full_ptes
+
+static inline pte_t __get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned int nr,
+ int full)
+{
+ pte_t pte, tmp_pte;
+
+ (void)full;
+
+ pte = __ptep_get_and_clear(mm, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = __ptep_get_and_clear(mm, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+
+ return pte;
+}
+
+#define __get_and_clear_full_ptes __get_and_clear_full_ptes
static inline void
__ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
@@ -831,6 +871,11 @@ pte_t napotpte_ptep_get(pte_t *ptep, pte_t orig_pte);
pte_t napotpte_ptep_get_lockless(pte_t *ptep);
void napotpte_set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, unsigned int nr);
+void napotpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full);
+pte_t napotpte_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full);
void napotpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
unsigned int nr, cydp_t flags);
@@ -933,6 +978,32 @@ static inline void clear_young_dirty_ptes(struct vm_area_struct *vma,
napotpte_clear_young_dirty_ptes(vma, addr, ptep, nr, flags);
}
+#define clear_full_ptes clear_full_ptes
+static inline void clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ if (likely(nr == 1)) {
+ napotpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ __clear_full_ptes(mm, addr, ptep, nr, full);
+ return;
+ }
+
+ napotpte_clear_full_ptes(mm, addr, ptep, nr, full);
+}
+
+#define get_and_clear_full_ptes get_and_clear_full_ptes
+static inline pte_t get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ if (likely(nr == 1)) {
+ napotpte_try_unfold(mm, addr, ptep, __ptep_get(ptep));
+ return __get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+ }
+
+ return napotpte_get_and_clear_full_ptes(mm, addr, ptep, nr, full);
+}
+
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
@@ -989,6 +1060,8 @@ napotpte_ptep_clear_flush_young(struct vm_area_struct *vma,
#define ptep_get_lockless __ptep_get_lockless
#define ptep_get_and_clear __ptep_get_and_clear
#define clear_young_dirty_ptes __clear_young_dirty_ptes
+#define clear_full_ptes __clear_full_ptes
+#define get_and_clear_full_ptes __get_and_clear_full_ptes
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define ptep_set_wrprotect __ptep_set_wrprotect
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
diff --git a/arch/riscv/mm/contpte.c b/arch/riscv/mm/contpte.c
index f73af7d9b099a..77c2a4dbd3dda 100644
--- a/arch/riscv/mm/contpte.c
+++ b/arch/riscv/mm/contpte.c
@@ -107,6 +107,38 @@ __napot_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep
return pte;
}
+static void __napot_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr)
+{
+ for (;;) {
+ __napot_ptep_get_and_clear(mm, addr, ptep);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ }
+}
+
+static pte_t __napot_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr)
+{
+ pte_t pte, tmp_pte;
+
+ pte = __napot_ptep_get_and_clear(mm, addr, ptep);
+ while (--nr) {
+ ptep++;
+ addr += PAGE_SIZE;
+ tmp_pte = __napot_ptep_get_and_clear(mm, addr, ptep);
+ if (pte_dirty(tmp_pte))
+ pte = pte_mkdirty(pte);
+ if (pte_young(tmp_pte))
+ pte = pte_mkyoung(pte);
+ }
+
+ return pte;
+}
+
static void napotpte_convert(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t target)
{
@@ -202,6 +234,33 @@ void __napotpte_try_fold(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(__napotpte_try_fold);
+static void napotpte_try_unfold_range(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr)
+{
+ unsigned long next;
+ pte_t pte;
+ unsigned int chunk;
+
+ while (nr) {
+ pte = READ_ONCE(*ptep);
+ if (pte_present_napot(pte)) {
+ __napotpte_try_unfold(mm, addr, ptep, pte);
+ next = napot_align_addr(addr) + napotpte_size();
+ chunk = (next - addr) >> PAGE_SHIFT;
+ } else {
+ chunk = 1;
+ }
+
+ if (chunk > nr)
+ chunk = nr;
+
+ ptep += chunk;
+ addr += chunk * PAGE_SIZE;
+ nr -= chunk;
+ }
+}
+
void __napotpte_try_unfold(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
@@ -349,6 +408,43 @@ void napotpte_set_ptes(struct mm_struct *mm, unsigned long addr,
}
EXPORT_SYMBOL(napotpte_set_ptes);
+void napotpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned int nr, int full)
+{
+ (void)full;
+
+ if (!napot_hw_supported() || !mm_is_user(mm)) {
+ __napot_clear_full_ptes(mm, addr, ptep, nr);
+ return;
+ }
+
+ /*
+ * Unlike arm64 contpte, a Svnapot PTE block stores identical
+ * napot-encoded entries across the whole block rather than per-page
+ * PFNs. Batch zap paths must therefore unfold the whole covered range
+ * so the core MM later sees ordinary per-page PTEs for rmap/rss/tlb
+ * batching.
+ */
+ napotpte_try_unfold_range(mm, addr, ptep, nr);
+ __napot_clear_full_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL(napotpte_clear_full_ptes);
+
+pte_t napotpte_get_and_clear_full_ptes(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ unsigned int nr, int full)
+{
+ (void)full;
+
+ if (!napot_hw_supported() || !mm_is_user(mm))
+ return __napot_get_and_clear_full_ptes(mm, addr, ptep, nr);
+
+ napotpte_try_unfold_range(mm, addr, ptep, nr);
+
+ return __napot_get_and_clear_full_ptes(mm, addr, ptep, nr);
+}
+EXPORT_SYMBOL(napotpte_get_and_clear_full_ptes);
+
void napotpte_clear_young_dirty_ptes(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
unsigned int nr, cydp_t flags)
--
2.39.5
More information about the linux-riscv mailing list