[PATCH v3 0/6] support batched checking of the young flag for MGLRU

Andrew Morton akpm at linux-foundation.org
Fri Mar 6 15:20:42 PST 2026


On Fri,  6 Mar 2026 14:43:36 +0800 Baolin Wang <baolin.wang at linux.alibaba.com> wrote:

> This is a follow-up to the previous work [1], to support batched checking
> of the young flag for MGLRU.
> 
> Similarly, batched checking of the young flag for large folios can improve
> performance during large-folio reclamation when MGLRU is enabled. I
> observed noticeable performance improvements (see patch 5) on an Arm64
> machine that supports contiguous PTEs. All mm selftests pass.

Thanks, I updated mm-new with this.

> Changes from v2:
> v2: https://lore.kernel.org/all/cover.1772185080.git.baolin.wang@linux.alibaba.com/
>  - Update the commit message of patch 5 (per David).
>  - Fix some coding style issues (per David).
>  - Remove 'cur_pte' variable in lru_gen_look_around() (per David).
>  - Move 'ptes += nr;' to the suitable place in folio_referenced_one() (per David).
>  - Add acked tag from David. Thanks.
> 

Here's how v3 altered mm.git:


 include/linux/pgtable.h |    3 +--
 mm/rmap.c               |    2 +-
 mm/vmscan.c             |   11 +++++------
 3 files changed, 7 insertions(+), 9 deletions(-)

--- a/include/linux/pgtable.h~b
+++ a/include/linux/pgtable.h
@@ -1124,8 +1124,7 @@ static inline int clear_flush_young_ptes
  * Returns: whether any PTE was young.
  */
 static inline int test_and_clear_young_ptes(struct vm_area_struct *vma,
-					    unsigned long addr, pte_t *ptep,
-					    unsigned int nr)
+		unsigned long addr, pte_t *ptep, unsigned int nr)
 {
 	int young = 0;
 
--- a/mm/rmap.c~b
+++ a/mm/rmap.c
@@ -964,7 +964,6 @@ static bool folio_referenced_one(struct
 			pte_t pteval = ptep_get(pvmw.pte);
 
 			nr = folio_pte_batch(folio, pvmw.pte, pteval, max_nr);
-			ptes += nr;
 		}
 
 		if (lru_gen_enabled() && pvmw.pte) {
@@ -982,6 +981,7 @@ static bool folio_referenced_one(struct
 			WARN_ON_ONCE(1);
 		}
 
+		ptes += nr;
 		pra->mapcount -= nr;
 		/*
 		 * If we are sure that we batched the entire folio,
--- a/mm/vmscan.c~b
+++ a/mm/vmscan.c
@@ -4201,7 +4201,6 @@ bool lru_gen_look_around(struct page_vma
 	struct lruvec *lruvec;
 	struct lru_gen_mm_state *mm_state;
 	unsigned long max_seq;
-	pte_t *cur_pte;
 	int gen;
 
 	lockdep_assert_held(pvmw->ptl);
@@ -4247,10 +4246,10 @@ bool lru_gen_look_around(struct page_vma
 
 	pte -= (addr - start) / PAGE_SIZE;
 
-	for (i = 0, addr = start, cur_pte = pte; addr != end;
-	     i += nr, cur_pte += nr, addr += nr * PAGE_SIZE) {
+	for (i = 0, addr = start; addr != end;
+	     i += nr, pte += nr, addr += nr * PAGE_SIZE) {
 		unsigned long pfn;
-		pte_t ptent = ptep_get(cur_pte);
+		pte_t ptent = ptep_get(pte);
 
 		nr = 1;
 		pfn = get_pte_pfn(ptent, vma, addr, pgdat);
@@ -4264,11 +4263,11 @@ bool lru_gen_look_around(struct page_vma
 		if (folio_test_large(folio)) {
 			const unsigned int max_nr = (end - addr) >> PAGE_SHIFT;
 
-			nr = folio_pte_batch_flags(folio, NULL, cur_pte, &ptent,
+			nr = folio_pte_batch_flags(folio, NULL, pte, &ptent,
 						   max_nr, FPB_MERGE_YOUNG_DIRTY);
 		}
 
-		if (!test_and_clear_young_ptes_notify(vma, addr, cur_pte, nr))
+		if (!test_and_clear_young_ptes_notify(vma, addr, pte, nr))
 			continue;
 
 		if (last != folio) {
_




More information about the linux-arm-kernel mailing list