[PATCH v2 4/7] mm/vmalloc: Extend page table walk to support larger page_shift sizes and eliminate page table rewalk
Wen Jiang
jiangwenxiaomi at gmail.com
Thu May 14 02:41:05 PDT 2026
From: "Barry Song (Xiaomi)" <baohua at kernel.org>
vmap_pages_range_noflush_walk() (formerly vmap_small_pages_range_noflush())
provides a clean interface by taking struct page **pages and mapping them
via direct PTE iteration. This avoids the page table rewalk seen when
using vmap_range_noflush() for page_shift values other than PAGE_SHIFT.
Extend it to support larger page_shift values, and add PMD- and
contiguous-PTE mappings as well. Rename it to vmap_pages_range_noflush_walk()
since it now handles more than just small pages.
For vmalloc() allocations with VM_ALLOW_HUGE_VMAP, we no longer need to
iterate over pages one by one via vmap_range_noflush(), which would
otherwise lead to page table rewalk. The code is now unified with the
PAGE_SHIFT case by simply calling vmap_pages_range_noflush_walk().
Signed-off-by: Barry Song (Xiaomi) <baohua at kernel.org>
Signed-off-by: Wen Jiang <jiangwen6 at xiaomi.com>
Tested-by: Xueyuan Chen <xueyuan.chen21 at gmail.com>
---
mm/vmalloc.c | 64 +++++++++++++++++++++++++++-------------------------
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9bfd0aa34..516d40650 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -543,8 +543,10 @@ void vunmap_range(unsigned long addr, unsigned long end)
static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
- pgtbl_mod_mask *mask)
+ pgtbl_mod_mask *mask, unsigned int shift)
{
+ unsigned long pfn, size;
+ unsigned int steps;
int err = 0;
pte_t *pte;
@@ -575,9 +577,10 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
break;
}
- set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
- (*nr)++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ pfn = page_to_pfn(page);
+ size = vmap_set_ptes(pte, addr, end, pfn, prot, shift);
+ steps = PFN_DOWN(size);
+ } while (pte += steps, *nr += steps, addr += size, addr != end);
lazy_mmu_mode_disable();
*mask |= PGTBL_PTE_MODIFIED;
@@ -587,7 +590,7 @@ static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
- pgtbl_mod_mask *mask)
+ pgtbl_mod_mask *mask, unsigned int shift)
{
pmd_t *pmd;
unsigned long next;
@@ -597,7 +600,20 @@ static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
+
+ if (shift == PMD_SHIFT) {
+ struct page *page = pages[*nr];
+ phys_addr_t phys_addr = page_to_phys(page);
+
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
+ shift)) {
+ *mask |= PGTBL_PMD_MODIFIED;
+ *nr += 1 << (shift - PAGE_SHIFT);
+ continue;
+ }
+ }
+
+ if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask, shift))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
@@ -605,7 +621,7 @@ static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
- pgtbl_mod_mask *mask)
+ pgtbl_mod_mask *mask, unsigned int shift)
{
pud_t *pud;
unsigned long next;
@@ -615,7 +631,7 @@ static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask, shift))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
@@ -623,7 +639,7 @@ static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
- pgtbl_mod_mask *mask)
+ pgtbl_mod_mask *mask, unsigned int shift)
{
p4d_t *p4d;
unsigned long next;
@@ -633,14 +649,14 @@ static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask, shift))
return -ENOMEM;
} while (p4d++, addr = next, addr != end);
return 0;
}
-static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
- pgprot_t prot, struct page **pages)
+static int vmap_pages_range_noflush_walk(unsigned long addr, unsigned long end,
+ pgprot_t prot, struct page **pages, unsigned int shift)
{
unsigned long start = addr;
pgd_t *pgd;
@@ -655,7 +671,7 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
next = pgd_addr_end(addr, end);
if (pgd_bad(*pgd))
mask |= PGTBL_PGD_MODIFIED;
- err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
+ err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask, shift);
if (err)
break;
} while (pgd++, addr = next, addr != end);
@@ -678,27 +694,13 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
- unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
-
WARN_ON(page_shift < PAGE_SHIFT);
- if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
- page_shift == PAGE_SHIFT)
- return vmap_small_pages_range_noflush(addr, end, prot, pages);
-
- for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
- int err;
-
- err = vmap_range_noflush(addr, addr + (1UL << page_shift),
- page_to_phys(pages[i]), prot,
- page_shift);
- if (err)
- return err;
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC))
+ page_shift = PAGE_SHIFT;
- addr += 1UL << page_shift;
- }
-
- return 0;
+ return vmap_pages_range_noflush_walk(addr, end, prot, pages,
+ min(page_shift, PMD_SHIFT));
}
int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
--
2.34.1
More information about the linux-arm-kernel
mailing list