[PATCH RFC v3 4/4] mm: add PMD-level huge page support for remap_pfn_range()
Yin Tirui
yintirui at huawei.com
Fri Feb 27 23:09:06 PST 2026
Add PMD-level huge page support to remap_pfn_range(), automatically
creating huge mappings when prerequisites are satisfied (size, alignment,
architecture support, etc.) and falling back to normal page mappings
otherwise.
Implement special huge PMD splitting by utilizing the pgtable deposit/
withdraw mechanism. When splitting is needed, the deposited pgtable is
withdrawn and populated with individual PTEs created from the original
huge mapping.
Signed-off-by: Yin Tirui <yintirui at huawei.com>
---
mm/huge_memory.c | 36 ++++++++++++++++++++++++++++++++++--
mm/memory.c | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 74 insertions(+), 2 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d4ca8cfd7f9d..e463d51005ee 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1857,6 +1857,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd = pmdp_get_lockless(src_pmd);
if (unlikely(pmd_present(pmd) && pmd_special(pmd) &&
!is_huge_zero_pmd(pmd))) {
+ pgtable = pte_alloc_one(dst_mm);
+ if (unlikely(!pgtable))
+ goto out;
dst_ptl = pmd_lock(dst_mm, dst_pmd);
src_ptl = pmd_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -1870,6 +1873,12 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* able to wrongly write to the backend MMIO.
*/
VM_WARN_ON_ONCE(is_cow_mapping(src_vma->vm_flags) && pmd_write(pmd));
+
+ /* DAX never reaches here: it is intercepted earlier, at vma_needs_copy() */
+ VM_WARN_ON_ONCE(vma_is_dax(src_vma));
+
+ mm_inc_nr_ptes(dst_mm);
+ pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
goto set_pmd;
}
@@ -2360,6 +2369,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
arch_check_zapped_pmd(vma, orig_pmd);
tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+ if (pmd_special(orig_pmd))
+ zap_deposited_table(tlb->mm, pmd);
if (arch_needs_pgtable_deposit())
zap_deposited_table(tlb->mm, pmd);
spin_unlock(ptl);
@@ -3005,14 +3016,35 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
if (!vma_is_anonymous(vma)) {
old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
+
+ if (!vma_is_dax(vma) && vma_is_special_huge(vma)) {
+ pte_t entry;
+
+ if (!pmd_special(old_pmd)) {
+ zap_deposited_table(mm, pmd);
+ return;
+ }
+ pgtable = pgtable_trans_huge_withdraw(mm, pmd);
+ if (unlikely(!pgtable))
+ return;
+ pmd_populate(mm, &_pmd, pgtable);
+ pte = pte_offset_map(&_pmd, haddr);
+ entry = pfn_pte(pmd_pfn(old_pmd), pmd_pgprot(old_pmd));
+ set_ptes(mm, haddr, pte, entry, HPAGE_PMD_NR);
+ pte_unmap(pte);
+
+ smp_wmb(); /* make pte visible before pmd */
+ pmd_populate(mm, pmd, pgtable);
+ return;
+ }
+
/*
* We are going to unmap this huge page. So
* just go ahead and zap it
*/
if (arch_needs_pgtable_deposit())
zap_deposited_table(mm, pmd);
- if (!vma_is_dax(vma) && vma_is_special_huge(vma))
- return;
+
if (unlikely(pmd_is_migration_entry(old_pmd))) {
const softleaf_t old_entry = softleaf_from_pmd(old_pmd);
diff --git a/mm/memory.c b/mm/memory.c
index 07778814b4a8..affccf38cbcf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2890,6 +2890,40 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
return err;
}
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+/*
+ * remap_try_huge_pmd - try to map [addr, end) with one special huge PMD.
+ *
+ * Returns 1 if a huge PMD was installed for the range, or 0 if the caller
+ * must fall back to PTE mappings.  0 is not an error; it is returned when
+ * the range is not exactly one PMD_SIZE-aligned chunk, when @pfn is not
+ * aligned to HPAGE_PMD_NR, when an already-present PMD entry cannot be
+ * released by pmd_free_pte_page(), or when the PTE page to deposit cannot
+ * be allocated.
+ *
+ * A PTE page is always deposited together with the huge PMD so that it can
+ * be withdrawn later if the mapping has to be split into PTEs.
+ */
+static int remap_try_huge_pmd(struct mm_struct *mm, pmd_t *pmd,
+			unsigned long addr, unsigned long end,
+			unsigned long pfn, pgprot_t prot)
+{
+	pgtable_t pgtable;
+	spinlock_t *ptl;
+
+	/* Only a range covering exactly one aligned PMD qualifies. */
+	if ((end - addr) != PMD_SIZE)
+		return 0;
+
+	if (!IS_ALIGNED(addr, PMD_SIZE))
+		return 0;
+
+	/* The target PFN must be aligned to the huge page size as well. */
+	if (!IS_ALIGNED(pfn, HPAGE_PMD_NR))
+		return 0;
+
+	/*
+	 * NOTE(review): presumably this releases a leftover PTE table so the
+	 * slot can hold a huge entry; confirm pmd_free_pte_page() is valid
+	 * for user page tables and keeps the nr_ptes accounting balanced.
+	 */
+	if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
+		return 0;
+
+	/* Allocation failure is not fatal: fall back to PTE mappings. */
+	pgtable = pte_alloc_one(mm);
+	if (unlikely(!pgtable))
+		return 0;
+
+	mm_inc_nr_ptes(mm);
+	ptl = pmd_lock(mm, pmd);
+	/*
+	 * Deposit the PTE page before publishing the huge PMD, so the huge
+	 * entry is never visible without its deposited page table.  This is
+	 * the same order copy_huge_pmd() uses (deposit, then set the PMD).
+	 */
+	pgtable_trans_huge_deposit(mm, pmd, pgtable);
+	set_pmd_at(mm, addr, pmd, pmd_mkspecial(pmd_mkhuge(pfn_pmd(pfn, prot))));
+	spin_unlock(ptl);
+
+	return 1;
+}
+#endif
+
static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
unsigned long pfn, pgprot_t prot)
@@ -2905,6 +2939,12 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
VM_BUG_ON(pmd_trans_huge(*pmd));
do {
next = pmd_addr_end(addr, end);
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+ if (remap_try_huge_pmd(mm, pmd, addr, next,
+ pfn + (addr >> PAGE_SHIFT), prot)) {
+ continue;
+ }
+#endif
err = remap_pte_range(mm, pmd, addr, next,
pfn + (addr >> PAGE_SHIFT), prot);
if (err)
--
2.22.0
More information about the linux-arm-kernel
mailing list