[v8 PATCH] arm64: mm: show direct mapping use in /proc/meminfo
Yang Shi
yang at os.amperecomputing.com
Tue Jun 9 17:06:31 PDT 2026
On 6/9/26 2:42 PM, Yang Shi wrote:
> Since commit a166563e7ec3 ("arm64: mm: support large block mapping when
> rodata=full"), the direct mapping may be split on some machines instead of
> staying static since boot. It makes more sense to show the direct mapping
> use in /proc/meminfo than before.
> This patch will make /proc/meminfo show the direct mapping use like the
> below (4K base page size):
> DirectMap4k: 94792 kB
> DirectMap64k: 134208 kB
> DirectMap2M: 1173504 kB
> DirectMap32M: 5636096 kB
> DirectMap1G: 529530880 kB
>
> Although only the machines which support BBML2_NOABORT can split the
> direct mapping, show it on all machines regardless of BBML2_NOABORT so
> that the users have a consistent view in order to avoid confusion.
>
> Although ptdump can also tell the direct map use, it needs to dump
> the whole kernel page table, which is costly and overkill. It is also
> in debugfs, which may not be enabled by all distros. So showing direct
> map use in /proc/meminfo seems more convenient and has less overhead.
>
> Signed-off-by: Yang Shi <yang at os.amperecomputing.com>
> ---
> arch/arm64/mm/mmu.c | 200 +++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 179 insertions(+), 21 deletions(-)
>
> v8: * Fixed the double accounting per Sashiko
> * Responded to the review comments from Sashiko
> v7: * Rebased to v7.1-rc4
> * Changed "dm" to "lm" to follow ARM convention per Will
> * Used __is_lm_address() instead of reinventing a new helper per Will
> v6: * Rebased to v7.0-rc3
> * Rebased on top of Anshuman's v5 "arm64/mm: Enable batched TLB flush
> in unmap_hotplug_range()"
> * Used const for direct map type array per Will
> * Defined PUD size for 16K/64K even though it is not used per Will
> * Removed the misleading comment in init_pmd() per Will
> v5: * Rebased to v6.19-rc4
> * Fixed the build error for !CONFIG_PROC_FS
> v4: * Used PAGE_END instead of _PAGE_END(VA_BITS_MIN) per Ryan
> * Used shorter name for the helpers and variables per Ryan
> * Fixed accounting for memory hotunplug
> v3: * Fixed the over-accounting problems per Ryan
> * Introduced helpers for add/sub direct map use and #ifdef them with
> CONFIG_PROC_FS per Ryan
> * v3 is a fix patch on top of v2
> v2: * Counted in size instead of the number of entries per Ryan
> * Removed shift array per Ryan
> * Use lower case "k" per Ryan
> * Fixed a couple of build warnings reported by kernel test robot
> * Fixed a couple of potential miscounts
Aha, Sashiko is so fast. 2 comments this time.
#1
> Will these updates suffer from data races?
> The lm_meminfo array tracks direct mapping statistics and is updated using
> non-atomic += and -= operations. These updates are invoked from multiple
> independent code paths that do not share a common lock.
> For example, runtime page permission changes call
> split_kernel_leaf_mapping_locked() which executes under
> pgtable_split_lock,
> while memory hotplug operations like arch_remove_memory() execute under
> mem_hotplug_lock. Because these paths can run concurrently on
> different CPUs,
> the non-atomic arithmetic could result in data races and lost updates.
Yes, it may race with memory hotplug. I missed the memory hotplug path when
going through Sashiko's v7 comments. Two options to solve it:
1. Use atomic variables. Make lm_meminfo[NR_LM_TYPE] atomic_long_t, then
manipulate it with atomic ops.
2. Protect it with a spin lock.
The contention for the cache line or the spin lock should be rare since
memory hotplug should happen rarely. Any preference?
#2 is a repeat of one from v7. I don't see any difference, so the response
for v7 is still valid.
Thanks,
Yang
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index dd85e093ffdb..783a473c71ed 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -29,6 +29,7 @@
> #include <linux/mm_inline.h>
> #include <linux/pagewalk.h>
> #include <linux/stop_machine.h>
> +#include <linux/proc_fs.h>
>
> #include <asm/barrier.h>
> #include <asm/cputype.h>
> @@ -164,6 +165,82 @@ static void init_clear_pgtable(void *table)
> dsb(ishst);
> }
>
> +enum lm_type {
> + PTE,
> + CONT_PTE,
> + PMD,
> + CONT_PMD,
> + PUD,
> + NR_LM_TYPE,
> +};
> +
> +#ifdef CONFIG_PROC_FS
> +static unsigned long lm_meminfo[NR_LM_TYPE];
> +
> +void arch_report_meminfo(struct seq_file *m)
> +{
> + const char *size[NR_LM_TYPE];
> +
> +#if defined(CONFIG_ARM64_4K_PAGES)
> + size[PTE] = "4k";
> + size[CONT_PTE] = "64k";
> + size[PMD] = "2M";
> + size[CONT_PMD] = "32M";
> + size[PUD] = "1G";
> +#elif defined(CONFIG_ARM64_16K_PAGES)
> + size[PTE] = "16k";
> + size[CONT_PTE] = "2M";
> + size[PMD] = "32M";
> + size[CONT_PMD] = "1G";
> + size[PUD] = "64G";
> +#elif defined(CONFIG_ARM64_64K_PAGES)
> + size[PTE] = "64k";
> + size[CONT_PTE] = "2M";
> + size[PMD] = "512M";
> + size[CONT_PMD] = "16G";
> + size[PUD] = "4T";
> +#endif
> +
> + seq_printf(m, "DirectMap%s: %8lu kB\n",
> + size[PTE], lm_meminfo[PTE] >> 10);
> + seq_printf(m, "DirectMap%s: %8lu kB\n",
> + size[CONT_PTE],
> + lm_meminfo[CONT_PTE] >> 10);
> + seq_printf(m, "DirectMap%s: %8lu kB\n",
> + size[PMD], lm_meminfo[PMD] >> 10);
> + seq_printf(m, "DirectMap%s: %8lu kB\n",
> + size[CONT_PMD],
> + lm_meminfo[CONT_PMD] >> 10);
> + if (pud_sect_supported())
> + seq_printf(m, "DirectMap%s: %8lu kB\n",
> + size[PUD], lm_meminfo[PUD] >> 10);
> +}
> +
> +static inline void lm_meminfo_add(unsigned long addr, unsigned long size,
> + enum lm_type type)
> +{
> + if (__is_lm_address(addr))
> + lm_meminfo[type] += size;
> +}
> +
> +static inline void lm_meminfo_sub(unsigned long addr, unsigned long size,
> + enum lm_type type)
> +{
> + if (__is_lm_address(addr))
> + lm_meminfo[type] -= size;
> +}
> +#else
> +static inline void lm_meminfo_add(unsigned long addr, unsigned long size,
> + enum lm_type type)
> +{
> +}
> +
> +static inline void lm_meminfo_sub(unsigned long addr, unsigned long size,
> + enum lm_type type)
> +{
> +}
> +#endif
> +
> static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
> phys_addr_t phys, pgprot_t prot)
> {
> @@ -219,6 +296,7 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>
> do {
> pgprot_t __prot = prot;
> + bool count_lm = pte_none(__ptep_get(ptep));
>
> next = pte_cont_addr_end(addr, end);
>
> @@ -229,6 +307,13 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>
> init_pte(ptep, addr, next, phys, __prot);
>
> + if (count_lm) {
> + if (pgprot_val(__prot) & PTE_CONT)
> + lm_meminfo_add(addr, (next - addr), CONT_PTE);
> + else
> + lm_meminfo_add(addr, (next - addr), PTE);
> + }
> +
> ptep += pte_index(next) - pte_index(addr);
> phys += next - addr;
> } while (addr = next, addr != end);
> @@ -251,6 +336,7 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
>
> do {
> pmd_t old_pmd = READ_ONCE(*pmdp);
> + bool count_lm = pmd_none(old_pmd);
>
> next = pmd_addr_end(addr, end);
>
> @@ -259,6 +345,12 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
> (flags & NO_BLOCK_MAPPINGS) == 0) {
> pmd_set_huge(pmdp, phys, prot);
>
> + if (count_lm) {
> + if (pgprot_val(prot) & PTE_CONT)
> + lm_meminfo_add(addr, (next - addr), CONT_PMD);
> + else
> + lm_meminfo_add(addr, (next - addr), PMD);
> + }
> /*
> * After the PMD entry has been populated once, we
> * only allow updates to the permission attributes.
> @@ -371,6 +463,7 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
>
> do {
> pud_t old_pud = READ_ONCE(*pudp);
> + bool count_lm = pud_none(old_pud);
>
> next = pud_addr_end(addr, end);
>
> @@ -382,6 +475,8 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
> (flags & NO_BLOCK_MAPPINGS) == 0) {
> pud_set_huge(pudp, phys, prot);
>
> + if (count_lm)
> + lm_meminfo_add(addr, (next - addr), PUD);
> /*
> * After the PUD entry has been populated once, we
> * only allow updates to the permission attributes.
> @@ -571,16 +666,21 @@ pgd_pgtable_alloc_special_mm(enum pgtable_level pgtable_level)
> return __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_level);
> }
>
> -static void split_contpte(pte_t *ptep)
> +static void split_contpte(unsigned long addr, pte_t *ptep)
> {
> int i;
>
> + lm_meminfo_sub(addr, CONT_PTE_SIZE, CONT_PTE);
> +
> ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
> for (i = 0; i < CONT_PTES; i++, ptep++)
> __set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
> +
> + lm_meminfo_add(addr, CONT_PTE_SIZE, PTE);
> }
>
> -static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> +static int split_pmd(unsigned long addr, pmd_t *pmdp, pmd_t pmd, gfp_t gfp,
> + bool to_cont)
> {
> pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
> unsigned long pfn = pmd_pfn(pmd);
> @@ -604,8 +704,13 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> if (to_cont)
> prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>
> + lm_meminfo_sub(addr, PMD_SIZE, PMD);
> for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
> __set_pte(ptep, pfn_pte(pfn, prot));
> + if (to_cont)
> + lm_meminfo_add(addr, PMD_SIZE, CONT_PTE);
> + else
> + lm_meminfo_add(addr, PMD_SIZE, PTE);
>
> /*
> * Ensure the pte entries are visible to the table walker by the time
> @@ -617,16 +722,21 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> return 0;
> }
>
> -static void split_contpmd(pmd_t *pmdp)
> +static void split_contpmd(unsigned long addr, pmd_t *pmdp)
> {
> int i;
>
> + lm_meminfo_sub(addr, CONT_PMD_SIZE, CONT_PMD);
> +
> pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
> for (i = 0; i < CONT_PMDS; i++, pmdp++)
> set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
> +
> + lm_meminfo_add(addr, CONT_PMD_SIZE, PMD);
> }
>
> -static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
> +static int split_pud(unsigned long addr, pud_t *pudp, pud_t pud, gfp_t gfp,
> + bool to_cont)
> {
> pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
> unsigned int step = PMD_SIZE >> PAGE_SHIFT;
> @@ -651,8 +761,13 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
> if (to_cont)
> prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>
> + lm_meminfo_sub(addr, PUD_SIZE, PUD);
> for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
> set_pmd(pmdp, pfn_pmd(pfn, prot));
> + if (to_cont)
> + lm_meminfo_add(addr, PUD_SIZE, CONT_PMD);
> + else
> + lm_meminfo_add(addr, PUD_SIZE, PMD);
>
> /*
> * Ensure the pmd entries are visible to the table walker by the time
> @@ -707,7 +822,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
> if (!pud_present(pud))
> goto out;
> if (pud_leaf(pud)) {
> - ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
> + ret = split_pud(addr, pudp, pud, GFP_PGTABLE_KERNEL, true);
> if (ret)
> goto out;
> }
> @@ -725,14 +840,14 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
> goto out;
> if (pmd_leaf(pmd)) {
> if (pmd_cont(pmd))
> - split_contpmd(pmdp);
> + split_contpmd(addr, pmdp);
> /*
> * PMD: If addr is PMD aligned then addr already describes a
> * leaf boundary. Otherwise, split to contpte.
> */
> if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
> goto out;
> - ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
> + ret = split_pmd(addr, pmdp, pmd, GFP_PGTABLE_KERNEL, true);
> if (ret)
> goto out;
> }
> @@ -749,7 +864,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
> if (!pte_present(pte))
> goto out;
> if (pte_cont(pte))
> - split_contpte(ptep);
> + split_contpte(addr, ptep);
>
> out:
> return ret;
> @@ -856,7 +971,7 @@ static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
> int ret = 0;
>
> if (pud_leaf(pud))
> - ret = split_pud(pudp, pud, gfp, false);
> + ret = split_pud(addr, pudp, pud, gfp, false);
>
> return ret;
> }
> @@ -870,8 +985,8 @@ static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
>
> if (pmd_leaf(pmd)) {
> if (pmd_cont(pmd))
> - split_contpmd(pmdp);
> - ret = split_pmd(pmdp, pmd, gfp, false);
> + split_contpmd(addr, pmdp);
> + ret = split_pmd(addr, pmdp, pmd, gfp, false);
>
> /*
> * We have split the pmd directly to ptes so there is no need to
> @@ -889,7 +1004,7 @@ static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
> pte_t pte = __ptep_get(ptep);
>
> if (pte_cont(pte))
> - split_contpte(ptep);
> + split_contpte(addr, ptep);
>
> return 0;
> }
> @@ -1463,20 +1578,20 @@ static bool pgtable_range_aligned(unsigned long start, unsigned long end,
> return true;
> }
>
> -static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
> +static void unmap_hotplug_pte_range(pte_t *ptep, unsigned long addr,
> unsigned long end, bool free_mapped,
> struct vmem_altmap *altmap)
> {
> - pte_t *ptep, pte;
> + pte_t pte;
>
> do {
> - ptep = pte_offset_kernel(pmdp, addr);
> pte = __ptep_get(ptep);
> if (pte_none(pte))
> continue;
>
> WARN_ON(!pte_present(pte));
> __pte_clear(&init_mm, addr, ptep);
> + lm_meminfo_sub(addr, PAGE_SIZE, PTE);
> if (free_mapped) {
> /* CONT blocks are not supported in the vmemmap */
> WARN_ON(pte_cont(pte));
> @@ -1485,19 +1600,39 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
> PAGE_SIZE, altmap);
> }
> /* unmap_hotplug_range() flushes TLB for !free_mapped */
> - } while (addr += PAGE_SIZE, addr < end);
> + } while (ptep++, addr += PAGE_SIZE, addr < end);
> +}
> +
> +static void unmap_hotplug_cont_pte_range(pmd_t *pmdp, unsigned long addr,
> + unsigned long end, bool free_mapped,
> + struct vmem_altmap *altmap)
> +{
> + unsigned long next;
> + pte_t *ptep, pte;
> +
> + do {
> + next = pte_cont_addr_end(addr, end);
> + ptep = pte_offset_kernel(pmdp, addr);
> + pte = __ptep_get(ptep);
> +
> + if (pte_present(pte) && pte_cont(pte)) {
> + lm_meminfo_sub(addr, CONT_PTE_SIZE, CONT_PTE);
> + lm_meminfo_add(addr, CONT_PTE_SIZE, PTE);
> + }
> +
> + unmap_hotplug_pte_range(ptep, addr, next, free_mapped, altmap);
> + } while (addr = next, addr < end);
> }
>
> -static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
> +static void unmap_hotplug_pmd_range(pmd_t *pmdp, unsigned long addr,
> unsigned long end, bool free_mapped,
> struct vmem_altmap *altmap)
> {
> unsigned long next;
> - pmd_t *pmdp, pmd;
> + pmd_t pmd;
>
> do {
> next = pmd_addr_end(addr, end);
> - pmdp = pmd_offset(pudp, addr);
> pmd = READ_ONCE(*pmdp);
> if (pmd_none(pmd))
> continue;
> @@ -1505,6 +1640,7 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
> WARN_ON(!pmd_present(pmd));
> if (pmd_leaf(pmd)) {
> pmd_clear(pmdp);
> + lm_meminfo_sub(addr, PMD_SIZE, PMD);
> if (free_mapped) {
> /* CONT blocks are not supported in the vmemmap */
> WARN_ON(pmd_cont(pmd));
> @@ -1516,7 +1652,28 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
> continue;
> }
> WARN_ON(!pmd_table(pmd));
> - unmap_hotplug_pte_range(pmdp, addr, next, free_mapped, altmap);
> + unmap_hotplug_cont_pte_range(pmdp, addr, next, free_mapped, altmap);
> + } while (pmdp++, addr = next, addr < end);
> +}
> +
> +static void unmap_hotplug_cont_pmd_range(pud_t *pudp, unsigned long addr,
> + unsigned long end, bool free_mapped,
> + struct vmem_altmap *altmap)
> +{
> + unsigned long next;
> + pmd_t *pmdp, pmd;
> +
> + do {
> + next = pmd_cont_addr_end(addr, end);
> + pmdp = pmd_offset(pudp, addr);
> + pmd = READ_ONCE(*pmdp);
> +
> + if (pmd_leaf(pmd) && pmd_cont(pmd)) {
> + lm_meminfo_sub(addr, CONT_PMD_SIZE, CONT_PMD);
> + lm_meminfo_add(addr, CONT_PMD_SIZE, PMD);
> + }
> +
> + unmap_hotplug_pmd_range(pmdp, addr, next, free_mapped, altmap);
> } while (addr = next, addr < end);
> }
>
> @@ -1537,6 +1694,7 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
> WARN_ON(!pud_present(pud));
> if (pud_leaf(pud)) {
> pud_clear(pudp);
> + lm_meminfo_sub(addr, PUD_SIZE, PUD);
> if (free_mapped) {
> flush_tlb_kernel_range(addr, addr + PUD_SIZE);
> free_hotplug_page_range(pud_page(pud),
> @@ -1546,7 +1704,7 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
> continue;
> }
> WARN_ON(!pud_table(pud));
> - unmap_hotplug_pmd_range(pudp, addr, next, free_mapped, altmap);
> + unmap_hotplug_cont_pmd_range(pudp, addr, next, free_mapped, altmap);
> } while (addr = next, addr < end);
> }
>
More information about the linux-arm-kernel
mailing list