[v8 PATCH] arm64: mm: show direct mapping use in /proc/meminfo

Yang Shi yang at os.amperecomputing.com
Tue Jun 9 17:06:31 PDT 2026



On 6/9/26 2:42 PM, Yang Shi wrote:
> Since commit a166563e7ec3 ("arm64: mm: support large block mapping when
> rodata=full"), the direct mapping may be split on some machines instead
> of staying static since boot. It makes more sense to show the direct
> mapping use in /proc/meminfo than before.
> This patch makes /proc/meminfo show the direct mapping use as shown
> below (4K base page size):
> DirectMap4K:       94792 kB
> DirectMap64K:     134208 kB
> DirectMap2M:     1173504 kB
> DirectMap32M:    5636096 kB
> DirectMap1G:    529530880 kB
>
> Although only machines which support BBML2_NOABORT can split the
> direct mapping, show it on all machines regardless of BBML2_NOABORT so
> that users have a consistent view and confusion is avoided.
>
> Although ptdump can also show the direct map use, it needs to dump
> the whole kernel page table, which is costly and overkill. It is also
> in debugfs, which may not be enabled by all distros. So showing direct
> map use in /proc/meminfo seems more convenient and has less overhead.
>
> Signed-off-by: Yang Shi <yang at os.amperecomputing.com>
> ---
>   arch/arm64/mm/mmu.c | 200 +++++++++++++++++++++++++++++++++++++++-----
>   1 file changed, 179 insertions(+), 21 deletions(-)
>
> v8: * Fixed the double accounting per Sashiko
>      * Responded to the review comments from Sashiko
> v7: * Rebased to v7.1-rc4
>      * Changed "dm" to "lm" to follow ARM convention per Will
>      * Used __is_lm_alias() instead of reinventing a new helper per Will
> v6: * Rebased to v7.0-rc3
>      * Rebased on top of Anshuman's v5 "arm64/mm: Enable batched TLB flush
>        in unmap_hotplug_range()"
>      * Used const for direct map type array per Will
>      * Defined PUD size for 16K/64K even though it is not used per Will
>      * Removed the misleading comment in init_pmd() per Will
> v5: * Rebased to v6.19-rc4
>      * Fixed the build error for !CONFIG_PROC_FS
> v4: * Used PAGE_END instead of _PAGE_END(VA_BITS_MIN) per Ryan
>      * Used shorter name for the helpers and variables per Ryan
>      * Fixed accounting for memory hotunplug
> v3: * Fixed the over-accounting problems per Ryan
>      * Introduced helpers for add/sub direct map use and #ifdef them with
>        CONFIG_PROC_FS per Ryan
>      * v3 is a fix patch on top of v2
> v2: * Counted in size instead of the number of entries per Ryan
>      * Removed shift array per Ryan
>      * Use lower case "k" per Ryan
>      * Fixed a couple of build warnings reported by kernel test robot
>      * Fixed a couple of potential miscounts

Aha, Sashiko is so fast. 2 comments this time.

#1
> Will these updates suffer from data races?
> The lm_meminfo array tracks direct mapping statistics and is updated using
> non-atomic += and -= operations. These updates are invoked from multiple
> independent code paths that do not share a common lock.
> For example, runtime page permission changes call
> split_kernel_leaf_mapping_locked() which executes under
> pgtable_split_lock, while memory hotplug operations like
> arch_remove_memory() execute under mem_hotplug_lock. Because these
> paths can run concurrently on different CPUs, the non-atomic
> arithmetic could result in data races and lost updates.

Yes, it may race with memory hotplug. I missed the memory hotplug path
when I looked at Sashiko's v7 review. Two options to solve it:

1. Use atomic variables. Make lm_meminfo[NR_LM_TYPE] atomic_long_t, then 
manipulate it with atomic ops.
2. Protect it with a spin lock.

Contention on the cache line or the spin lock should be rare since
memory hotplug happens rarely. Any preference?


#2 is a repeat of one from v7. I don't see any difference, so my response
for v7 is still valid.

Thanks,
Yang

>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index dd85e093ffdb..783a473c71ed 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -29,6 +29,7 @@
>   #include <linux/mm_inline.h>
>   #include <linux/pagewalk.h>
>   #include <linux/stop_machine.h>
> +#include <linux/proc_fs.h>
>   
>   #include <asm/barrier.h>
>   #include <asm/cputype.h>
> @@ -164,6 +165,82 @@ static void init_clear_pgtable(void *table)
>   	dsb(ishst);
>   }
>   
> +enum lm_type {
> +	PTE,
> +	CONT_PTE,
> +	PMD,
> +	CONT_PMD,
> +	PUD,
> +	NR_LM_TYPE,
> +};
> +
> +#ifdef CONFIG_PROC_FS
> +static unsigned long lm_meminfo[NR_LM_TYPE];
> +
> +void arch_report_meminfo(struct seq_file *m)
> +{
> +	const char *size[NR_LM_TYPE];
> +
> +#if defined(CONFIG_ARM64_4K_PAGES)
> +	size[PTE] = "4k";
> +	size[CONT_PTE] = "64k";
> +	size[PMD] = "2M";
> +	size[CONT_PMD] = "32M";
> +	size[PUD] = "1G";
> +#elif defined(CONFIG_ARM64_16K_PAGES)
> +	size[PTE] = "16k";
> +	size[CONT_PTE] = "2M";
> +	size[PMD] = "32M";
> +	size[CONT_PMD] = "1G";
> +	size[PUD] = "64G";
> +#elif defined(CONFIG_ARM64_64K_PAGES)
> +	size[PTE] = "64k";
> +	size[CONT_PTE] = "2M";
> +	size[PMD] = "512M";
> +	size[CONT_PMD] = "16G";
> +	size[PUD] = "4T";
> +#endif
> +
> +	seq_printf(m, "DirectMap%s:	%8lu kB\n",
> +			size[PTE], lm_meminfo[PTE] >> 10);
> +	seq_printf(m, "DirectMap%s:	%8lu kB\n",
> +			size[CONT_PTE],
> +			lm_meminfo[CONT_PTE] >> 10);
> +	seq_printf(m, "DirectMap%s:	%8lu kB\n",
> +			size[PMD], lm_meminfo[PMD] >> 10);
> +	seq_printf(m, "DirectMap%s:	%8lu kB\n",
> +			size[CONT_PMD],
> +			lm_meminfo[CONT_PMD] >> 10);
> +	if (pud_sect_supported())
> +		seq_printf(m, "DirectMap%s:	%8lu kB\n",
> +			size[PUD], lm_meminfo[PUD] >> 10);
> +}
> +
> +static inline void lm_meminfo_add(unsigned long addr, unsigned long size,
> +				  enum lm_type type)
> +{
> +	if (__is_lm_address(addr))
> +		lm_meminfo[type] += size;
> +}
> +
> +static inline void lm_meminfo_sub(unsigned long addr, unsigned long size,
> +				  enum lm_type type)
> +{
> +	if (__is_lm_address(addr))
> +		lm_meminfo[type] -= size;
> +}
> +#else
> +static inline void lm_meminfo_add(unsigned long addr, unsigned long size,
> +				  enum lm_type type)
> +{
> +}
> +
> +static inline void lm_meminfo_sub(unsigned long addr, unsigned long size,
> +				  enum lm_type type)
> +{
> +}
> +#endif
> +
>   static void init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
>   		     phys_addr_t phys, pgprot_t prot)
>   {
> @@ -219,6 +296,7 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>   
>   	do {
>   		pgprot_t __prot = prot;
> +		bool count_lm = pte_none(__ptep_get(ptep));
>   
>   		next = pte_cont_addr_end(addr, end);
>   
> @@ -229,6 +307,13 @@ static int alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>   
>   		init_pte(ptep, addr, next, phys, __prot);
>   
> +		if (count_lm) {
> +			if (pgprot_val(__prot) & PTE_CONT)
> +				lm_meminfo_add(addr, (next - addr), CONT_PTE);
> +			else
> +				lm_meminfo_add(addr, (next - addr), PTE);
> +		}
> +
>   		ptep += pte_index(next) - pte_index(addr);
>   		phys += next - addr;
>   	} while (addr = next, addr != end);
> @@ -251,6 +336,7 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
>   
>   	do {
>   		pmd_t old_pmd = READ_ONCE(*pmdp);
> +		bool count_lm = pmd_none(old_pmd);
>   
>   		next = pmd_addr_end(addr, end);
>   
> @@ -259,6 +345,12 @@ static int init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
>   		    (flags & NO_BLOCK_MAPPINGS) == 0) {
>   			pmd_set_huge(pmdp, phys, prot);
>   
> +			if (count_lm) {
> +				if (pgprot_val(prot) & PTE_CONT)
> +					lm_meminfo_add(addr, (next - addr), CONT_PMD);
> +				else
> +					lm_meminfo_add(addr, (next - addr), PMD);
> +			}
>   			/*
>   			 * After the PMD entry has been populated once, we
>   			 * only allow updates to the permission attributes.
> @@ -371,6 +463,7 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
>   
>   	do {
>   		pud_t old_pud = READ_ONCE(*pudp);
> +		bool count_lm = pud_none(old_pud);
>   
>   		next = pud_addr_end(addr, end);
>   
> @@ -382,6 +475,8 @@ static int alloc_init_pud(p4d_t *p4dp, unsigned long addr, unsigned long end,
>   		    (flags & NO_BLOCK_MAPPINGS) == 0) {
>   			pud_set_huge(pudp, phys, prot);
>   
> +			if (count_lm)
> +				lm_meminfo_add(addr, (next - addr), PUD);
>   			/*
>   			 * After the PUD entry has been populated once, we
>   			 * only allow updates to the permission attributes.
> @@ -571,16 +666,21 @@ pgd_pgtable_alloc_special_mm(enum pgtable_level pgtable_level)
>   	return  __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_level);
>   }
>   
> -static void split_contpte(pte_t *ptep)
> +static void split_contpte(unsigned long addr, pte_t *ptep)
>   {
>   	int i;
>   
> +	lm_meminfo_sub(addr, CONT_PTE_SIZE, CONT_PTE);
> +
>   	ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
>   	for (i = 0; i < CONT_PTES; i++, ptep++)
>   		__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
> +
> +	lm_meminfo_add(addr, CONT_PTE_SIZE, PTE);
>   }
>   
> -static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
> +static int split_pmd(unsigned long addr, pmd_t *pmdp, pmd_t pmd, gfp_t gfp,
> +		     bool to_cont)
>   {
>   	pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
>   	unsigned long pfn = pmd_pfn(pmd);
> @@ -604,8 +704,13 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
>   	if (to_cont)
>   		prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>   
> +	lm_meminfo_sub(addr, PMD_SIZE, PMD);
>   	for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
>   		__set_pte(ptep, pfn_pte(pfn, prot));
> +	if (to_cont)
> +		lm_meminfo_add(addr, PMD_SIZE, CONT_PTE);
> +	else
> +		lm_meminfo_add(addr, PMD_SIZE, PTE);
>   
>   	/*
>   	 * Ensure the pte entries are visible to the table walker by the time
> @@ -617,16 +722,21 @@ static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
>   	return 0;
>   }
>   
> -static void split_contpmd(pmd_t *pmdp)
> +static void split_contpmd(unsigned long addr, pmd_t *pmdp)
>   {
>   	int i;
>   
> +	lm_meminfo_sub(addr, CONT_PMD_SIZE, CONT_PMD);
> +
>   	pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
>   	for (i = 0; i < CONT_PMDS; i++, pmdp++)
>   		set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
> +
> +	lm_meminfo_add(addr, CONT_PMD_SIZE, PMD);
>   }
>   
> -static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
> +static int split_pud(unsigned long addr, pud_t *pudp, pud_t pud, gfp_t gfp,
> +		     bool to_cont)
>   {
>   	pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
>   	unsigned int step = PMD_SIZE >> PAGE_SHIFT;
> @@ -651,8 +761,13 @@ static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
>   	if (to_cont)
>   		prot = __pgprot(pgprot_val(prot) | PTE_CONT);
>   
> +	lm_meminfo_sub(addr, PUD_SIZE, PUD);
>   	for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
>   		set_pmd(pmdp, pfn_pmd(pfn, prot));
> +	if (to_cont)
> +		lm_meminfo_add(addr, PUD_SIZE, CONT_PMD);
> +	else
> +		lm_meminfo_add(addr, PUD_SIZE, PMD);
>   
>   	/*
>   	 * Ensure the pmd entries are visible to the table walker by the time
> @@ -707,7 +822,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
>   	if (!pud_present(pud))
>   		goto out;
>   	if (pud_leaf(pud)) {
> -		ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
> +		ret = split_pud(addr, pudp, pud, GFP_PGTABLE_KERNEL, true);
>   		if (ret)
>   			goto out;
>   	}
> @@ -725,14 +840,14 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
>   		goto out;
>   	if (pmd_leaf(pmd)) {
>   		if (pmd_cont(pmd))
> -			split_contpmd(pmdp);
> +			split_contpmd(addr, pmdp);
>   		/*
>   		 * PMD: If addr is PMD aligned then addr already describes a
>   		 * leaf boundary. Otherwise, split to contpte.
>   		 */
>   		if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
>   			goto out;
> -		ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
> +		ret = split_pmd(addr, pmdp, pmd, GFP_PGTABLE_KERNEL, true);
>   		if (ret)
>   			goto out;
>   	}
> @@ -749,7 +864,7 @@ static int split_kernel_leaf_mapping_locked(unsigned long addr)
>   	if (!pte_present(pte))
>   		goto out;
>   	if (pte_cont(pte))
> -		split_contpte(ptep);
> +		split_contpte(addr, ptep);
>   
>   out:
>   	return ret;
> @@ -856,7 +971,7 @@ static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
>   	int ret = 0;
>   
>   	if (pud_leaf(pud))
> -		ret = split_pud(pudp, pud, gfp, false);
> +		ret = split_pud(addr, pudp, pud, gfp, false);
>   
>   	return ret;
>   }
> @@ -870,8 +985,8 @@ static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
>   
>   	if (pmd_leaf(pmd)) {
>   		if (pmd_cont(pmd))
> -			split_contpmd(pmdp);
> -		ret = split_pmd(pmdp, pmd, gfp, false);
> +			split_contpmd(addr, pmdp);
> +		ret = split_pmd(addr, pmdp, pmd, gfp, false);
>   
>   		/*
>   		 * We have split the pmd directly to ptes so there is no need to
> @@ -889,7 +1004,7 @@ static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
>   	pte_t pte = __ptep_get(ptep);
>   
>   	if (pte_cont(pte))
> -		split_contpte(ptep);
> +		split_contpte(addr, ptep);
>   
>   	return 0;
>   }
> @@ -1463,20 +1578,20 @@ static bool pgtable_range_aligned(unsigned long start, unsigned long end,
>   	return true;
>   }
>   
> -static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
> +static void unmap_hotplug_pte_range(pte_t *ptep, unsigned long addr,
>   				    unsigned long end, bool free_mapped,
>   				    struct vmem_altmap *altmap)
>   {
> -	pte_t *ptep, pte;
> +	pte_t pte;
>   
>   	do {
> -		ptep = pte_offset_kernel(pmdp, addr);
>   		pte = __ptep_get(ptep);
>   		if (pte_none(pte))
>   			continue;
>   
>   		WARN_ON(!pte_present(pte));
>   		__pte_clear(&init_mm, addr, ptep);
> +		lm_meminfo_sub(addr, PAGE_SIZE, PTE);
>   		if (free_mapped) {
>   			/* CONT blocks are not supported in the vmemmap */
>   			WARN_ON(pte_cont(pte));
> @@ -1485,19 +1600,39 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr,
>   						PAGE_SIZE, altmap);
>   		}
>   		/* unmap_hotplug_range() flushes TLB for !free_mapped */
> -	} while (addr += PAGE_SIZE, addr < end);
> +	} while (ptep++, addr += PAGE_SIZE, addr < end);
> +}
> +
> +static void unmap_hotplug_cont_pte_range(pmd_t *pmdp, unsigned long addr,
> +					 unsigned long end, bool free_mapped,
> +					 struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	pte_t *ptep, pte;
> +
> +	do {
> +		next = pte_cont_addr_end(addr, end);
> +		ptep = pte_offset_kernel(pmdp, addr);
> +		pte = __ptep_get(ptep);
> +
> +		if (pte_present(pte) && pte_cont(pte)) {
> +			lm_meminfo_sub(addr, CONT_PTE_SIZE, CONT_PTE);
> +			lm_meminfo_add(addr, CONT_PTE_SIZE, PTE);
> +		}
> +
> +		unmap_hotplug_pte_range(ptep, addr, next, free_mapped, altmap);
> +	} while (addr = next, addr < end);
>   }
>   
> -static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
> +static void unmap_hotplug_pmd_range(pmd_t *pmdp, unsigned long addr,
>   				    unsigned long end, bool free_mapped,
>   				    struct vmem_altmap *altmap)
>   {
>   	unsigned long next;
> -	pmd_t *pmdp, pmd;
> +	pmd_t pmd;
>   
>   	do {
>   		next = pmd_addr_end(addr, end);
> -		pmdp = pmd_offset(pudp, addr);
>   		pmd = READ_ONCE(*pmdp);
>   		if (pmd_none(pmd))
>   			continue;
> @@ -1505,6 +1640,7 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
>   		WARN_ON(!pmd_present(pmd));
>   		if (pmd_leaf(pmd)) {
>   			pmd_clear(pmdp);
> +			lm_meminfo_sub(addr, PMD_SIZE, PMD);
>   			if (free_mapped) {
>   				/* CONT blocks are not supported in the vmemmap */
>   				WARN_ON(pmd_cont(pmd));
> @@ -1516,7 +1652,28 @@ static void unmap_hotplug_pmd_range(pud_t *pudp, unsigned long addr,
>   			continue;
>   		}
>   		WARN_ON(!pmd_table(pmd));
> -		unmap_hotplug_pte_range(pmdp, addr, next, free_mapped, altmap);
> +		unmap_hotplug_cont_pte_range(pmdp, addr, next, free_mapped, altmap);
> +	} while (pmdp++, addr = next, addr < end);
> +}
> +
> +static void unmap_hotplug_cont_pmd_range(pud_t *pudp, unsigned long addr,
> +					 unsigned long end, bool free_mapped,
> +					 struct vmem_altmap *altmap)
> +{
> +	unsigned long next;
> +	pmd_t *pmdp, pmd;
> +
> +	do {
> +		next = pmd_cont_addr_end(addr, end);
> +		pmdp = pmd_offset(pudp, addr);
> +		pmd = READ_ONCE(*pmdp);
> +
> +		if (pmd_leaf(pmd) && pmd_cont(pmd)) {
> +			lm_meminfo_sub(addr, CONT_PMD_SIZE, CONT_PMD);
> +			lm_meminfo_add(addr, CONT_PMD_SIZE, PMD);
> +		}
> +
> +		unmap_hotplug_pmd_range(pmdp, addr, next, free_mapped, altmap);
>   	} while (addr = next, addr < end);
>   }
>   
> @@ -1537,6 +1694,7 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
>   		WARN_ON(!pud_present(pud));
>   		if (pud_leaf(pud)) {
>   			pud_clear(pudp);
> +			lm_meminfo_sub(addr, PUD_SIZE, PUD);
>   			if (free_mapped) {
>   				flush_tlb_kernel_range(addr, addr + PUD_SIZE);
>   				free_hotplug_page_range(pud_page(pud),
> @@ -1546,7 +1704,7 @@ static void unmap_hotplug_pud_range(p4d_t *p4dp, unsigned long addr,
>   			continue;
>   		}
>   		WARN_ON(!pud_table(pud));
> -		unmap_hotplug_pmd_range(pudp, addr, next, free_mapped, altmap);
> +		unmap_hotplug_cont_pmd_range(pudp, addr, next, free_mapped, altmap);
>   	} while (addr = next, addr < end);
>   }
>   




More information about the linux-arm-kernel mailing list