[PATCH v3 4/4] mm: cache struct page for empty_zero_page and return it from ZERO_PAGE()
Liam R. Howlett
Liam.Howlett at oracle.com
Thu Feb 12 10:40:04 PST 2026
* Mike Rapoport <rppt at kernel.org> [260211 05:32]:
> From: "Mike Rapoport (Microsoft)" <rppt at kernel.org>
>
> For most architectures every invocation of ZERO_PAGE() does
> virt_to_page(empty_zero_page). But empty_zero_page lives in BSS, so it
> is enough to look up its struct page once at initialization time and
> then reuse it whenever the zero page is accessed.
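To make the win concrete, a minimal sketch of what ZERO_PAGE(0) reduces
to on a !__HAVE_COLOR_ZERO_PAGE configuration (comments are mine, not
from the patch):

	/* Before: virt_to_page() redoes address arithmetic on every call. */
	struct page *zp_old = virt_to_page(empty_zero_page);

	/* After: a single load of a pointer cached once at boot. */
	struct page *zp_new = __zero_page;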
>
> Add yet another __zero_page variable that, for most architectures, is
> initialized to virt_to_page(empty_zero_page) in a weak
> arch_setup_zero_pages() function.
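Since the generic definition is __weak, an architecture that ships a
strong arch_setup_zero_pages() replaces it at link time with no
registration needed; a minimal illustration (the "foo" arch and its
helper are hypothetical):

	/* mm/mm_init.c: weak generic fallback. */
	void __init __weak arch_setup_zero_pages(void)
	{
		__zero_page = virt_to_page(empty_zero_page);
	}

	/* arch/foo/mm/init.c: strong definition overrides the weak one. */
	void __init arch_setup_zero_pages(void)
	{
		__zero_page = foo_lookup_zero_page();	/* hypothetical */
	}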
>
> For architectures that use colored zero pages (MIPS and s390), rename
> their setup_zero_pages() to arch_setup_zero_pages() and make it global
> rather than static.
>
> For architectures that cannot use virt_to_page() for BSS (arm64 and
> sparc64), add overrides of arch_setup_zero_pages().
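Worth spelling out for readers outside arch code: on arm64 the kernel
image (and thus BSS) is mapped in the kimage range rather than the
linear map, so virt_to_page(empty_zero_page) would index vmemmap with a
bogus offset and the symbol has to be translated explicitly; sparc64
similarly derives the physical address from the link-time offset
(kern_base plus the distance from KERNBASE, as in the hunk below).
Side by side (comments are mine):

	/* Generic default: empty_zero_page has a linear-map address. */
	__zero_page = virt_to_page(empty_zero_page);

	/* arm64: take the physical address of the kernel-image symbol. */
	__zero_page = phys_to_page(__pa_symbol(empty_zero_page));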
>
> Acked-by: Catalin Marinas <catalin.marinas at arm.com>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt at kernel.org>
Acked-by: Liam R. Howlett <Liam.Howlett at oracle.com>
> ---
> arch/arm64/include/asm/pgtable.h | 6 ------
> arch/arm64/mm/init.c | 5 +++++
> arch/mips/mm/init.c | 11 +----------
> arch/s390/mm/init.c | 4 +---
> arch/sparc/include/asm/pgtable_64.h | 3 ---
> arch/sparc/mm/init_64.c | 17 +++++++----------
> include/linux/pgtable.h | 11 ++++++++---
> mm/mm_init.c | 21 +++++++++++++++++----
> 8 files changed, 39 insertions(+), 39 deletions(-)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 63da07398a30..2c1ec7cc8612 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -106,12 +106,6 @@ static inline void arch_leave_lazy_mmu_mode(void)
> #define flush_tlb_fix_spurious_fault_pmd(vma, address, pmdp) \
> local_flush_tlb_page_nonotify(vma, address)
>
> -/*
> - * ZERO_PAGE is a global shared page that is always zero: used
> - * for zero-mapped memory areas etc..
> - */
> -#define ZERO_PAGE(vaddr) phys_to_page(__pa_symbol(empty_zero_page))
> -
> #define pte_ERROR(e) \
> pr_err("%s:%d: bad pte %016llx.\n", __FILE__, __LINE__, pte_val(e))
>
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 96711b8578fd..417ec7efe569 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -328,6 +328,11 @@ void __init bootmem_init(void)
> memblock_dump_all();
> }
>
> +void __init arch_setup_zero_pages(void)
> +{
> + __zero_page = phys_to_page(__pa_symbol(empty_zero_page));
> +}
> +
> void __init arch_mm_preinit(void)
> {
> unsigned int flags = SWIOTLB_VERBOSE;
> diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
> index 4f6449ad02ca..55b25e85122a 100644
> --- a/arch/mips/mm/init.c
> +++ b/arch/mips/mm/init.c
> @@ -56,10 +56,7 @@ unsigned long empty_zero_page, zero_page_mask;
> EXPORT_SYMBOL_GPL(empty_zero_page);
> EXPORT_SYMBOL(zero_page_mask);
>
> -/*
> - * Not static inline because used by IP27 special magic initialization code
> - */
> -static void __init setup_zero_pages(void)
> +void __init arch_setup_zero_pages(void)
> {
> unsigned int order;
>
> @@ -450,7 +447,6 @@ void __init arch_mm_preinit(void)
> BUILD_BUG_ON(IS_ENABLED(CONFIG_32BIT) && (PFN_PTE_SHIFT > PAGE_SHIFT));
>
> maar_init();
> - setup_zero_pages(); /* Setup zeroed pages. */
> highmem_init();
>
> #ifdef CONFIG_64BIT
> @@ -461,11 +457,6 @@ void __init arch_mm_preinit(void)
> 0x80000000 - 4, KCORE_TEXT);
> #endif
> }
> -#else /* CONFIG_NUMA */
> -void __init arch_mm_preinit(void)
> -{
> - setup_zero_pages(); /* This comes from node 0 */
> -}
> #endif /* !CONFIG_NUMA */
>
> void free_init_pages(const char *what, unsigned long begin, unsigned long end)
> diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
> index 3c20475cbee2..1f72efc2a579 100644
> --- a/arch/s390/mm/init.c
> +++ b/arch/s390/mm/init.c
> @@ -69,7 +69,7 @@ unsigned long empty_zero_page, zero_page_mask;
> EXPORT_SYMBOL(empty_zero_page);
> EXPORT_SYMBOL(zero_page_mask);
>
> -static void __init setup_zero_pages(void)
> +void __init arch_setup_zero_pages(void)
> {
> unsigned long total_pages = memblock_estimated_nr_free_pages();
> unsigned int order;
> @@ -159,8 +159,6 @@ void __init arch_mm_preinit(void)
> cpumask_set_cpu(0, mm_cpumask(&init_mm));
>
> pv_init();
> -
> - setup_zero_pages(); /* Setup zeroed pages. */
> }
>
> unsigned long memory_block_size_bytes(void)
> diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
> index 615f460c50af..74ede706fb32 100644
> --- a/arch/sparc/include/asm/pgtable_64.h
> +++ b/arch/sparc/include/asm/pgtable_64.h
> @@ -210,9 +210,6 @@ extern unsigned long _PAGE_CACHE;
> extern unsigned long pg_iobits;
> extern unsigned long _PAGE_ALL_SZ_BITS;
>
> -extern struct page *mem_map_zero;
> -#define ZERO_PAGE(vaddr) (mem_map_zero)
> -
> /* PFNs are real physical page numbers. However, mem_map only begins to record
> * per-page information starting at pfn_base. This is to handle systems where
> * the first physical page in the machine is at some huge physical address,
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 0cc8de2fea90..707c1df67d79 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -177,9 +177,6 @@ extern unsigned long sparc_ramdisk_image64;
> extern unsigned int sparc_ramdisk_image;
> extern unsigned int sparc_ramdisk_size;
>
> -struct page *mem_map_zero __read_mostly;
> -EXPORT_SYMBOL(mem_map_zero);
> -
> unsigned int sparc64_highest_unlocked_tlb_ent __read_mostly;
>
> unsigned long sparc64_kern_pri_context __read_mostly;
> @@ -2496,11 +2493,17 @@ static void __init register_page_bootmem_info(void)
> register_page_bootmem_info_node(NODE_DATA(i));
> #endif
> }
> -void __init mem_init(void)
> +
> +void __init arch_setup_zero_pages(void)
> {
> phys_addr_t zero_page_pa = kern_base +
> ((unsigned long)&empty_zero_page[0] - KERNBASE);
>
> + __zero_page = phys_to_page(zero_page_pa);
> +}
> +
> +void __init mem_init(void)
> +{
> /*
> * Must be done after boot memory is put on freelist, because here we
> * might set fields in deferred struct pages that have not yet been
> @@ -2509,12 +2512,6 @@ void __init mem_init(void)
> */
> register_page_bootmem_info();
>
> - /*
> - * Set up the zero page, mark it reserved, so that page count
> - * is not manipulated when freeing the page from user ptes.
> - */
> - mem_map_zero = pfn_to_page(PHYS_PFN(zero_page_pa));
> -
> if (tlb_type == cheetah || tlb_type == cheetah_plus)
> cheetah_ecache_flush_init();
> }
> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 3d48eea57cd2..1da21ec62836 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1894,6 +1894,8 @@ static inline void pfnmap_setup_cachemode_pfn(unsigned long pfn, pgprot_t *prot)
> * For architectures that don't __HAVE_COLOR_ZERO_PAGE the zero page lives in
> * empty_zero_page in BSS.
> */
> +void arch_setup_zero_pages(void);
> +
> #ifdef __HAVE_COLOR_ZERO_PAGE
> static inline int is_zero_pfn(unsigned long pfn)
> {
> @@ -1921,10 +1923,13 @@ static inline unsigned long zero_pfn(unsigned long addr)
> }
>
> extern uint8_t empty_zero_page[PAGE_SIZE];
> +extern struct page *__zero_page;
>
> -#ifndef ZERO_PAGE
> -#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
> -#endif
> +static inline struct page *_zero_page(unsigned long addr)
> +{
> + return __zero_page;
> +}
> +#define ZERO_PAGE(vaddr) _zero_page(vaddr)
>
> #endif /* __HAVE_COLOR_ZERO_PAGE */
>
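One property worth noting about routing everything through the inline
helper: callers still pass vaddr, so the colored-zero-page
architectures (which actually consume it) need no caller-side changes,
while everyone else just evaluates and drops the argument. A usage
sketch, plus the invariant one could assert against the existing
zero_page_pfn (illustration only, not code from this patch):

	struct page *zp = ZERO_PAGE(addr);	/* now just: return __zero_page; */

	/* zero_page_pfn, set in init_zero_page_pfn(), must agree: */
	WARN_ON(page_to_pfn(ZERO_PAGE(0)) != zero_page_pfn);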
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 1eac634ece1a..b08608c1b71d 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -59,7 +59,10 @@ EXPORT_SYMBOL(zero_page_pfn);
> #ifndef __HAVE_COLOR_ZERO_PAGE
> uint8_t empty_zero_page[PAGE_SIZE] __page_aligned_bss;
> EXPORT_SYMBOL(empty_zero_page);
> -#endif
> +
> +struct page *__zero_page __ro_after_init;
> +EXPORT_SYMBOL(__zero_page);
> +#endif /* __HAVE_COLOR_ZERO_PAGE */
>
> #ifdef CONFIG_DEBUG_MEMORY_INIT
> int __meminitdata mminit_loglevel;
> @@ -2675,12 +2678,21 @@ static void __init mem_init_print_info(void)
> );
> }
>
> -static int __init init_zero_page_pfn(void)
> +#ifndef __HAVE_COLOR_ZERO_PAGE
> +/*
> + * architectures that __HAVE_COLOR_ZERO_PAGE must define this function
> + */
> +void __init __weak arch_setup_zero_pages(void)
> +{
> + __zero_page = virt_to_page(empty_zero_page);
> +}
> +#endif
> +
> +static void __init init_zero_page_pfn(void)
> {
> + arch_setup_zero_pages();
> zero_page_pfn = page_to_pfn(ZERO_PAGE(0));
> - return 0;
> }
> -early_initcall(init_zero_page_pfn);
>
> void __init __weak arch_mm_preinit(void)
> {
> @@ -2704,6 +2716,7 @@ void __init mm_core_init_early(void)
> void __init mm_core_init(void)
> {
> arch_mm_preinit();
> + init_zero_page_pfn();
>
> /* Initializations relying on SMP setup */
> BUILD_BUG_ON(MAX_ZONELISTS > 2);
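The ordering change here deserves a callout: dropping the
early_initcall means __zero_page and zero_page_pfn are valid as soon as
mm_core_init() has run, rather than only at initcall time. The
resulting boot sequence, reconstructed from this hunk (indentation
denotes calls, not literal code):

	start_kernel()
	  mm_core_init()
	    arch_mm_preinit()		/* MIPS/s390 no longer set up zero pages here */
	    init_zero_page_pfn()
	      arch_setup_zero_pages()	/* weak default or arch override */
	      zero_page_pfn = page_to_pfn(ZERO_PAGE(0))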
> --
> 2.51.0
>