[PATCH 02/11] arm64: mm: define percpu virtual space area
Yang Shi
yang at os.amperecomputing.com
Wed Apr 29 10:04:30 PDT 2026
The percpu allocator returns offset from percpu base address. The
percpu base address is determined by the first chunk which is typically
in the low address of vmalloc space, however percpu variables are
typically allocated from the high address of vmalloc space. So the offset
could be quite big. It may be the whole size of vmalloc space. To support
local percpu mapping in order to optimize this_cpu_*() ops, the percpu
allocator also needs to allocate memory from the local percpu area (done in
the following patch), and the offset to the local percpu base address must be
the same, because the offset returned by the percpu allocator is used to
access both the global percpu area and the local percpu area.
We could halve the vmalloc space and dedicate one half to local percpu,
but that wastes too much address space.
So carve out dedicated global percpu and local percpu areas. Each area is
2 * PGDIR_SIZE in size. That is 1TB with 4K page size, which should be big
enough for percpu.
The percpu areas are PGDIR_SIZE aligned so that the percpu page tables only
need to be synced at the pgd level, which minimizes page table sync overhead.
The kernel virtual address space layout now looks like:
+-----------------+
| Linear mapping |
+-----------------+
| Modules |
+-----------------+
| Vmalloc |
+-----------------+
| Global Percpu |
+-----------------+
| Local Percpu |
+-----------------+
| Vmemmap |
+-----------------+
| PCI I/O |
+-----------------+
| Fixed map |
+-----------------+
Signed-off-by: Yang Shi <yang at os.amperecomputing.com>
---
arch/arm64/include/asm/pgtable.h | 21 ++++++++++++++++++---
arch/arm64/mm/mmu.c | 4 ++++
arch/arm64/mm/ptdump.c | 4 ++++
3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 38eec71ec383..9043b976682c 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -18,14 +18,29 @@
* VMALLOC range.
*
* VMALLOC_START: beginning of the kernel vmalloc space
- * VMALLOC_END: extends to the available space below vmemmap
+ * VMALLOC_END: extends to the space below global percpu area
*/
#define VMALLOC_START (MODULES_END)
+#define VMALLOC_END (PERCPU_START - SZ_8M)
+
+/*
+ * PERCPU range
+ *
+ * PERCPU_START: beginning of global percpu area
+ * PERCPU_END: end of global percpu area
+ * LOCAL_PERCPU_START: beginning of local percpu area
+ * LOCAL_PERCPU_END: end of local percpu area, extend to the available
+ * space below vmemap
+ */
+#define PERCPU_SIZE (2 * PGDIR_SIZE)
+#define PERCPU_START (PERCPU_END - PERCPU_SIZE)
+#define PERCPU_END (LOCAL_PERCPU_START)
+#define LOCAL_PERCPU_START (LOCAL_PERCPU_END - PERCPU_SIZE)
#if VA_BITS == VA_BITS_MIN
-#define VMALLOC_END (VMEMMAP_START - SZ_8M)
+#define LOCAL_PERCPU_END (ALIGN_DOWN(VMEMMAP_START, PGDIR_SIZE))
#else
#define VMEMMAP_UNUSED_NPAGES ((_PAGE_OFFSET(vabits_actual) - PAGE_OFFSET) >> PAGE_SHIFT)
-#define VMALLOC_END (VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page) - SZ_8M)
+#define LOCAL_PERCPU_END (ALIGN_DOWN((VMEMMAP_START + VMEMMAP_UNUSED_NPAGES * sizeof(struct page)), PGDIR_SIZE))
#endif
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ed1545baa045..7708dcc1b6a9 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -108,6 +108,10 @@ void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
if (!percpu_pgd_setup_done)
return;
+ /* Don't sync local percpu area page table */
+ if (start >= LOCAL_PERCPU_START && end < LOCAL_PERCPU_END)
+ return;
+
addr = start;
do {
pgd = READ_ONCE(*pgdp);
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index ab9899ca1e5f..7d5696a48917 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -389,6 +389,10 @@ static int __init ptdump_init(void)
{ MODULES_END, "Modules end" },
{ VMALLOC_START, "vmalloc() area" },
{ VMALLOC_END, "vmalloc() end" },
+ { PERCPU_START, "Global percpu start" },
+ { PERCPU_END, "Global percpu end" },
+ { LOCAL_PERCPU_START, "Local percpu start" },
+ { LOCAL_PERCPU_END, "Local percpu end" },
{ vmemmap_start, "vmemmap start" },
{ VMEMMAP_END, "vmemmap end" },
{ PCI_IO_START, "PCI I/O start" },
--
2.47.0
More information about the linux-arm-kernel
mailing list