[PATCH v3 18/19] arm64: text replication: support more page sizes and levels

Hao Jia jiahao.os at bytedance.com
Wed Jan 17 00:53:56 PST 2024


Previously, the page table group variable (pgtables) of each node
pointed to pgtable_node0 by default. This method only worked properly
with a 4K page size and a 4-level page table, because in that
configuration the offset between the member variables of
struct pgtables is exactly equal to the offset between the *_pg_dir symbols
defined in vmlinux.lds.S. It does not work for other page size configurations.

Therefore, we modify the member variables of struct pgtables to pointer
variables and point to the global *_pg_dir defined in vmlinux.lds.S by
default, which will no longer rely on offset equality. The member variables
of struct pgtables will be allocated memory separately and reassigned in
ktext_replication_init(). This will allow us to support more page sizes
and page level configurations.

In addition, the kernel text size is not always smaller than PGDIR_SIZE
(for example, PGDIR_SIZE is 32M when a 16K page size and a 2-level page
table are configured), so the kernel text may need to occupy more than one
L0 page table entry. We therefore need to clear the pgdir entries of the
kernel mapping in a loop in ktext_replication_init().

But we still cannot support the configuration of 16K page size and 4-level
page table. In this configuration, PGDIR_SIZE is 128T, which is too large
to allow the kernel text to exclusively occupy at least one L0 page table entry.

Signed-off-by: Hao Jia <jiahao.os at bytedance.com>
---
 arch/arm64/include/asm/pgtable.h | 12 +++-----
 arch/arm64/kernel/vmlinux.lds.S  |  3 --
 arch/arm64/mm/ktext.c            | 53 ++++++++++++++++++++------------
 3 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 62a9d3e11fe1..e0b428e780c7 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -21,7 +21,7 @@
  * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space
  *	and fixed mappings
  */
-#define VMALLOC_START		(MODULES_END + PGDIR_SIZE)
+#define VMALLOC_START		(MODULES_END + KIMAGE_OFFSET)
 #define VMALLOC_END		(VMEMMAP_START - SZ_256M)
 
 #define vmemmap			((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
@@ -625,17 +625,13 @@ extern pgd_t reserved_pg_dir[PTRS_PER_PGD];
 
 struct pgtables {
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-	pgd_t tramp_pg_dir[PTRS_PER_PGD];
+	pgd_t *tramp_pg_dir;
 #endif
-	pgd_t reserved_pg_dir[PTRS_PER_PGD];
-	pgd_t swapper_pg_dir[PTRS_PER_PGD];
+	pgd_t *reserved_pg_dir;
+	pgd_t *swapper_pg_dir;
 };
 
-extern struct pgtables pgtable_node0;
-
 #ifdef CONFIG_REPLICATE_KTEXT
-extern struct pgtables *pgtables[MAX_NUMNODES];
-
 pgd_t *swapper_pg_dir_node(void);
 phys_addr_t __swapper_pg_dir_node_phys(int nid);
 phys_addr_t swapper_pg_dir_node_phys(void);
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index d3c7ed76adbf..3cd7e76cc562 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -212,9 +212,6 @@ SECTIONS
 	idmap_pg_dir = .;
 	. += PAGE_SIZE;
 
-	/* pgtable struct - covers the tramp, reserved and swapper pgdirs */
-	pgtable_node0 = .;
-
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 	tramp_pg_dir = .;
 	. += PAGE_SIZE;
diff --git a/arch/arm64/mm/ktext.c b/arch/arm64/mm/ktext.c
index 3dde6e1d99d7..e50828189824 100644
--- a/arch/arm64/mm/ktext.c
+++ b/arch/arm64/mm/ktext.c
@@ -16,15 +16,21 @@
 #include <asm/memory.h>
 #include <asm/pgalloc.h>
 
-struct pgtables *pgtables[MAX_NUMNODES] = {
-	[0 ... MAX_NUMNODES - 1] = &pgtable_node0,
+static struct pgtables pgtables[MAX_NUMNODES] = {
+	[0 ... MAX_NUMNODES - 1] = {
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+		tramp_pg_dir,
+#endif
+		reserved_pg_dir,
+		swapper_pg_dir
+	},
 };
 
 static void *kernel_texts[MAX_NUMNODES];
 
 static pgd_t *__swapper_pg_dir_node(int nid)
 {
-	return pgtables[nid]->swapper_pg_dir;
+	return pgtables[nid].swapper_pg_dir;
 }
 
 pgd_t *swapper_pg_dir_node(void)
@@ -116,20 +122,21 @@ early_param("ktext", parse_ktext);
 /* Allocate page tables and memory for the replicated kernel texts. */
 void __init ktext_replication_init(void)
 {
+	int kidx_base = pgd_index((phys_addr_t)KERNEL_START);
+	int kidx_end = pgd_index((phys_addr_t)KERNEL_END);
 	size_t size = __end_rodata - _stext;
-	int kidx = pgd_index((phys_addr_t)KERNEL_START);
-	int nid;
+	int nid, i;
 
 	/*
 	 * If we've messed up and the kernel shares a L0 entry with the
 	 * module or vmalloc area, then don't even attempt to use text
 	 * replication.
 	 */
-	if (pgd_index(MODULES_VADDR) == kidx) {
+	if (pgd_index(MODULES_VADDR) == kidx_base) {
 		pr_warn("Kernel is located in the same L0 index as modules - text replication disabled\n");
 		return;
 	}
-	if (pgd_index(VMALLOC_START) == kidx) {
+	if (pgd_index(VMALLOC_START) == kidx_end) {
 		pr_warn("Kernel is located in the same L0 index as vmalloc - text replication disabled\n");
 		return;
 	}
@@ -149,36 +156,44 @@ void __init ktext_replication_init(void)
 				       (u64)kernel_texts[nid] + size);
 
 		/* Allocate the pagetables for this node */
-		pgtables[nid] = memblock_alloc_node(sizeof(*pgtables[0]),
-						    PGD_SIZE, nid);
-
+		pgtables[nid].swapper_pg_dir = memblock_alloc_node(sizeof(swapper_pg_dir),
+									PGD_SIZE, nid);
+		pgtables[nid].reserved_pg_dir = memblock_alloc_node(sizeof(reserved_pg_dir),
+									PGD_SIZE, nid);
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+		pgtables[nid].tramp_pg_dir = memblock_alloc_node(sizeof(tramp_pg_dir),
+									PGD_SIZE, nid);
+#endif
 		/* Copy initial swapper page directory */
-		memcpy(pgtables[nid]->swapper_pg_dir, swapper_pg_dir, PGD_SIZE);
+		memcpy(pgtables[nid].swapper_pg_dir, swapper_pg_dir, PGD_SIZE);
 
 		/* Clear the kernel mapping */
-		memset(&pgtables[nid]->swapper_pg_dir[kidx], 0,
-		       sizeof(pgtables[nid]->swapper_pg_dir[kidx]));
+		for (i = kidx_base; i <= kidx_end; i++)
+			memset(&pgtables[nid].swapper_pg_dir[i], 0,
+			       sizeof(pgtables[nid].swapper_pg_dir[i]));
 
 		/* Create kernel mapping pointing at our local copy */
-		create_kernel_nid_map(pgtables[nid]->swapper_pg_dir,
+		create_kernel_nid_map(pgtables[nid].swapper_pg_dir,
 				      kernel_texts[nid]);
 	}
 }
 
 void ktext_replication_set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+	int kidx_base = pgd_index((phys_addr_t)KERNEL_START);
+	int kidx_end = pgd_index((phys_addr_t)KERNEL_END);
 	unsigned long idx = pgdp - swapper_pg_dir;
 	int nid;
 
 	if (WARN_ON_ONCE(idx >= PTRS_PER_PGD) ||
-	    WARN_ON_ONCE(idx == pgd_index((phys_addr_t)KERNEL_START)))
+	    WARN_ON_ONCE(idx >= kidx_base && idx <= kidx_end))
 		return;
 
 	for_each_node(nid) {
-		if (pgtables[nid]->swapper_pg_dir == swapper_pg_dir)
+		if (pgtables[nid].swapper_pg_dir == swapper_pg_dir)
 			continue;
 
-		WRITE_ONCE(pgtables[nid]->swapper_pg_dir[idx], pgd);
+		WRITE_ONCE(pgtables[nid].swapper_pg_dir[idx], pgd);
 	}
 }
 
@@ -189,10 +204,10 @@ void __init ktext_replication_init_tramp(void)
 
 	for_each_node(nid) {
 		/* Nothing to do for node 0 */
-		if (pgtables[nid]->tramp_pg_dir == tramp_pg_dir)
+		if (!nid)
 			continue;
 
-		memcpy(pgtables[nid]->tramp_pg_dir, tramp_pg_dir, PGD_SIZE);
+		memcpy(pgtables[nid].tramp_pg_dir, tramp_pg_dir, PGD_SIZE);
 	}
 }
 #endif
-- 
2.20.1




More information about the linux-arm-kernel mailing list