[PATCH] arm64: module: Widen module region to 2 GiB

Ard Biesheuvel ardb at kernel.org
Tue Apr 4 06:54:37 PDT 2023


Shanker reports that, after loading a 110+ MiB kernel module, no other
modules can be loaded, in spite of the fact that module PLTs have been
enabled in the build.

This is due to the fact that, even with module PLTs enabled, the module
region is dimensioned to be a fixed 128 MiB region, as we simply never
anticipated the need for supporting modules that huge.

So let's increase the size of the statically allocated module region to
2 GiB, and update the module loading logic so that we prefer loading
modules in the vicinity of the kernel text, where relative branches can
be resolved without the need for PLTs. Only when we run out of space
here (or when CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled) will we
fall back to the larger window and allocate from there.

While at it, let's try to simplify the logic a bit, to make it easier to
follow:
- remove the special cases for KASAN, which are no longer needed now
  that KASAN_VMALLOC is always enabled when KASAN is configured;
- instead of defining a global module base address, define a global
  module limit, and work our way down from it.

Cc: Shanker Donthineni <sdonthineni at nvidia.com>
Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
---
 Documentation/arm64/memory.rst  |  8 ++---
 arch/arm64/include/asm/memory.h |  2 +-
 arch/arm64/include/asm/module.h | 10 ++++--
 arch/arm64/kernel/kaslr.c       | 38 +++++++++++------------
 arch/arm64/kernel/module.c      | 54 ++++++++++++++++-----------------
 5 files changed, 59 insertions(+), 53 deletions(-)

diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
index 2a641ba7be3b717a..55a55f30eed8a6ce 100644
--- a/Documentation/arm64/memory.rst
+++ b/Documentation/arm64/memory.rst
@@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
   0000000000000000	0000ffffffffffff	 256TB		user
   ffff000000000000	ffff7fffffffffff	 128TB		kernel logical memory map
  [ffff600000000000	ffff7fffffffffff]	  32TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
@@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
   0000000000000000	000fffffffffffff	   4PB		user
   fff0000000000000	ffff7fffffffffff	  ~4PB		kernel logical memory map
  [fffd800000000000	ffff7fffffffffff]	 512TB		[kasan shadow region]
-  ffff800000000000	ffff800007ffffff	 128MB		modules
-  ffff800008000000	fffffbffefffffff	 124TB		vmalloc
+  ffff800000000000	ffff80007fffffff	   2GB		modules
+  ffff800080000000	fffffbffefffffff	 124TB		vmalloc
   fffffbfff0000000	fffffbfffdffffff	 224MB		fixed mappings (top down)
   fffffbfffe000000	fffffbfffe7fffff	   8MB		[guard region]
   fffffbfffe800000	fffffbffff7fffff	  16MB		PCI I/O space
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0ab95..b58c3127323e16c8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
 #define KIMAGE_VADDR		(MODULES_END)
 #define MODULES_END		(MODULES_VADDR + MODULES_VSIZE)
 #define MODULES_VADDR		(_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE		(SZ_128M)
+#define MODULES_VSIZE		(SZ_2G)
 #define VMEMMAP_START		(-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
 #define VMEMMAP_END		(VMEMMAP_START + VMEMMAP_SIZE)
 #define PCI_IO_END		(VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 18734fed3bdd7609..98dae9f87b521f07 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -31,9 +31,15 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
 				void *loc, u64 val);
 
 #ifdef CONFIG_RANDOMIZE_BASE
-extern u64 module_alloc_base;
+extern u64 module_alloc_limit;
 #else
-#define module_alloc_base	((u64)_etext - MODULES_VSIZE)
+#define module_alloc_limit	MODULE_REF_END
+#endif
+
+#ifdef CONFIG_ARM64_MODULE_PLTS
+#define MODULE_REF_END		((u64)_end)
+#else
+#define MODULE_REF_END		((u64)_etext)
 #endif
 
 struct plt_entry {
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9d062..14e96c3f707a74a3 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/libfdt.h>
 #include <linux/mm_types.h>
+#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/pgtable.h>
@@ -17,10 +18,11 @@
 #include <asm/kernel-pgtable.h>
 #include <asm/memory.h>
 #include <asm/mmu.h>
+#include <asm/module.h>
 #include <asm/sections.h>
 #include <asm/setup.h>
 
-u64 __ro_after_init module_alloc_base;
+u64 __ro_after_init module_alloc_limit = MODULE_REF_END;
 u16 __initdata memstart_offset_seed;
 
 struct arm64_ftr_override kaslr_feature_override __initdata;
@@ -30,12 +32,6 @@ static int __init kaslr_init(void)
 	u64 module_range;
 	u32 seed;
 
-	/*
-	 * Set a reasonable default for module_alloc_base in case
-	 * we end up running with module randomization disabled.
-	 */
-	module_alloc_base = (u64)_etext - MODULES_VSIZE;
-
 	if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
 		pr_info("KASLR disabled on command line\n");
 		return 0;
@@ -69,24 +65,28 @@ static int __init kaslr_init(void)
 		 * resolved via PLTs. (Branches between modules will be
 		 * resolved normally.)
 		 */
-		module_range = SZ_2G - (u64)(_end - _stext);
-		module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
+		module_range = SZ_2G;
 	} else {
 		/*
-		 * Randomize the module region by setting module_alloc_base to
-		 * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
-		 * _stext) . This guarantees that the resulting region still
-		 * covers [_stext, _etext], and that all relative branches can
-		 * be resolved without veneers unless this region is exhausted
-		 * and we fall back to a larger 2GB window in module_alloc()
-		 * when ARM64_MODULE_PLTS is enabled.
+		 * Randomize the module region over a 128 MB window covering
+		 * the kernel text. This guarantees that the resulting region
+		 * still covers [_stext, _etext], and that all relative
+		 * branches can be resolved without veneers unless this region
+		 * is exhausted and we fall back to a larger 2GB window in
+		 * module_alloc() when ARM64_MODULE_PLTS is enabled.
 		 */
-		module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+		module_range = SZ_128M;
 	}
 
+	/*
+	 * Subtract the size of the core kernel region that must be in range
+	 * for all loaded modules.
+	 */
+	module_range -= MODULE_REF_END - (u64)_stext;
+
 	/* use the lower 21 bits to randomize the base of the module region */
-	module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
-	module_alloc_base &= PAGE_MASK;
+	module_alloc_limit += (module_range * (seed & ((1 << 21) - 1))) >> 21;
+	module_alloc_limit &= PAGE_MASK;
 
 	return 0;
 }
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb58ff7..aa61493957c010b2 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -24,7 +24,6 @@
 
 void *module_alloc(unsigned long size)
 {
-	u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
 	gfp_t gfp_mask = GFP_KERNEL;
 	void *p;
 
@@ -32,33 +31,34 @@ void *module_alloc(unsigned long size)
 	if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
 		gfp_mask |= __GFP_NOWARN;
 
-	if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
-	    IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-		/* don't exceed the static module region - see below */
-		module_alloc_end = MODULES_END;
-
-	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
-				module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
-				NUMA_NO_NODE, __builtin_return_address(0));
+	/*
+	 * First, try to allocate from the 128 MB region just below the limit.
+	 * If KASLR is disabled, or CONFIG_RANDOMIZE_MODULE_REGION_FULL is not
+	 * set, this will produce an allocation that allows all relative
+	 * branches into the kernel text to be resolved without the need for
+	 * veneers (PLTs). If CONFIG_RANDOMIZE_MODULE_REGION_FULL is set, this
+	 * 128 MB window might not cover the kernel text, but branches between
+	 * modules will still be in relative branching range.
+	 */
+	p = __vmalloc_node_range(size, MODULE_ALIGN,
+				 module_alloc_limit - SZ_128M,
+				 module_alloc_limit, gfp_mask, PAGE_KERNEL,
+				 VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
+				 __builtin_return_address(0));
 
-	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
-	    (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
-	     (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
-	      !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
-		/*
-		 * KASAN without KASAN_VMALLOC can only deal with module
-		 * allocations being served from the reserved module region,
-		 * since the remainder of the vmalloc region is already
-		 * backed by zero shadow pages, and punching holes into it
-		 * is non-trivial. Since the module region is not randomized
-		 * when KASAN is enabled without KASAN_VMALLOC, it is even
-		 * less likely that the module region gets exhausted, so we
-		 * can simply omit this fallback in that case.
-		 */
-		p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
-				module_alloc_base + SZ_2G, GFP_KERNEL,
-				PAGE_KERNEL, 0, NUMA_NO_NODE,
-				__builtin_return_address(0));
+	/*
+	 * If the prior allocation failed, and we have configured support for
+	 * fixing up out-of-range relative branches through the use of PLTs,
+	 * fall back to a 2 GB window for module allocations. This is the
+	 * maximum we can support, due to the use of 32-bit place relative
+	 * symbol references, which cannot be fixed up using PLTs.
+	 */
+	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+		p = __vmalloc_node_range(size, MODULE_ALIGN,
+					 module_alloc_limit - SZ_2G,
+					 module_alloc_limit, GFP_KERNEL,
+					 PAGE_KERNEL, 0, NUMA_NO_NODE,
+					 __builtin_return_address(0));
 
 	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
 		vfree(p);
-- 
2.39.2




More information about the linux-arm-kernel mailing list