[PATCH v2] arm64: mm: Increase MODULES_VSIZE to 2GB
Shanker Donthineni
sdonthineni at nvidia.com
Fri Mar 31 14:48:16 PDT 2023
Hi Ard,
On 3/30/23 09:04, Shanker Donthineni wrote:
> The allocation of modules occurs in two regions. The first region
> is MODULES_VSIZE, which is 128MB in size and shared with the core
> kernel when the KASLR feature is unavailable or disabled through
> a boot parameter. The second region, which is 2GB in size, is
> shared with the other vmalloc callers. Depending on the size of
> the core kernel, the 128MB region may quickly fill up after
> loading a few modules, causing the system to switch to the 2GB
> region. Unfortunately, even the 2GB region can run out of space
> if previously loaded modules and other kernel subsystems consume
> the entire area, leaving no space for additional modules.
>
> This issue usually occurs when the system has a large number of
> CPU cores, PCIe host-bridge controllers, and I/O devices. For
> instance, the ECAM region of one host-bridge controller can use
> up to 256MB of vmalloc space, while eight controllers can occupy
> the entire 2GB.
>
> To address this problem, a possible solution would be to increase
> the MODULES_VSIZE to 2GB. This would improve the system's ability
> to accommodate a greater number of dynamically loaded modules and
> drivers when KASLR is not enabled. However, prior to switching to
> the 2GB region, it is advisable to allocate modules within the
> 128MB space that covers the core kernel, in order to benefit from
> the direct branches.
>
> Signed-off-by: Shanker Donthineni <sdonthineni at nvidia.com>
> ---
> Changes since v1:
> - Included Ard's recommendations.
> - Revised the commit message.
>
> dmesg:
> On an NVIDIA T241 system with Ubuntu-22.04, we hit boot failures
> due to vmalloc/vmap allocation errors when loading modules.
>
> [ 64.181308] ipmi_ssif: IPMI SSIF Interface driver
> [ 64.184494] usbcore: registered new interface driver r8152
> [ 64.242492] vmap allocation for size 393216 failed: use vmalloc=<size> to increase size
> [ 64.242499] systemd-udevd: vmalloc error: size 327680, vm_struct allocation failed, mode:0xcc0(GFP_KERNEL), nodemask=(null),cpuset=/,mems_allowed=0-3
> [ 64.242510] CPU: 32 PID: 2910 Comm: systemd-udevd Tainted: G OE 6.2-generic-64k
> [ 64.242513] Hardware name: NVIDIA T241, BIOS v1.1.0 2023-03-18T21:32:31+00:00
> [ 64.242515] Call trace:
> [ 64.242516] dump_backtrace+0xe0/0x130
> [ 64.242523] show_stack+0x20/0x60
> [ 64.242525] dump_stack_lvl+0x68/0x84
> [ 64.242530] dump_stack+0x18/0x34
> [ 64.242532] warn_alloc+0x11c/0x1b0
> [ 64.242537] __vmalloc_node_range+0xe0/0x20c
> [ 64.242540] module_alloc+0x118/0x160
> [ 64.242543] move_module+0x2c/0x190
> [ 64.242546] layout_and_allocate+0xfc/0x160
> [ 64.242548] load_module+0x260/0xbc4
> [ 64.242549] __do_sys_finit_module+0xac/0x130
> [ 64.242551] __arm64_sys_finit_module+0x28/0x34
> [ 64.242552] invoke_syscall+0x78/0x100
> [ 64.242553] el0_svc_common.constprop.0+0x170/0x194
> [ 64.242555] do_el0_svc+0x38/0x4c
> [ 64.242556] el0_svc+0x2c/0xc0
> [ 64.242558] el0t_64_sync_handler+0xbc/0x13c
> [ 64.242560] el0t_64_sync+0x1a0/0x1a4
>
> Documentation/arm64/memory.rst | 8 ++++----
> arch/arm64/include/asm/memory.h | 2 +-
> arch/arm64/kernel/module.c | 2 +-
> 3 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
> index 2a641ba7be3b..55a55f30eed8 100644
> --- a/Documentation/arm64/memory.rst
> +++ b/Documentation/arm64/memory.rst
> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
> 0000000000000000 0000ffffffffffff 256TB user
> ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
> [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
> - ffff800000000000 ffff800007ffffff 128MB modules
> - ffff800008000000 fffffbffefffffff 124TB vmalloc
> + ffff800000000000 ffff80007fffffff 2GB modules
> + ffff800080000000 fffffbffefffffff 124TB vmalloc
> fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
> fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
> fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
> 0000000000000000 000fffffffffffff 4PB user
> fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
> [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
> - ffff800000000000 ffff800007ffffff 128MB modules
> - ffff800008000000 fffffbffefffffff 124TB vmalloc
> + ffff800000000000 ffff80007fffffff 2GB modules
> + ffff800080000000 fffffbffefffffff 124TB vmalloc
> fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
> fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
> fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index 78e5163836a0..b58c3127323e 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
> #define KIMAGE_VADDR (MODULES_END)
> #define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
> #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE (SZ_128M)
> +#define MODULES_VSIZE (SZ_2G)
> #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
> #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
> #define PCI_IO_END (VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 5af4975caeb5..b4affe775f23 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
> /* don't exceed the static module region - see below */
> module_alloc_end = MODULES_END;
>
> - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
> + p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_end - SZ_128M,
> module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
> NUMA_NO_NODE, __builtin_return_address(0));
>
I occasionally encounter the "overflow in relocation type 261" error, which suggests that
the expression 'module_alloc_end - SZ_128M' may fall outside the 2GB range starting from
the beginning of _stext. I used the following code to resolve the issue, and also to enable
randomization of the module base within 128MB when CONFIG_RANDOMIZE_MODULE_REGION_FULL is
not defined.
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -46,7 +46,7 @@
#define KIMAGE_VADDR (MODULES_END)
#define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
#define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
-#define MODULES_VSIZE (SZ_128M)
+#define MODULES_VSIZE (SZ_2G)
#define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
#define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
#define PCI_IO_END (VMEMMAP_START - SZ_8M)
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index e7477f21a4c9..4f92fc511b85 100644
@@ -70,18 +70,19 @@ static int __init kaslr_init(void)
* resolved normally.)
*/
module_range = SZ_2G - (u64)(_end - _stext);
- module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
+ module_alloc_base = max((u64)_etext - SZ_2G, (u64)MODULES_VADDR);
} else {
/*
* Randomize the module region by setting module_alloc_base to
- * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
+ * a PAGE_SIZE multiple in the range [_etext - SZ_128M,
* _stext) . This guarantees that the resulting region still
* covers [_stext, _etext], and that all relative branches can
* be resolved without veneers unless this region is exhausted
* and we fall back to a larger 2GB window in module_alloc()
* when ARM64_MODULE_PLTS is enabled.
*/
- module_range = MODULES_VSIZE - (u64)(_etext - _stext);
+ module_range = SZ_128M - (u64)(_etext - _stext);
+ module_alloc_base = (u64)_etext - SZ_128M;
}
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index 5af4975caeb5..b4affe775f23 100644
@@ -37,7 +37,7 @@ void *module_alloc(unsigned long size)
/* don't exceed the static module region - see below */
module_alloc_end = MODULES_END;
- p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
+ p = __vmalloc_node_range(size, MODULE_ALIGN, kaslr_enabled() ?
+ module_alloc_base : module_alloc_end - SZ_128M,
module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
NUMA_NO_NODE, __builtin_return_address(0));
More information about the linux-arm-kernel
mailing list