[PATCH] arm64: module: Widen module region to 2 GiB
Mark Rutland
mark.rutland at arm.com
Tue Apr 4 08:49:21 PDT 2023
On Tue, Apr 04, 2023 at 03:54:37PM +0200, Ard Biesheuvel wrote:
> Shanker reports that, after loading a 110+ MiB kernel module, no other
> modules can be loaded, in spite of the fact that module PLTs have been
> enabled in the build.
>
> This is due to the fact, even with module PLTs enabled, the module
> region is dimensioned to be a fixed 128 MiB region, as we simply never
> anticipated the need for supporting modules that huge.
>
> So let's increase the size of the statically allocated module region to
> 2 GiB, and update the module loading logic so that we prefer loading
> modules in the vicinity of the kernel text, where relative branches can
> be resolved without the need for PLTs. Only when we run out of space
> here (or when CONFIG_RANDOMIZE_MODULE_REGION_FULL is enabled), we will
> fall back to the larger window and allocate from there.
>
> While at it, let's try to simplify the logic a bit, to make it easier to
> follow:
> - remove the special cases for KASAN, which are no longer needed now
> that KASAN_VMALLOC is always enabled when KASAN is configured;
> - instead of defining a global module base address, define a global
> module limit, and work our way down from it.
I find this last change a bit confusing, and I think it'd be preferable to have
explicit start and end variables; more on that below.
>
> Cc: Shanker Donthineni <sdonthineni at nvidia.com>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
> Documentation/arm64/memory.rst | 8 ++---
> arch/arm64/include/asm/memory.h | 2 +-
> arch/arm64/include/asm/module.h | 10 ++++--
> arch/arm64/kernel/kaslr.c | 38 +++++++++++------------
> arch/arm64/kernel/module.c | 54 ++++++++++++++++-----------------
> 5 files changed, 59 insertions(+), 53 deletions(-)
>
> diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst
> index 2a641ba7be3b717a..55a55f30eed8a6ce 100644
> --- a/Documentation/arm64/memory.rst
> +++ b/Documentation/arm64/memory.rst
> @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit)::
> 0000000000000000 0000ffffffffffff 256TB user
> ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map
> [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region]
> - ffff800000000000 ffff800007ffffff 128MB modules
> - ffff800008000000 fffffbffefffffff 124TB vmalloc
> + ffff800000000000 ffff80007fffffff 2GB modules
> + ffff800080000000 fffffbffefffffff 124TB vmalloc
> fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
> fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
> fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
> @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support):
> 0000000000000000 000fffffffffffff 4PB user
> fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map
> [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region]
> - ffff800000000000 ffff800007ffffff 128MB modules
> - ffff800008000000 fffffbffefffffff 124TB vmalloc
> + ffff800000000000 ffff80007fffffff 2GB modules
> + ffff800080000000 fffffbffefffffff 124TB vmalloc
> fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down)
> fffffbfffe000000 fffffbfffe7fffff 8MB [guard region]
> fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space
> diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
> index 78e5163836a0ab95..b58c3127323e16c8 100644
> --- a/arch/arm64/include/asm/memory.h
> +++ b/arch/arm64/include/asm/memory.h
> @@ -46,7 +46,7 @@
> #define KIMAGE_VADDR (MODULES_END)
> #define MODULES_END (MODULES_VADDR + MODULES_VSIZE)
> #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN))
> -#define MODULES_VSIZE (SZ_128M)
> +#define MODULES_VSIZE (SZ_2G)
> #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT)))
> #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE)
> #define PCI_IO_END (VMEMMAP_START - SZ_8M)
> diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
> index 18734fed3bdd7609..98dae9f87b521f07 100644
> --- a/arch/arm64/include/asm/module.h
> +++ b/arch/arm64/include/asm/module.h
> @@ -31,9 +31,15 @@ u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
> void *loc, u64 val);
>
> #ifdef CONFIG_RANDOMIZE_BASE
> -extern u64 module_alloc_base;
> +extern u64 module_alloc_limit;
> #else
> -#define module_alloc_base ((u64)_etext - MODULES_VSIZE)
> +#define module_alloc_limit MODULE_REF_END
> +#endif
> +
> +#ifdef CONFIG_ARM64_MODULE_PLTS
> +#define MODULE_REF_END ((u64)_end)
> +#else
> +#define MODULE_REF_END ((u64)_etext)
> #endif
I was initially a bit confused by this. I think it's a bit misleading for the
!CONFIG_ARM64_MODULE_PLTS case, since modules can still have data references
between _etext and _end, it's just that we're (hopefully) unlikely to have the
text be <128M with >2G of subsequent data.
I'd find this clearer if we could express the two constraints separately. e.g.
have something like:
#define MODULE_DATA_REF_END ((u64)_end)
#define MODULE_TEXT_REF_END ((u64)_etext)
That'd allow us to do something like the below, which I think would be clearer.
u64 module_alloc_end;
u64 module_alloc_base_noplt;
u64 module_alloc_base_plt;
/*
 * Call this somewhere after choosing the KASLR limits
*/
void module_limits_init(void)
{
module_alloc_end = (u64)_stext;
/*
* 32-bit relative data references must always fall within 2G of the
* end of the kernel image.
*/
	module_alloc_base_plt = max(MODULES_VADDR, MODULE_DATA_REF_END - SZ_2G);
/*
* Direct branches must be within 128M of the end of the kernel text.
*/
module_alloc_base_noplt = max(module_alloc_base_plt,
MODULE_TEXT_REF_END - SZ_128M);
}
void *module_alloc(unsigned long size)
{
gfp_t gfp_mask = GFP_KERNEL;
void *p = NULL;
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
gfp_mask |= __GFP_NOWARN;
/*
* By default, prefer to place modules such that they don't require
* PLTs. With CONFIG_RANDOMIZE_MODULE_REGION_FULL=y we'll prefer to use
* the entire range possible with PLTs, to get as much entropy as possible.
*/
if (!IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
p = __vmalloc_node_range(size, MODULE_ALIGN,
					 module_alloc_base_noplt, module_alloc_end,
gfp_mask, PAGE_KERNEL,
VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
}
	if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) {
p = __vmalloc_node_range(size, MODULE_ALIGN,
					 module_alloc_base_plt, module_alloc_end,
gfp_mask, PAGE_KERNEL,
VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
__builtin_return_address(0));
}
[... play with KASAN here ...]
}
Is that viable, or am I missing something?
Thanks,
Mark.
> struct plt_entry {
> diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
> index e7477f21a4c9d062..14e96c3f707a74a3 100644
> --- a/arch/arm64/kernel/kaslr.c
> +++ b/arch/arm64/kernel/kaslr.c
> @@ -8,6 +8,7 @@
> #include <linux/init.h>
> #include <linux/libfdt.h>
> #include <linux/mm_types.h>
> +#include <linux/module.h>
> #include <linux/sched.h>
> #include <linux/types.h>
> #include <linux/pgtable.h>
> @@ -17,10 +18,11 @@
> #include <asm/kernel-pgtable.h>
> #include <asm/memory.h>
> #include <asm/mmu.h>
> +#include <asm/module.h>
> #include <asm/sections.h>
> #include <asm/setup.h>
>
> -u64 __ro_after_init module_alloc_base;
> +u64 __ro_after_init module_alloc_limit = MODULE_REF_END;
> u16 __initdata memstart_offset_seed;
>
> struct arm64_ftr_override kaslr_feature_override __initdata;
> @@ -30,12 +32,6 @@ static int __init kaslr_init(void)
> u64 module_range;
> u32 seed;
>
> - /*
> - * Set a reasonable default for module_alloc_base in case
> - * we end up running with module randomization disabled.
> - */
> - module_alloc_base = (u64)_etext - MODULES_VSIZE;
> -
> if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
> pr_info("KASLR disabled on command line\n");
> return 0;
> @@ -69,24 +65,28 @@ static int __init kaslr_init(void)
> * resolved via PLTs. (Branches between modules will be
> * resolved normally.)
> */
> - module_range = SZ_2G - (u64)(_end - _stext);
> - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
> + module_range = SZ_2G;
> } else {
> /*
> - * Randomize the module region by setting module_alloc_base to
> - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE,
> - * _stext) . This guarantees that the resulting region still
> - * covers [_stext, _etext], and that all relative branches can
> - * be resolved without veneers unless this region is exhausted
> - * and we fall back to a larger 2GB window in module_alloc()
> - * when ARM64_MODULE_PLTS is enabled.
> + * Randomize the module region over a 128 MB window covering
> + * the kernel text. This guarantees that the resulting region
> + * still covers [_stext, _etext], and that all relative
> + * branches can be resolved without veneers unless this region
> + * is exhausted and we fall back to a larger 2GB window in
> + * module_alloc() when ARM64_MODULE_PLTS is enabled.
> */
> - module_range = MODULES_VSIZE - (u64)(_etext - _stext);
> + module_range = SZ_128M;
> }
>
> + /*
> + * Subtract the size of the core kernel region that must be in range
> + * for all loaded modules.
> + */
> + module_range -= MODULE_REF_END - (u64)_stext;
> +
> /* use the lower 21 bits to randomize the base of the module region */
> - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
> - module_alloc_base &= PAGE_MASK;
> + module_alloc_limit += (module_range * (seed & ((1 << 21) - 1))) >> 21;
> + module_alloc_limit &= PAGE_MASK;
>
> return 0;
> }
> diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
> index 5af4975caeb58ff7..aa61493957c010b2 100644
> --- a/arch/arm64/kernel/module.c
> +++ b/arch/arm64/kernel/module.c
> @@ -24,7 +24,6 @@
>
> void *module_alloc(unsigned long size)
> {
> - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE;
> gfp_t gfp_mask = GFP_KERNEL;
> void *p;
>
> @@ -32,33 +31,34 @@ void *module_alloc(unsigned long size)
> if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
> gfp_mask |= __GFP_NOWARN;
>
> - if (IS_ENABLED(CONFIG_KASAN_GENERIC) ||
> - IS_ENABLED(CONFIG_KASAN_SW_TAGS))
> - /* don't exceed the static module region - see below */
> - module_alloc_end = MODULES_END;
> -
> - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
> - module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK,
> - NUMA_NO_NODE, __builtin_return_address(0));
> + /*
> + * First, try to allocate from the 128 MB region just below the limit.
> + * If KASLR is disabled, or CONFIG_RANDOMIZE_MODULE_REGION_FULL is not
> + * set, this will produce an allocation that allows all relative
> + * branches into the kernel text to be resolved without the need for
> + * veneers (PLTs). If CONFIG_RANDOMIZE_MODULE_REGION_FULL is set, this
> + * 128 MB window might not cover the kernel text, but branches between
> + * modules will still be in relative branching range.
> + */
> + p = __vmalloc_node_range(size, MODULE_ALIGN,
> + module_alloc_limit - SZ_128M,
> + module_alloc_limit, gfp_mask, PAGE_KERNEL,
> + VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
> + __builtin_return_address(0));
>
> - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
> - (IS_ENABLED(CONFIG_KASAN_VMALLOC) ||
> - (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
> - !IS_ENABLED(CONFIG_KASAN_SW_TAGS))))
> - /*
> - * KASAN without KASAN_VMALLOC can only deal with module
> - * allocations being served from the reserved module region,
> - * since the remainder of the vmalloc region is already
> - * backed by zero shadow pages, and punching holes into it
> - * is non-trivial. Since the module region is not randomized
> - * when KASAN is enabled without KASAN_VMALLOC, it is even
> - * less likely that the module region gets exhausted, so we
> - * can simply omit this fallback in that case.
> - */
> - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
> - module_alloc_base + SZ_2G, GFP_KERNEL,
> - PAGE_KERNEL, 0, NUMA_NO_NODE,
> - __builtin_return_address(0));
> + /*
> + * If the prior allocation failed, and we have configured support for
> + * fixing up out-of-range relative branches through the use of PLTs,
> + * fall back to a 2 GB window for module allocations. This is the
> + * maximum we can support, due to the use of 32-bit place relative
> + * symbol references, which cannot be fixed up using PLTs.
> + */
> + if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
> + p = __vmalloc_node_range(size, MODULE_ALIGN,
> + module_alloc_limit - SZ_2G,
> + module_alloc_limit, GFP_KERNEL,
> + PAGE_KERNEL, 0, NUMA_NO_NODE,
> + __builtin_return_address(0));
>
> if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
> vfree(p);
> --
> 2.39.2
>
More information about the linux-arm-kernel
mailing list