[PATCH v2 1/2] ARM: module: split core and init PLT sections
Ard Biesheuvel
ard.biesheuvel at linaro.org
Wed Feb 22 03:30:10 PST 2017
On 22 February 2017 at 11:29, Angus Clark <angus at angusclark.org> wrote:
> Hi Ard,
>
> Thanks. I can confirm v2 works fine on my setup (with a minor change
> to backport to a 4.1 kernel).
>
Thanks, Angus. I will add your Tested-by.
--
Ard.
> On 21 February 2017 at 22:12, Ard Biesheuvel <ard.biesheuvel at linaro.org> wrote:
>> Since commit 35fa91eed817 ("ARM: kernel: merge core and init PLTs"),
>> the ARM module PLT code allocates all PLT entries in a single core
>> section, since the overhead of having a separate init PLT section is
>> not justified by the small number of PLT entries usually required for
>> init code.
>>
>> However, the core and init module regions are allocated independently,
>> and there is a corner case where the core region may be allocated from
>> the VMALLOC region if the dedicated module region is exhausted, but the
>> init region, being much smaller, can still be allocated from the module
>> region. This puts the PLT entries out of reach of the relocated branch
>> instructions, defeating the whole purpose of PLTs.
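
(For context, and not part of the patch text: each module PLT entry on
ARM is essentially a load-to-pc veneer with a 32-bit literal, along the
lines of the rough sketch below, so the veneer itself can reach any
address. The plain B/BL at the call site, however, only encodes a
signed 24-bit word offset, roughly +/-32 MB (+/-16 MB for Thumb-2
branches), which is why the veneer must be allocated close to the code
that branches to it. The exact encodings live in module-plts.c; this is
only an illustration.)

    /*
     * Illustrative sketch of one PLT entry, mirroring struct
     * plt_entries further down in the patch:
     *
     *    ldr   pc, [pc, #<offset to lit>]   @ plt->ldr[idx], reached by B/BL
     *    ...
     *    .word <branch target address>      @ plt->lit[idx], full 32-bit address
     *
     * The B/BL that jumps to the ldr is limited to about +/-32 MB, so
     * if the branching code and its PLT land in different allocations
     * (module area vs. VMALLOC), the veneer is out of reach and useless.
     */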
>>
>> So split the core and init PLT regions, and name the latter ".init.plt"
>> so it gets allocated along with (and sufficiently close to) the .init
>> sections that it serves. Also, given that init PLT entries may need to
>> be emitted for branches that target the core module, modify the logic
>> that disregards defined symbols to only disregard symbols that are
>> defined in the same section.
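
(As a concrete illustration of that last point, here is a hypothetical
module sketch, not part of the patch: the callee below is a defined
symbol in the same module, so the old logic would never reserve a PLT
entry for the call, but since the caller sits in .init and the callee
in the core section, the branch can still end up out of range and now
gets an entry in .init.plt if needed.)

    /* hypothetical example module, names made up for illustration */
    #include <linux/init.h>
    #include <linux/module.h>

    static noinline int do_core_work(void)   /* ends up in .text (core) */
    {
            return 0;
    }

    static int __init example_init(void)     /* ends up in .init.text */
    {
            /*
             * do_core_work is defined in this module, but in a different
             * section than the caller.  The old count_plts() skipped all
             * defined symbols; with the new check (s->st_shndx == dstidx)
             * only same-section symbols are skipped, so an .init.plt
             * veneer can be reserved for this call.
             */
            return do_core_work();
    }
    module_init(example_init);

    MODULE_LICENSE("GPL");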
>>
>> Fixes: 35fa91eed817 ("ARM: kernel: merge core and init PLTs")
>> Reported-by: Angus Clark <angus at angusclark.org>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>> arch/arm/include/asm/module.h | 9 +-
>> arch/arm/kernel/module-plts.c | 87 ++++++++++++++------
>> arch/arm/kernel/module.lds | 1 +
>> 3 files changed, 68 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
>> index 464748b9fd7d..ed2319663a1e 100644
>> --- a/arch/arm/include/asm/module.h
>> +++ b/arch/arm/include/asm/module.h
>> @@ -18,13 +18,18 @@ enum {
>> };
>> #endif
>>
>> +struct mod_plt_sec {
>> + struct elf32_shdr *plt;
>> + int plt_count;
>> +};
>> +
>> struct mod_arch_specific {
>> #ifdef CONFIG_ARM_UNWIND
>> struct unwind_table *unwind[ARM_SEC_MAX];
>> #endif
>> #ifdef CONFIG_ARM_MODULE_PLTS
>> - struct elf32_shdr *plt;
>> - int plt_count;
>> + struct mod_plt_sec core;
>> + struct mod_plt_sec init;
>> #endif
>> };
>>
>> diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
>> index 3a5cba90c971..3d0c2e4dda1d 100644
>> --- a/arch/arm/kernel/module-plts.c
>> +++ b/arch/arm/kernel/module-plts.c
>> @@ -1,5 +1,5 @@
>> /*
>> - * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel at linaro.org>
>> + * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel at linaro.org>
>> *
>> * This program is free software; you can redistribute it and/or modify
>> * it under the terms of the GNU General Public License version 2 as
>> @@ -31,9 +31,17 @@ struct plt_entries {
>> u32 lit[PLT_ENT_COUNT];
>> };
>>
>> +static bool in_init(const struct module *mod, unsigned long loc)
>> +{
>> + return loc - (u32)mod->init_layout.base < mod->init_layout.size;
>> +}
>> +
>> u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>> {
>> - struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
>> + struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
>> + &mod->arch.init;
>> +
>> + struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr;
>> int idx = 0;
>>
>> /*
>> @@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>> * relocations are sorted, this will be the last entry we allocated.
>> * (if one exists).
>> */
>> - if (mod->arch.plt_count > 0) {
>> - plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
>> - idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
>> + if (pltsec->plt_count > 0) {
>> + plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT;
>> + idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT;
>>
>> if (plt->lit[idx] == val)
>> return (u32)&plt->ldr[idx];
>> @@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>> plt++;
>> }
>>
>> - mod->arch.plt_count++;
>> - BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
>> + pltsec->plt_count++;
>> + BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size);
>>
>> if (!idx)
>> /* Populate a new set of entries */
>> @@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
>>
>> /* Count how many PLT entries we may need */
>> static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>> - const Elf32_Rel *rel, int num)
>> + const Elf32_Rel *rel, int num, Elf32_Word dstidx)
>> {
>> unsigned int ret = 0;
>> const Elf32_Sym *s;
>> @@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>> case R_ARM_THM_JUMP24:
>> /*
>> * We only have to consider branch targets that resolve
>> - * to undefined symbols. This is not simply a heuristic,
>> - * it is a fundamental limitation, since the PLT itself
>> - * is part of the module, and needs to be within range
>> - * as well, so modules can never grow beyond that limit.
>> + * to symbols that are defined in a different section.
>> + * This is not simply a heuristic, it is a fundamental
>> + * limitation, since there is no guaranteed way to emit
>> + * PLT entries sufficiently close to the branch if the
>> + * section size exceeds the range of a branch
>> + * instruction. So ignore relocations against defined
>> + * symbols if they live in the same section as the
>> + * relocation target.
>> */
>> s = syms + ELF32_R_SYM(rel[i].r_info);
>> - if (s->st_shndx != SHN_UNDEF)
>> + if (s->st_shndx == dstidx)
>> break;
>>
>> /*
>> @@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>> * So we need to support them, but there is no need to
>> * take them into consideration when trying to optimize
>> * this code. So let's only check for duplicates when
>> - * the addend is zero.
>> + * the addend is zero. (Note that calls into the core
>> + * module via init PLT entries could involve section
>> + * relative symbol references with non-zero addends, for
>> + * which we may end up emitting duplicates, but the init
>> + * PLT is released along with the rest of the .init
>> + * region as soon as module loading completes.)
>> */
>> if (!is_zero_addend_relocation(base, rel + i) ||
>> !duplicate_rel(base, rel, i))
>> @@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>> int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>> char *secstrings, struct module *mod)
>> {
>> - unsigned long plts = 0;
>> + unsigned long core_plts = 0;
>> + unsigned long init_plts = 0;
>> Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
>> Elf32_Sym *syms = NULL;
>>
>> @@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>> */
>> for (s = sechdrs; s < sechdrs_end; ++s) {
>> if (strcmp(".plt", secstrings + s->sh_name) == 0)
>> - mod->arch.plt = s;
>> + mod->arch.core.plt = s;
>> + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
>> + mod->arch.init.plt = s;
>> else if (s->sh_type == SHT_SYMTAB)
>> syms = (Elf32_Sym *)s->sh_addr;
>> }
>>
>> - if (!mod->arch.plt) {
>> - pr_err("%s: module PLT section missing\n", mod->name);
>> + if (!mod->arch.core.plt || !mod->arch.init.plt) {
>> + pr_err("%s: module PLT section(s) missing\n", mod->name);
>> return -ENOEXEC;
>> }
>> if (!syms) {
>> @@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>> /* sort by type and symbol index */
>> sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
>>
>> - plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
>> + if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
>> + core_plts += count_plts(syms, dstsec->sh_addr, rels,
>> + numrels, s->sh_info);
>> + else
>> + init_plts += count_plts(syms, dstsec->sh_addr, rels,
>> + numrels, s->sh_info);
>> }
>>
>> - mod->arch.plt->sh_type = SHT_NOBITS;
>> - mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> - mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
>> - mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
>> - sizeof(struct plt_entries));
>> - mod->arch.plt_count = 0;
>> -
>> - pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
>> + mod->arch.core.plt->sh_type = SHT_NOBITS;
>> + mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> + mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
>> + mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
>> + sizeof(struct plt_entries));
>> + mod->arch.core.plt_count = 0;
>> +
>> + mod->arch.init.plt->sh_type = SHT_NOBITS;
>> + mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> + mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
>> + mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
>> + sizeof(struct plt_entries));
>> + mod->arch.init.plt_count = 0;
>> +
>> + pr_debug("%s: plt=%x, init.plt=%x\n", __func__,
>> + mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size);
>> return 0;
>> }
>> diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
>> index 05881e2b414c..eacb5c67f61e 100644
>> --- a/arch/arm/kernel/module.lds
>> +++ b/arch/arm/kernel/module.lds
>> @@ -1,3 +1,4 @@
>> SECTIONS {
>> .plt : { BYTE(0) }
>> + .init.plt : { BYTE(0) }
>> }
>> --
>> 2.7.4
>>