[PATCH v2 1/2] ARM: module: split core and init PLT sections

Ard Biesheuvel ard.biesheuvel at linaro.org
Wed Feb 22 03:30:10 PST 2017


On 22 February 2017 at 11:29, Angus Clark <angus at angusclark.org> wrote:
> Hi Ard,
>
> Thanks.  I can confirm v2 works fine on my setup (with a minor change
> to backport to a 4.1 kernel).
>

Thanks Angus. I will add your Tested-by

-- 
Ard.


> On 21 February 2017 at 22:12, Ard Biesheuvel <ard.biesheuvel at linaro.org> wrote:
>> Since commit 35fa91eed817 ("ARM: kernel: merge core and init PLTs"),
>> the ARM module PLT code allocates all PLT entries in a single core
>> section, since the overhead of having a separate init PLT section is
>> not justified by the small number of PLT entries usually required for
>> init code.
>>
>> However, the core and init module regions are allocated independently,
>> and there is a corner case where the core region may be allocated from
>> the VMALLOC region if the dedicated module region is exhausted, but the
>> init region, being much smaller, can still be allocated from the module
>> region. This puts the PLT entries out of reach of the relocated branch
>> instructions, defeating the whole purpose of PLTs.
>>
>> So split the core and init PLT regions, and name the latter ".init.plt"
>> so it gets allocated along with (and sufficiently close to) the .init
>> sections that it serves. Also, given that init PLT entries may need to
>> be emitted for branches that target the core module, modify the logic
>> that disregards defined symbols to only disregard symbols that are
>> defined in the same section.
>>
>> Fixes: 35fa91eed817 ("ARM: kernel: merge core and init PLTs")
>> Reported-by: Angus Clark <angus at angusclark.org>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
>> ---
>>  arch/arm/include/asm/module.h |  9 +-
>>  arch/arm/kernel/module-plts.c | 87 ++++++++++++++------
>>  arch/arm/kernel/module.lds    |  1 +
>>  3 files changed, 68 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
>> index 464748b9fd7d..ed2319663a1e 100644
>> --- a/arch/arm/include/asm/module.h
>> +++ b/arch/arm/include/asm/module.h
>> @@ -18,13 +18,18 @@ enum {
>>  };
>>  #endif
>>
>> +struct mod_plt_sec {
>> +       struct elf32_shdr       *plt;
>> +       int                     plt_count;
>> +};
>> +
>>  struct mod_arch_specific {
>>  #ifdef CONFIG_ARM_UNWIND
>>         struct unwind_table *unwind[ARM_SEC_MAX];
>>  #endif
>>  #ifdef CONFIG_ARM_MODULE_PLTS
>> -       struct elf32_shdr   *plt;
>> -       int                 plt_count;
>> +       struct mod_plt_sec      core;
>> +       struct mod_plt_sec      init;
>>  #endif
>>  };
>>
>> diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
>> index 3a5cba90c971..3d0c2e4dda1d 100644
>> --- a/arch/arm/kernel/module-plts.c
>> +++ b/arch/arm/kernel/module-plts.c
>> @@ -1,5 +1,5 @@
>>  /*
>> - * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel at linaro.org>
>> + * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel at linaro.org>
>>   *
>>   * This program is free software; you can redistribute it and/or modify
>>   * it under the terms of the GNU General Public License version 2 as
>> @@ -31,9 +31,17 @@ struct plt_entries {
>>         u32     lit[PLT_ENT_COUNT];
>>  };
>>
>> +static bool in_init(const struct module *mod, unsigned long loc)
>> +{
>> +       return loc - (u32)mod->init_layout.base < mod->init_layout.size;
>> +}
>> +
>>  u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>>  {
>> -       struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
>> +       struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
>> +                                                         &mod->arch.init;
>> +
>> +       struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr;
>>         int idx = 0;
>>
>>         /*
>> @@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>>          * relocations are sorted, this will be the last entry we allocated.
>>          * (if one exists).
>>          */
>> -       if (mod->arch.plt_count > 0) {
>> -               plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
>> -               idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
>> +       if (pltsec->plt_count > 0) {
>> +               plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT;
>> +               idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT;
>>
>>                 if (plt->lit[idx] == val)
>>                         return (u32)&plt->ldr[idx];
>> @@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>>                         plt++;
>>         }
>>
>> -       mod->arch.plt_count++;
>> -       BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
>> +       pltsec->plt_count++;
>> +       BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size);
>>
>>         if (!idx)
>>                 /* Populate a new set of entries */
>> @@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
>>
>>  /* Count how many PLT entries we may need */
>>  static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>> -                              const Elf32_Rel *rel, int num)
>> +                              const Elf32_Rel *rel, int num, Elf32_Word dstidx)
>>  {
>>         unsigned int ret = 0;
>>         const Elf32_Sym *s;
>> @@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>>                 case R_ARM_THM_JUMP24:
>>                         /*
>>                          * We only have to consider branch targets that resolve
>> -                        * to undefined symbols. This is not simply a heuristic,
>> -                        * it is a fundamental limitation, since the PLT itself
>> -                        * is part of the module, and needs to be within range
>> -                        * as well, so modules can never grow beyond that limit.
>> +                        * to symbols that are defined in a different section.
>> +                        * This is not simply a heuristic, it is a fundamental
>> +                        * limitation, since there is no guaranteed way to emit
>> +                        * PLT entries sufficiently close to the branch if the
>> +                        * section size exceeds the range of a branch
>> +                        * instruction. So ignore relocations against defined
>> +                        * symbols if they live in the same section as the
>> +                        * relocation target.
>>                          */
>>                         s = syms + ELF32_R_SYM(rel[i].r_info);
>> -                       if (s->st_shndx != SHN_UNDEF)
>> +                       if (s->st_shndx == dstidx)
>>                                 break;
>>
>>                         /*
>> @@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>>                          * So we need to support them, but there is no need to
>>                          * take them into consideration when trying to optimize
>>                          * this code. So let's only check for duplicates when
>> -                        * the addend is zero.
>> +                        * the addend is zero. (Note that calls into the core
>> +                        * module via init PLT entries could involve section
>> +                        * relative symbol references with non-zero addends, for
>> +                        * which we may end up emitting duplicates, but the init
>> +                        * PLT is released along with the rest of the .init
>> +                        * region as soon as module loading completes.)
>>                          */
>>                         if (!is_zero_addend_relocation(base, rel + i) ||
>>                             !duplicate_rel(base, rel, i))
>> @@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>>  int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>>                               char *secstrings, struct module *mod)
>>  {
>> -       unsigned long plts = 0;
>> +       unsigned long core_plts = 0;
>> +       unsigned long init_plts = 0;
>>         Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
>>         Elf32_Sym *syms = NULL;
>>
>> @@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>>          */
>>         for (s = sechdrs; s < sechdrs_end; ++s) {
>>                 if (strcmp(".plt", secstrings + s->sh_name) == 0)
>> -                       mod->arch.plt = s;
>> +                       mod->arch.core.plt = s;
>> +               else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
>> +                       mod->arch.init.plt = s;
>>                 else if (s->sh_type == SHT_SYMTAB)
>>                         syms = (Elf32_Sym *)s->sh_addr;
>>         }
>>
>> -       if (!mod->arch.plt) {
>> -               pr_err("%s: module PLT section missing\n", mod->name);
>> +       if (!mod->arch.core.plt || !mod->arch.init.plt) {
>> +               pr_err("%s: module PLT section(s) missing\n", mod->name);
>>                 return -ENOEXEC;
>>         }
>>         if (!syms) {
>> @@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>>                 /* sort by type and symbol index */
>>                 sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
>>
>> -               plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
>> +               if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
>> +                       core_plts += count_plts(syms, dstsec->sh_addr, rels,
>> +                                               numrels, s->sh_info);
>> +               else
>> +                       init_plts += count_plts(syms, dstsec->sh_addr, rels,
>> +                                               numrels, s->sh_info);
>>         }
>>
>> -       mod->arch.plt->sh_type = SHT_NOBITS;
>> -       mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> -       mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
>> -       mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
>> -                                         sizeof(struct plt_entries));
>> -       mod->arch.plt_count = 0;
>> -
>> -       pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
>> +       mod->arch.core.plt->sh_type = SHT_NOBITS;
>> +       mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> +       mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
>> +       mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
>> +                                              sizeof(struct plt_entries));
>> +       mod->arch.core.plt_count = 0;
>> +
>> +       mod->arch.init.plt->sh_type = SHT_NOBITS;
>> +       mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
>> +       mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
>> +       mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
>> +                                              sizeof(struct plt_entries));
>> +       mod->arch.init.plt_count = 0;
>> +
>> +       pr_debug("%s: plt=%x, init.plt=%x\n", __func__,
>> +                mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size);
>>         return 0;
>>  }
>> diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
>> index 05881e2b414c..eacb5c67f61e 100644
>> --- a/arch/arm/kernel/module.lds
>> +++ b/arch/arm/kernel/module.lds
>> @@ -1,3 +1,4 @@
>>  SECTIONS {
>>         .plt : { BYTE(0) }
>> +       .init.plt : { BYTE(0) }
>>  }
>> --
>> 2.7.4
>>



More information about the linux-arm-kernel mailing list