[PATCH v2 1/2] ARM: module: split core and init PLT sections

Angus Clark angus at angusclark.org
Wed Feb 22 03:29:16 PST 2017


Hi Ard,

Thanks.  I can confirm v2 works fine on my setup (with a minor change
to backport to a 4.1 kernel).

Cheers,

Angus

On 21 February 2017 at 22:12, Ard Biesheuvel <ard.biesheuvel at linaro.org> wrote:
> Since commit 35fa91eed817 ("ARM: kernel: merge core and init PLTs"),
> the ARM module PLT code allocates all PLT entries in a single core
> section, since the overhead of having a separate init PLT section is
> not justified by the small number of PLT entries usually required for
> init code.
>
> However, the core and init module regions are allocated independently,
> and there is a corner case where the core region may be allocated from
> the VMALLOC region if the dedicated module region is exhausted, but the
> init region, being much smaller, can still be allocated from the module
> region. This puts the PLT entries out of reach of the relocated branch
> instructions, defeating the whole purpose of PLTs.
>
> So split the core and init PLT regions, and name the latter ".init.plt"
> so it gets allocated along with (and sufficiently close to) the .init
> sections that it serves. Also, given that init PLT entries may need to
> be emitted for branches that target the core module, modify the logic
> that disregards defined symbols to only disregard symbols that are
> defined in the same section.
>
> Fixes: 35fa91eed817 ("ARM: kernel: merge core and init PLTs")
> Reported-by: Angus Clark <angus at angusclark.org>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> ---
>  arch/arm/include/asm/module.h |  9 +-
>  arch/arm/kernel/module-plts.c | 87 ++++++++++++++------
>  arch/arm/kernel/module.lds    |  1 +
>  3 files changed, 68 insertions(+), 29 deletions(-)
>
> diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
> index 464748b9fd7d..ed2319663a1e 100644
> --- a/arch/arm/include/asm/module.h
> +++ b/arch/arm/include/asm/module.h
> @@ -18,13 +18,18 @@ enum {
>  };
>  #endif
>
> +struct mod_plt_sec {
> +       struct elf32_shdr       *plt;
> +       int                     plt_count;
> +};
> +
>  struct mod_arch_specific {
>  #ifdef CONFIG_ARM_UNWIND
>         struct unwind_table *unwind[ARM_SEC_MAX];
>  #endif
>  #ifdef CONFIG_ARM_MODULE_PLTS
> -       struct elf32_shdr   *plt;
> -       int                 plt_count;
> +       struct mod_plt_sec      core;
> +       struct mod_plt_sec      init;
>  #endif
>  };
>
> diff --git a/arch/arm/kernel/module-plts.c b/arch/arm/kernel/module-plts.c
> index 3a5cba90c971..3d0c2e4dda1d 100644
> --- a/arch/arm/kernel/module-plts.c
> +++ b/arch/arm/kernel/module-plts.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel at linaro.org>
> + * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel at linaro.org>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License version 2 as
> @@ -31,9 +31,17 @@ struct plt_entries {
>         u32     lit[PLT_ENT_COUNT];
>  };
>
> +static bool in_init(const struct module *mod, unsigned long loc)
> +{
> +       return loc - (u32)mod->init_layout.base < mod->init_layout.size;
> +}
> +
>  u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>  {
> -       struct plt_entries *plt = (struct plt_entries *)mod->arch.plt->sh_addr;
> +       struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
> +                                                         &mod->arch.init;
> +
> +       struct plt_entries *plt = (struct plt_entries *)pltsec->plt->sh_addr;
>         int idx = 0;
>
>         /*
> @@ -41,9 +49,9 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>          * relocations are sorted, this will be the last entry we allocated.
>          * (if one exists).
>          */
> -       if (mod->arch.plt_count > 0) {
> -               plt += (mod->arch.plt_count - 1) / PLT_ENT_COUNT;
> -               idx = (mod->arch.plt_count - 1) % PLT_ENT_COUNT;
> +       if (pltsec->plt_count > 0) {
> +               plt += (pltsec->plt_count - 1) / PLT_ENT_COUNT;
> +               idx = (pltsec->plt_count - 1) % PLT_ENT_COUNT;
>
>                 if (plt->lit[idx] == val)
>                         return (u32)&plt->ldr[idx];
> @@ -53,8 +61,8 @@ u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
>                         plt++;
>         }
>
> -       mod->arch.plt_count++;
> -       BUG_ON(mod->arch.plt_count * PLT_ENT_SIZE > mod->arch.plt->sh_size);
> +       pltsec->plt_count++;
> +       BUG_ON(pltsec->plt_count * PLT_ENT_SIZE > pltsec->plt->sh_size);
>
>         if (!idx)
>                 /* Populate a new set of entries */
> @@ -129,7 +137,7 @@ static bool duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num)
>
>  /* Count how many PLT entries we may need */
>  static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
> -                              const Elf32_Rel *rel, int num)
> +                              const Elf32_Rel *rel, int num, Elf32_Word dstidx)
>  {
>         unsigned int ret = 0;
>         const Elf32_Sym *s;
> @@ -144,13 +152,17 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>                 case R_ARM_THM_JUMP24:
>                         /*
>                          * We only have to consider branch targets that resolve
> -                        * to undefined symbols. This is not simply a heuristic,
> -                        * it is a fundamental limitation, since the PLT itself
> -                        * is part of the module, and needs to be within range
> -                        * as well, so modules can never grow beyond that limit.
> +                        * to symbols that are defined in a different section.
> +                        * This is not simply a heuristic, it is a fundamental
> +                        * limitation, since there is no guaranteed way to emit
> +                        * PLT entries sufficiently close to the branch if the
> +                        * section size exceeds the range of a branch
> +                        * instruction. So ignore relocations against defined
> +                        * symbols if they live in the same section as the
> +                        * relocation target.
>                          */
>                         s = syms + ELF32_R_SYM(rel[i].r_info);
> -                       if (s->st_shndx != SHN_UNDEF)
> +                       if (s->st_shndx == dstidx)
>                                 break;
>
>                         /*
> @@ -161,7 +173,12 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>                          * So we need to support them, but there is no need to
>                          * take them into consideration when trying to optimize
>                          * this code. So let's only check for duplicates when
> -                        * the addend is zero.
> +                        * the addend is zero. (Note that calls into the core
> +                        * module via init PLT entries could involve section
> +                        * relative symbol references with non-zero addends, for
> +                        * which we may end up emitting duplicates, but the init
> +                        * PLT is released along with the rest of the .init
> +                        * region as soon as module loading completes.)
>                          */
>                         if (!is_zero_addend_relocation(base, rel + i) ||
>                             !duplicate_rel(base, rel, i))
> @@ -174,7 +191,8 @@ static unsigned int count_plts(const Elf32_Sym *syms, Elf32_Addr base,
>  int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>                               char *secstrings, struct module *mod)
>  {
> -       unsigned long plts = 0;
> +       unsigned long core_plts = 0;
> +       unsigned long init_plts = 0;
>         Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
>         Elf32_Sym *syms = NULL;
>
> @@ -184,13 +202,15 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>          */
>         for (s = sechdrs; s < sechdrs_end; ++s) {
>                 if (strcmp(".plt", secstrings + s->sh_name) == 0)
> -                       mod->arch.plt = s;
> +                       mod->arch.core.plt = s;
> +               else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
> +                       mod->arch.init.plt = s;
>                 else if (s->sh_type == SHT_SYMTAB)
>                         syms = (Elf32_Sym *)s->sh_addr;
>         }
>
> -       if (!mod->arch.plt) {
> -               pr_err("%s: module PLT section missing\n", mod->name);
> +       if (!mod->arch.core.plt || !mod->arch.init.plt) {
> +               pr_err("%s: module PLT section(s) missing\n", mod->name);
>                 return -ENOEXEC;
>         }
>         if (!syms) {
> @@ -213,16 +233,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
>                 /* sort by type and symbol index */
>                 sort(rels, numrels, sizeof(Elf32_Rel), cmp_rel, NULL);
>
> -               plts += count_plts(syms, dstsec->sh_addr, rels, numrels);
> +               if (strncmp(secstrings + dstsec->sh_name, ".init", 5) != 0)
> +                       core_plts += count_plts(syms, dstsec->sh_addr, rels,
> +                                               numrels, s->sh_info);
> +               else
> +                       init_plts += count_plts(syms, dstsec->sh_addr, rels,
> +                                               numrels, s->sh_info);
>         }
>
> -       mod->arch.plt->sh_type = SHT_NOBITS;
> -       mod->arch.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
> -       mod->arch.plt->sh_addralign = L1_CACHE_BYTES;
> -       mod->arch.plt->sh_size = round_up(plts * PLT_ENT_SIZE,
> -                                         sizeof(struct plt_entries));
> -       mod->arch.plt_count = 0;
> -
> -       pr_debug("%s: plt=%x\n", __func__, mod->arch.plt->sh_size);
> +       mod->arch.core.plt->sh_type = SHT_NOBITS;
> +       mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
> +       mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
> +       mod->arch.core.plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
> +                                              sizeof(struct plt_entries));
> +       mod->arch.core.plt_count = 0;
> +
> +       mod->arch.init.plt->sh_type = SHT_NOBITS;
> +       mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
> +       mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
> +       mod->arch.init.plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
> +                                              sizeof(struct plt_entries));
> +       mod->arch.init.plt_count = 0;
> +
> +       pr_debug("%s: plt=%x, init.plt=%x\n", __func__,
> +                mod->arch.core.plt->sh_size, mod->arch.init.plt->sh_size);
>         return 0;
>  }
> diff --git a/arch/arm/kernel/module.lds b/arch/arm/kernel/module.lds
> index 05881e2b414c..eacb5c67f61e 100644
> --- a/arch/arm/kernel/module.lds
> +++ b/arch/arm/kernel/module.lds
> @@ -1,3 +1,4 @@
>  SECTIONS {
>         .plt : { BYTE(0) }
> +       .init.plt : { BYTE(0) }
>  }
> --
> 2.7.4
>



More information about the linux-arm-kernel mailing list