[PATCH v7 1/3] riscv: Introduce CONFIG_RELOCATABLE

Alexandre ghiti alex at ghiti.fr
Mon Dec 6 01:44:04 PST 2021


@Palmer, is there anything I can do to help get this pulled into 5.17?

Thanks,

Alex

On 10/27/21 07:04, Alexandre ghiti wrote:
> Hi Palmer,
>
> On 10/26/21 11:29 PM, Palmer Dabbelt wrote:
>> On Sat, 09 Oct 2021 10:20:20 PDT (-0700), alex at ghiti.fr wrote:
>>> Arf, I sent this patchset from the wrong email address. @Palmer,
>>> tell me if you want me to resend it from the correct one.
>> Sorry for being kind of slow here.  It's fine: there's a "From:" in
>> the patch, and git picks those up so it'll match the signed-off-by
>> line.  I send pretty much all my patches that way, as I never managed
>> to get my Google address working correctly.
>>
>>> Thanks,
>>>
>>> Alex
>>>
>>> On 10/9/21 7:12 PM, Alexandre Ghiti wrote:
>>>> From: Alexandre Ghiti <alex at ghiti.fr>
>>>>
>>>> This config allows compiling a 64-bit kernel as a PIE and relocating
>>>> it to any virtual address at runtime: this paves the way for KASLR.
>>>> Runtime relocation is possible since relocation metadata are
>>>> embedded into the kernel.
>> IMO this should really be user selectable, at a bare minimum so it's
>> testable.
>> I just sent along a patch to do that (my power's off at home, so email
>> is a bit
>> wacky right now).
>>
>> I haven't put this on for-next yet as I'm not sure if you had a fix
>> for the
>> kasan issue (which IIUC would conflict with this).
>
> The kasan issue only revealed that I need to move the kasan shadow
> memory around with sv48 support; that's not related to the relocatable
> kernel.
>
> Thanks,
>
> Alex
>
>
>>>> Note that relocating at runtime introduces an overhead even if the
>>>> kernel is loaded at the same address it was linked at, and that the
>>>> compiler options are those used in arm64, which uses the same RELA
>>>> relocation format.
>>>>
>>>> Signed-off-by: Alexandre Ghiti <alex at ghiti.fr>
>>>> ---
>>>>   arch/riscv/Kconfig              | 12 ++++++++
>>>>   arch/riscv/Makefile             |  7 +++--
>>>>   arch/riscv/kernel/vmlinux.lds.S |  6 ++++
>>>>   arch/riscv/mm/Makefile          |  4 +++
>>>>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>>>>   5 files changed, 80 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>> index ea16fa2dd768..043ba92559fa 100644
>>>> --- a/arch/riscv/Kconfig
>>>> +++ b/arch/riscv/Kconfig
>>>> @@ -213,6 +213,18 @@ config PGTABLE_LEVELS
>>>>   config LOCKDEP_SUPPORT
>>>>       def_bool y
>>>>
>>>> +config RELOCATABLE
>>>> +    bool
>>>> +    depends on MMU && 64BIT && !XIP_KERNEL
>>>> +    help
>>>> +          This builds a kernel as a Position Independent Executable
>>>> (PIE),
>>>> +          which retains all relocation metadata required to
>>>> relocate the
>>>> +          kernel binary at runtime to a different virtual address
>>>> than the
>>>> +          address it was linked at.
>>>> +          Since RISCV uses the RELA relocation format, this requires a
>>>> +          relocation pass at runtime even if the kernel is loaded
>>>> at the
>>>> +          same address it was linked at.
>>>> +
>>>>   source "arch/riscv/Kconfig.socs"
>>>>   source "arch/riscv/Kconfig.erratas"
>>>>
>>>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>>>> index 0eb4568fbd29..2f509915f246 100644
>>>> --- a/arch/riscv/Makefile
>>>> +++ b/arch/riscv/Makefile
>>>> @@ -9,9 +9,12 @@
>>>>   #
>>>>
>>>>   OBJCOPYFLAGS    := -O binary
>>>> -LDFLAGS_vmlinux :=
>>>> +ifeq ($(CONFIG_RELOCATABLE),y)
>>>> +    LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>>>> +    KBUILD_CFLAGS += -fPIE
>>>> +endif
>>>>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>>>> -    LDFLAGS_vmlinux := --no-relax
>>>> +    LDFLAGS_vmlinux += --no-relax
>>>>       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>>>       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>>>   endif
>>>> diff --git a/arch/riscv/kernel/vmlinux.lds.S
>>>> b/arch/riscv/kernel/vmlinux.lds.S
>>>> index 5104f3a871e3..862a8c09723c 100644
>>>> --- a/arch/riscv/kernel/vmlinux.lds.S
>>>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>>>> @@ -133,6 +133,12 @@ SECTIONS
>>>>
>>>>       BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
>>>>
>>>> +    .rela.dyn : ALIGN(8) {
>>>> +        __rela_dyn_start = .;
>>>> +        *(.rela .rela*)
>>>> +        __rela_dyn_end = .;
>>>> +    }
>>>> +
>>>>   #ifdef CONFIG_EFI
>>>>       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
>>>>       __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
>>>> diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
>>>> index 7ebaef10ea1b..2d33ec574bbb 100644
>>>> --- a/arch/riscv/mm/Makefile
>>>> +++ b/arch/riscv/mm/Makefile
>>>> @@ -1,6 +1,10 @@
>>>>   # SPDX-License-Identifier: GPL-2.0-only
>>>>
>>>>   CFLAGS_init.o := -mcmodel=medany
>>>> +ifdef CONFIG_RELOCATABLE
>>>> +CFLAGS_init.o += -fno-pie
>>>> +endif
>>>> +
>>>>   ifdef CONFIG_FTRACE
>>>>   CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>>>>   CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
>>>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>>>> index c0cddf0fc22d..42041c12d496 100644
>>>> --- a/arch/riscv/mm/init.c
>>>> +++ b/arch/riscv/mm/init.c
>>>> @@ -20,6 +20,9 @@
>>>>   #include <linux/dma-map-ops.h>
>>>>   #include <linux/crash_dump.h>
>>>>   #include <linux/hugetlb.h>
>>>> +#ifdef CONFIG_RELOCATABLE
>>>> +#include <linux/elf.h>
>>>> +#endif
>>>>
>>>>   #include <asm/fixmap.h>
>>>>   #include <asm/tlbflush.h>
>>>> @@ -103,7 +106,7 @@ static void __init print_vm_layout(void)
>>>>       print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
>>>>             (unsigned long)high_memory);
>>>>   #ifdef CONFIG_64BIT
>>>> -    print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
>>>> +    print_mlm("kernel", (unsigned long)kernel_map.virt_addr,
>>>>             (unsigned long)ADDRESS_SPACE_END);
>>>>   #endif
>>>>   }
>>>> @@ -518,6 +521,44 @@ static __init pgprot_t pgprot_from_va(uintptr_t
>>>> va)
>>>>   #error "setup_vm() is called from head.S before relocate so it
>>>> should not use absolute addressing."
>>>>   #endif
>>>>
>>>> +#ifdef CONFIG_RELOCATABLE
>>>> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
>>>> +
>>>> +static void __init relocate_kernel(void)
>>>> +{
>>>> +    Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
>>>> +    /*
>>>> +     * This holds the offset between the linked virtual address and
>>>> the
>>>> +     * relocated virtual address.
>>>> +     */
>>>> +    uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
>>>> +    /*
>>>> +     * This holds the offset between kernel linked virtual address and
>>>> +     * physical address.
>>>> +     */
>>>> +    uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR -
>>>> kernel_map.phys_addr;
>>>> +
>>>> +    for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
>>>> +        Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
>>>> +        Elf64_Addr relocated_addr = rela->r_addend;
>>>> +
>>>> +        if (rela->r_info != R_RISCV_RELATIVE)
>>>> +            continue;
>>>> +
>>>> +        /*
>>>> +         * Make sure to not relocate vdso symbols like rt_sigreturn
>>>> +         * which are linked from the address 0 in vmlinux since
>>>> +         * vdso symbol addresses are actually used as an offset from
>>>> +         * mm->context.vdso in VDSO_OFFSET macro.
>>>> +         */
>>>> +        if (relocated_addr >= KERNEL_LINK_ADDR)
>>>> +            relocated_addr += reloc_offset;
>>>> +
>>>> +        *(Elf64_Addr *)addr = relocated_addr;
>>>> +    }
>>>> +}
>>>> +#endif /* CONFIG_RELOCATABLE */
>>>> +
>>>>   #ifdef CONFIG_XIP_KERNEL
>>>>   static void __init create_kernel_page_table(pgd_t *pgdir,
>>>>                           __always_unused bool early)
>>>> @@ -625,6 +666,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>>>>       BUG_ON((kernel_map.virt_addr + kernel_map.size) >
>>>> ADDRESS_SPACE_END - SZ_4K);
>>>>   #endif
>>>>
>>>> +#ifdef CONFIG_RELOCATABLE
>>>> +    /*
>>>> +     * Early page table uses only one PGDIR, which makes it possible
>>>> +     * to map PGDIR_SIZE aligned on PGDIR_SIZE: if the relocation
>>>> offset
>>>> +     * makes the kernel cross over a PGDIR_SIZE boundary, raise a bug
>>>> +     * since a part of the kernel would not get mapped.
>>>> +     */
>>>> +    BUG_ON(PGDIR_SIZE - (kernel_map.virt_addr & (PGDIR_SIZE - 1)) <
>>>> kernel_map.size);
>>>> +    relocate_kernel();
>>>> +#endif
>>>> +
>>>>       pt_ops.alloc_pte = alloc_pte_early;
>>>>       pt_ops.get_pte_virt = get_pte_virt_early;
>>>>   #ifndef __PAGETABLE_PMD_FOLDED



More information about the linux-riscv mailing list