[PATCH v8 1/3] riscv: Introduce CONFIG_RELOCATABLE

Alexandre Ghiti alex at ghiti.fr
Wed Mar 22 06:38:34 PDT 2023


@linux-kbuild: Does anyone have an idea how to solve this?

Thanks!

On 2/22/23 13:29, Alexandre Ghiti wrote:
> +cc linux-kbuild, llvm, Nathan, Nick
>
> On 2/15/23 15:36, Alexandre Ghiti wrote:
>> From: Alexandre Ghiti <alex at ghiti.fr>
>>
>> This config allows compiling a 64-bit kernel as a PIE and relocating it
>> to any virtual address at runtime: this paves the way to KASLR.
>> Runtime relocation is possible since the relocation metadata is embedded
>> into the kernel.
>>
>> Note that relocating at runtime introduces an overhead even if the
>> kernel is loaded at the same address it was linked at. The compiler
>> options are those used on arm64, which uses the same RELA relocation
>> format.
>>
>> Signed-off-by: Alexandre Ghiti <alex at ghiti.fr>
>> ---
>>   arch/riscv/Kconfig              | 14 +++++++++
>>   arch/riscv/Makefile             |  7 +++--
>>   arch/riscv/kernel/efi-header.S  |  6 ++--
>>   arch/riscv/kernel/vmlinux.lds.S | 10 ++++--
>>   arch/riscv/mm/Makefile          |  4 +++
>>   arch/riscv/mm/init.c            | 54 ++++++++++++++++++++++++++++++++-
>>   6 files changed, 87 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index e2b656043abf..e0ee7ce4b2e3 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -544,6 +544,20 @@ config COMPAT
>>           If you want to execute 32-bit userspace applications, say Y.
>>   +config RELOCATABLE
>> +    bool "Build a relocatable kernel"
>> +    depends on MMU && 64BIT && !XIP_KERNEL
>> +    help
>> +          This builds a kernel as a Position Independent Executable 
>> (PIE),
>> +          which retains all relocation metadata required to relocate 
>> the
>> +          kernel binary at runtime to a different virtual address 
>> than the
>> +          address it was linked at.
>> +          Since RISCV uses the RELA relocation format, this requires a
>> +          relocation pass at runtime even if the kernel is loaded at 
>> the
>> +          same address it was linked at.
>> +
>> +          If unsure, say N.
>> +
>>   endmenu # "Kernel features"
>>     menu "Boot options"
>> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
>> index 82153960ac00..97c34136b027 100644
>> --- a/arch/riscv/Makefile
>> +++ b/arch/riscv/Makefile
>> @@ -7,9 +7,12 @@
>>   #
>>     OBJCOPYFLAGS    := -O binary
>> -LDFLAGS_vmlinux :=
>> +ifeq ($(CONFIG_RELOCATABLE),y)
>> +    LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro
>> +    KBUILD_CFLAGS += -fPIE
>> +endif
>>   ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
>> -    LDFLAGS_vmlinux := --no-relax
>> +    LDFLAGS_vmlinux += --no-relax
>>       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
>>       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
>>   endif
>> diff --git a/arch/riscv/kernel/efi-header.S 
>> b/arch/riscv/kernel/efi-header.S
>> index 8e733aa48ba6..f7ee09c4f12d 100644
>> --- a/arch/riscv/kernel/efi-header.S
>> +++ b/arch/riscv/kernel/efi-header.S
>> @@ -33,7 +33,7 @@ optional_header:
>>       .byte    0x02                    // MajorLinkerVersion
>>       .byte    0x14                    // MinorLinkerVersion
>>       .long    __pecoff_text_end - efi_header_end    // SizeOfCode
>> -    .long    __pecoff_data_virt_size            // 
>> SizeOfInitializedData
>> +    .long    __pecoff_data_virt_end - __pecoff_text_end    // 
>> SizeOfInitializedData
>>       .long    0                    // SizeOfUninitializedData
>>       .long    __efistub_efi_pe_entry - _start        // 
>> AddressOfEntryPoint
>>       .long    efi_header_end - _start            // BaseOfCode
>> @@ -91,9 +91,9 @@ section_table:
>>           IMAGE_SCN_MEM_EXECUTE            // Characteristics
>>         .ascii    ".data\0\0\0"
>> -    .long    __pecoff_data_virt_size            // VirtualSize
>> +    .long    __pecoff_data_virt_end - __pecoff_text_end    // 
>> VirtualSize
>>       .long    __pecoff_text_end - _start        // VirtualAddress
>> -    .long    __pecoff_data_raw_size            // SizeOfRawData
>> +    .long    __pecoff_data_raw_end - __pecoff_text_end    // 
>> SizeOfRawData
>>       .long    __pecoff_text_end - _start        // PointerToRawData
>>         .long    0                    // PointerToRelocations
>> diff --git a/arch/riscv/kernel/vmlinux.lds.S 
>> b/arch/riscv/kernel/vmlinux.lds.S
>> index 4e6c88aa4d87..8be2de3be08c 100644
>> --- a/arch/riscv/kernel/vmlinux.lds.S
>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>> @@ -122,9 +122,15 @@ SECTIONS
>>           *(.sdata*)
>>       }
>>   +    .rela.dyn : ALIGN(8) {
>> +        __rela_dyn_start = .;
>> +        *(.rela .rela*)
>> +        __rela_dyn_end = .;
>> +    }
>> +
>
>
> So I realized those relocations would be better in the init section so 
> we can get rid of them at some point. So I tried the following:
>
> diff --git a/arch/riscv/kernel/vmlinux.lds.S 
> b/arch/riscv/kernel/vmlinux.lds.S
> index 7ac215467fd5..6111023a89ef 100644
> --- a/arch/riscv/kernel/vmlinux.lds.S
> +++ b/arch/riscv/kernel/vmlinux.lds.S
> @@ -93,6 +93,12 @@ SECTIONS
>                 *(.rel.dyn*)
>         }
>
> +       .rela.dyn : ALIGN(8) {
> +               __rela_dyn_start = .;
> +               *(.rela .rela*)
> +               __rela_dyn_end = .;
> +       }
> +
>         __init_data_end = .;
>
>         . = ALIGN(8);
> @@ -119,12 +125,6 @@ SECTIONS
>                 *(.sdata*)
>         }
>
> -       .rela.dyn : ALIGN(8) {
> -               __rela_dyn_start = .;
> -               *(.rela .rela*)
> -               __rela_dyn_end = .;
> -       }
> -
>  #ifdef CONFIG_EFI
>         .pecoff_edata_padding : { BYTE(0); . = 
> ALIGN(PECOFF_FILE_ALIGNMENT); }
>         __pecoff_data_raw_end = ABSOLUTE(.);
>
>
> But then all the relocations in vmlinux end up being null:
>
> vmlinux:     file format elf64-littleriscv
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> 0000000000000000 R_RISCV_NONE      *ABS*
> 0000000000000000 R_RISCV_NONE      *ABS*
> ....
>
> I also noticed that re-linking vmlinux with the same command right
> afterwards works (i.e., the relocations are now valid):
>
> $ riscv64-linux-gnu-objdump -R vmlinux
>
> vmlinux:     file format elf64-littleriscv
>
> DYNAMIC RELOCATION RECORDS
> OFFSET           TYPE              VALUE
> ffffffff82600718 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ffffffff82600720 R_RISCV_RELATIVE  *ABS*-0x000000007d9ff8e8
> ...
>
> Below is the command used to generate this working vmlinux:
>
> riscv64-unknown-linux-gnu-ld -melf64lriscv -z noexecstack 
> --no-warn-rwx-segments -shared -Bsymbolic -z notext -z norelro 
> --no-relax --build-id=sha1 --script=./arch/riscv/kernel/vmlinux.lds 
> -Map=vmlinux.map -o vmlinux --whole-archive vmlinux.a 
> .vmlinux.export.o init/version-timestamp.o --no-whole-archive 
> --start-group ./drivers/firmware/efi/libstub/lib.a --end-group 
> .tmp_vmlinux.kallsyms3.o
>
> I tried a lot of things, but I am struggling to understand what is
> going on. Does anyone have any idea? FYI, the same problem happens with
> LLVM.
>
> Thanks,
>
> Alex
>
>
>>   #ifdef CONFIG_EFI
>>       .pecoff_edata_padding : { BYTE(0); . = 
>> ALIGN(PECOFF_FILE_ALIGNMENT); }
>> -    __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
>> +    __pecoff_data_raw_end = ABSOLUTE(.);
>>   #endif
>>         /* End of data section */
>> @@ -134,7 +140,7 @@ SECTIONS
>>     #ifdef CONFIG_EFI
>>       . = ALIGN(PECOFF_SECTION_ALIGNMENT);
>> -    __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
>> +    __pecoff_data_virt_end = ABSOLUTE(.);
>>   #endif
>>       _end = .;
>>   diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
>> index 2ac177c05352..b85e9e82f082 100644
>> --- a/arch/riscv/mm/Makefile
>> +++ b/arch/riscv/mm/Makefile
>> @@ -1,6 +1,10 @@
>>   # SPDX-License-Identifier: GPL-2.0-only
>>     CFLAGS_init.o := -mcmodel=medany
>> +ifdef CONFIG_RELOCATABLE
>> +CFLAGS_init.o += -fno-pie
>> +endif
>> +
>>   ifdef CONFIG_FTRACE
>>   CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
>>   CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>> index 7f01c2e56efe..3862696c2ac9 100644
>> --- a/arch/riscv/mm/init.c
>> +++ b/arch/riscv/mm/init.c
>> @@ -20,6 +20,9 @@
>>   #include <linux/dma-map-ops.h>
>>   #include <linux/crash_dump.h>
>>   #include <linux/hugetlb.h>
>> +#ifdef CONFIG_RELOCATABLE
>> +#include <linux/elf.h>
>> +#endif
>>     #include <asm/fixmap.h>
>>   #include <asm/tlbflush.h>
>> @@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
>>           print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
>>   #endif
>>   -        print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
>> +        print_ml("kernel", (unsigned long)kernel_map.virt_addr,
>>                (unsigned long)ADDRESS_SPACE_END);
>>       }
>>   }
>> @@ -854,6 +857,44 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>>   #error "setup_vm() is called from head.S before relocate so it 
>> should not use absolute addressing."
>>   #endif
>>   +#ifdef CONFIG_RELOCATABLE
>> +extern unsigned long __rela_dyn_start, __rela_dyn_end;
>> +
>> +static void __init relocate_kernel(void)
>> +{
>> +    Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
>> +    /*
>> +     * This holds the offset between the linked virtual address and the
>> +     * relocated virtual address.
>> +     */
>> +    uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
>> +    /*
>> +     * This holds the offset between kernel linked virtual address and
>> +     * physical address.
>> +     */
>> +    uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - 
>> kernel_map.phys_addr;
>> +
>> +    for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
>> +        Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
>> +        Elf64_Addr relocated_addr = rela->r_addend;
>> +
>> +        if (rela->r_info != R_RISCV_RELATIVE)
>> +            continue;
>> +
>> +        /*
>> +         * Make sure to not relocate vdso symbols like rt_sigreturn
>> +         * which are linked from the address 0 in vmlinux since
>> +         * vdso symbol addresses are actually used as an offset from
>> +         * mm->context.vdso in VDSO_OFFSET macro.
>> +         */
>> +        if (relocated_addr >= KERNEL_LINK_ADDR)
>> +            relocated_addr += reloc_offset;
>> +
>> +        *(Elf64_Addr *)addr = relocated_addr;
>> +    }
>> +}
>> +#endif /* CONFIG_RELOCATABLE */
>> +
>>   #ifdef CONFIG_XIP_KERNEL
>>   static void __init create_kernel_page_table(pgd_t *pgdir,
>>                           __always_unused bool early)
>> @@ -1039,6 +1080,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>>       BUG_ON((kernel_map.virt_addr + kernel_map.size) > 
>> ADDRESS_SPACE_END - SZ_4K);
>>   #endif
>>   +#ifdef CONFIG_RELOCATABLE
>> +    /*
>> +     * Early page table uses only one PUD, which makes it possible
>> +     * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
>> +     * makes the kernel cross over a PUD_SIZE boundary, raise a bug
>> +     * since a part of the kernel would not get mapped.
>> +     */
>> +    BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < 
>> kernel_map.size);
>> +    relocate_kernel();
>> +#endif
>> +
>>       apply_early_boot_alternatives();
>>       pt_ops_set_early();
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv



More information about the linux-riscv mailing list