[PATCH v6 1/4] RISC-V: mm: Restrict address space for sv39,sv48,sv57

Alexandre Ghiti alexghiti at rivosinc.com
Thu Jul 20 01:13:54 PDT 2023


On Fri, Jul 14, 2023 at 6:55 PM Charlie Jenkins <charlie at rivosinc.com> wrote:
>
> Make sv48 the default address space for mmap as some applications
> currently depend on this assumption. A hint address passed to mmap will
> cause the largest address space that fits entirely into the hint to be
> used. If the hint is less than or equal to 1<<38, an sv39 address will
> be used. An exception is that if the hint address is 0, then a sv48
> address will be used. After an address space is completely full, the next
> smallest address space will be used.
>
> Signed-off-by: Charlie Jenkins <charlie at rivosinc.com>
> ---
>  arch/riscv/include/asm/elf.h       |  2 +-
>  arch/riscv/include/asm/pgtable.h   | 12 +++++++-
>  arch/riscv/include/asm/processor.h | 46 +++++++++++++++++++++++++-----
>  3 files changed, 51 insertions(+), 9 deletions(-)
>
> diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
> index c24280774caf..5d3368d5585c 100644
> --- a/arch/riscv/include/asm/elf.h
> +++ b/arch/riscv/include/asm/elf.h
> @@ -49,7 +49,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
>   * the loader.  We need to make sure that it is out of the way of the program
>   * that it will "exec", and that there is sufficient room for the brk.
>   */
> -#define ELF_ET_DYN_BASE                ((TASK_SIZE / 3) * 2)
> +#define ELF_ET_DYN_BASE                ((DEFAULT_MAP_WINDOW / 3) * 2)
>
>  #ifdef CONFIG_64BIT
>  #ifdef CONFIG_COMPAT
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 75970ee2bda2..e13f5872bfe9 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -63,12 +63,22 @@
>   * position vmemmap directly below the VMALLOC region.
>   */
>  #ifdef CONFIG_64BIT
> +#define VA_BITS_SV39 39
> +#define VA_BITS_SV48 48
> +#define VA_BITS_SV57 57
> +
> +#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
> +#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
> +#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))
> +
>  #define VA_BITS                (pgtable_l5_enabled ? \
> -                               57 : (pgtable_l4_enabled ? 48 : 39))
> +                               VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
>  #else
>  #define VA_BITS                32
>  #endif
>
> +#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
> +
>  #define VMEMMAP_SHIFT \
>         (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
>  #define VMEMMAP_SIZE   BIT(VMEMMAP_SHIFT)
> diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
> index c950a8d9edef..14a5396eed3d 100644
> --- a/arch/riscv/include/asm/processor.h
> +++ b/arch/riscv/include/asm/processor.h
> @@ -13,20 +13,52 @@
>
>  #include <asm/ptrace.h>
>
> -/*
> - * This decides where the kernel will search for a free chunk of vm
> - * space during mmap's.
> - */
> -#define TASK_UNMAPPED_BASE     PAGE_ALIGN(TASK_SIZE / 3)
> -
> -#define STACK_TOP              TASK_SIZE
>  #ifdef CONFIG_64BIT
> +#define DEFAULT_MAP_WINDOW     (UL(1) << (MMAP_VA_BITS - 1))
>  #define STACK_TOP_MAX          TASK_SIZE_64
> +
> +#define arch_get_mmap_end(addr, len, flags)    \
> +({     \
> +       unsigned long mmap_end; \
> +       if ((addr) >= VA_USER_SV57)     \
> +               mmap_end = STACK_TOP_MAX;       \
> +       else if ((((addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48))       \
> +               mmap_end = VA_USER_SV48;        \
> +       else if ((addr) == 0)   \
> +               mmap_end = DEFAULT_MAP_WINDOW;  \
> +       else    \
> +               mmap_end = VA_USER_SV39;        \
> +       mmap_end;       \
> +})

What about the following instead:

#define arch_get_mmap_end(addr, len, flags)    \
({     \
       unsigned long mmap_end; \
       if ((addr) >= VA_USER_SV57) \
          mmap_end = STACK_TOP_MAX; \ // Maybe a comment here that
says it returns the max user address of the current mode, not obvious
at first sight.
       else \
          mmap_end = DEFAULT_MAP_WINDOW; \
       mmap_end; \
})

The only corner case is when sv57 is active: then only a hint greater
than or equal to VA_USER_SV57 can return a sv57 user address. Otherwise,
we just need to return the default mmap end, right?

> +
> +#define arch_get_mmap_base(addr, base) \
> +({     \
> +       unsigned long mmap_base;        \
> +       if (((addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57))      \
> +               mmap_base = (base) + (VA_USER_SV57 - DEFAULT_MAP_WINDOW);       \
> +       else if ((((addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48))       \
> +               mmap_base = (base) + (VA_USER_SV48 - DEFAULT_MAP_WINDOW);       \
> +       else if ((addr) == 0)   \
> +               mmap_base = (base);     \
> +       else    \
> +               mmap_base = (base) + (VA_USER_SV39 - DEFAULT_MAP_WINDOW);       \
> +       mmap_base;      \
> +})
> +

From arch_pick_mmap_layout()
(https://elixir.bootlin.com/linux/latest/source/mm/util.c#L433), the
"base" argument is:

- either STACK_TOP in top-down (more or less some random offset)
- or TASK_UNMAPPED_BASE in bottom-up (more or less some random offset)

When bottom-up is the current mode, we should not change the base, so
adding (VA_USER_SV57 - DEFAULT_MAP_WINDOW) in the first case does not
look right to me. When sv48 or sv57 is the active mode,
DEFAULT_MAP_WINDOW is equal to VA_USER_SV48, right? So (VA_USER_SV48 -
DEFAULT_MAP_WINDOW) is 0, and therefore not useful. And for the last
case, when the user asks for a sv39 address whereas the active mode is
sv48 or sv57, (VA_USER_SV39 - DEFAULT_MAP_WINDOW) is negative and the
base becomes smaller, which is not correct.

In the bottom-up case, we should preserve the base and I think that
again, only sv57 is the corner case to deal with.


>  #else
> +#define DEFAULT_MAP_WINDOW     TASK_SIZE
>  #define STACK_TOP_MAX          TASK_SIZE
>  #endif
>  #define STACK_ALIGN            16
>
> +#define STACK_TOP              DEFAULT_MAP_WINDOW
> +
> +/*
> + * This decides where the kernel will search for a free chunk of vm
> + * space during mmap's.
> + */
> +#define TASK_UNMAPPED_BASE     PAGE_ALIGN(DEFAULT_MAP_WINDOW / 3)
> +
>  #ifndef __ASSEMBLY__
>
>  struct task_struct;
> --
> 2.41.0
>



More information about the linux-riscv mailing list