[PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation

Alexandre ghiti alex at ghiti.fr
Mon Dec 6 02:53:24 PST 2021


Hi Qinglin,

On 11/30/21 04:11, Qinglin Pan (潘庆霖) wrote:
>
> Hi Alex,
>
>
> On 2021/11/29 19:20, Alexandre ghiti wrote:
> > Hi Qinglin,
> >
> > On 11/24/21 12:20, panqinglin2020 at iscas.ac.cn wrote:
> >> From: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
> >>
> >> This patch adds the Sv57 implementation on top of Alex's Sv48 patchset.
> >> The mmu configuration will be determined at runtime, according to both
> >> the mmu HW support and the mmu-type field in the dtb. On 64-bit, the
> >> kernel will try each satp mode in turn, from the configured one down
> >> to Sv39.
> >>
> >> Signed-off-by: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
> >> ---
> >>   arch/riscv/Kconfig                  |   4 +-
> >>   arch/riscv/include/asm/csr.h        |   1 +
> >>   arch/riscv/include/asm/fixmap.h     |   1 +
> >>   arch/riscv/include/asm/page.h       |   1 +
> >>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
> >>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
> >>   arch/riscv/include/asm/pgtable.h    |   4 +-
> >>   arch/riscv/kernel/cpu.c             |   4 +-
> >>   arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
> >>   9 files changed, 312 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >> index a4cadcd4e785..aac28e96d0e5 100644
> >> --- a/arch/riscv/Kconfig
> >> +++ b/arch/riscv/Kconfig
> >> @@ -151,7 +151,7 @@ config PAGE_OFFSET
> >>       hex
> >>       default 0xC0000000 if 32BIT
> >>       default 0x80000000 if 64BIT && !MMU
> >> -    default 0xffffc00000000000 if 64BIT
> >> +    default 0xff80000000000000 if 64BIT
> >>     config ARCH_FLATMEM_ENABLE
> >>       def_bool !NUMA
> >> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
> >>     config PGTABLE_LEVELS
> >>       int
> >> -    default 4 if 64BIT
> >> +    default 5 if 64BIT
> >>       default 2
> >>     config LOCKDEP_SUPPORT
> >> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> >> index ae711692eec9..299abdef0cd6 100644
> >> --- a/arch/riscv/include/asm/csr.h
> >> +++ b/arch/riscv/include/asm/csr.h
> >> @@ -47,6 +47,7 @@
> >>   #define SATP_PPN    _AC(0x00000FFFFFFFFFFF, UL)
> >>   #define SATP_MODE_39    _AC(0x8000000000000000, UL)
> >>   #define SATP_MODE_48    _AC(0x9000000000000000, UL)
> >> +#define SATP_MODE_57    _AC(0xa000000000000000, UL)
> >>   #define SATP_ASID_BITS    16
> >>   #define SATP_ASID_SHIFT    44
> >>   #define SATP_ASID_MASK    _AC(0xFFFF, UL)
> >> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> >> index 58a718573ad6..3cfece8b6568 100644
> >> --- a/arch/riscv/include/asm/fixmap.h
> >> +++ b/arch/riscv/include/asm/fixmap.h
> >> @@ -25,6 +25,7 @@ enum fixed_addresses {
> >>       FIX_PTE,
> >>       FIX_PMD,
> >>       FIX_PUD,
> >> +    FIX_P4D,
> >>       FIX_TEXT_POKE1,
> >>       FIX_TEXT_POKE0,
> >>       FIX_EARLYCON_MEM_BASE,
> >> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> >> index 63334568a10e..41e0d88234d5 100644
> >> --- a/arch/riscv/include/asm/page.h
> >> +++ b/arch/riscv/include/asm/page.h
> >> @@ -37,6 +37,7 @@
> >>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
> >>    * define the PAGE_OFFSET value for SV39.
> >>    */
> >> +#define PAGE_OFFSET_L4        _AC(0xffffc00000000000, UL)
> >>   #define PAGE_OFFSET_L3        _AC(0xffffffe000000000, UL)
> >>   #else
> >>   #define PAGE_OFFSET        _AC(CONFIG_PAGE_OFFSET, UL)
> >> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> >> index 11823004b87a..947f23d7b6af 100644
> >> --- a/arch/riscv/include/asm/pgalloc.h
> >> +++ b/arch/riscv/include/asm/pgalloc.h
> >> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
> >>       }
> >>   }
> >>   +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> +        set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +    }
> >> +}
> >> +
> >> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
> >> +                     p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> +        set_pgd_safe(pgd,
> >> +                 __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> +    }
> >> +}
> >> +
> >>   #define pud_alloc_one pud_alloc_one
> >>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
> >>   {
> >> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
> >>   }
> >>     #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
> >> +
> >> +#define p4d_alloc_one p4d_alloc_one
> >> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
> >> +{
> >> +    if (pgtable_l5_enabled) {
> >> +        gfp_t gfp = GFP_PGTABLE_USER;
> >> +
> >> +        if (mm == &init_mm)
> >> +            gfp = GFP_PGTABLE_KERNEL;
> >> +        return (p4d_t *)get_zeroed_page(gfp);
> >> +    }
> >> +
> >> +    return NULL;
> >> +}
> >> +
> >> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> +    BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
> >> +    free_page((unsigned long)p4d);
> >> +}
> >> +
> >> +#define p4d_free p4d_free
> >> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> +    if (pgtable_l5_enabled)
> >> +        __p4d_free(mm, p4d);
> >> +}
> >> +
> >> +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
> >>   #endif /* __PAGETABLE_PMD_FOLDED */
> >>     static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> >> index bbbdd66e5e2f..a01386d4094f 100644
> >> --- a/arch/riscv/include/asm/pgtable-64.h
> >> +++ b/arch/riscv/include/asm/pgtable-64.h
> >> @@ -9,16 +9,24 @@
> >>   #include <linux/const.h>
> >>     extern bool pgtable_l4_enabled;
> >> +extern bool pgtable_l5_enabled;
> >>     #define PGDIR_SHIFT_L3  30
> >>   #define PGDIR_SHIFT_L4  39
> >> +#define PGDIR_SHIFT_L5  48
> >>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
> >>   -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
> >> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
> >> +        (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
> >
> >
> > This syntax is very cumbersome; the best I could come up with is the following macro:
> >
> > #define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5) : (pgtable_l4_enabled ? (l4) : (l3)))
> >
> > And I'm wondering if a single variable that contains the number of page table levels would not be better actually, any idea?
> >
>
> Yes, it is a good idea to use a macro like pgtable_level. But a variable
> containing the number of page table levels may be better. What about using
> such a variable as an array index, like this:
>
> static int64_t page_table_level = 5; /* default to sv57 */
> #define PGTABLE_LEVEL_OFFSET 3
> static int64_t pgdir_shift[3] = {PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, PGDIR_SHIFT_L5};
> #define PGDIR_SHIFT pgdir_shift[page_table_level - PGTABLE_LEVEL_OFFSET]
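>
> The disable_* helpers in the patch could then just update that variable
> instead of a boolean, roughly (a sketch only):
>
> static void __init disable_pgtable_l5(void)
> {
>     page_table_level = 4;
>     kernel_map.page_offset = PAGE_OFFSET_L4;
>     satp_mode = SATP_MODE_48;
> }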


I have just updated the sv48 patchset without this suggestion, as it 
already contains some significant changes. I'm not opposed to your idea, 
but I have a preference for the macro as it does not use extra memory; 
do as you want and we'll see what others think too.
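
To make it concrete, here is a minimal sketch of what applying the macro
could look like (only PGDIR_SHIFT is shown; the other level-dependent
macros would follow the same pattern):

#define pgtable_level(l3, l4, l5) \
	(pgtable_l5_enabled ? (l5) : (pgtable_l4_enabled ? (l4) : (l3)))

#define PGDIR_SHIFT \
	pgtable_level(PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, PGDIR_SHIFT_L5)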

Thanks,

Alex


>
> >
> >>
> >>   #endif /* __PAGETABLE_PMD_FOLDED */
> >> @@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
> >>   #endif /* CONFIG_STRICT_KERNEL_RWX */
> >>     #ifdef CONFIG_64BIT
> >> +static void __init disable_pgtable_l5(void)
> >> +{
> >> +    pgtable_l5_enabled = false;
> >> +    kernel_map.page_offset = PAGE_OFFSET_L4;
> >> +    satp_mode = SATP_MODE_48;
> >> +}
> >> +
> >>   static void __init disable_pgtable_l4(void)
> >>   {
> >>       pgtable_l4_enabled = false;
> >> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
> >>   static __init void set_satp_mode(uintptr_t dtb_pa)
> >>   {
> >>       u64 identity_satp, hw_satp;
> >> -    uintptr_t set_satp_mode_pmd;
> >> +    uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> >>       int cpus_node;
> >> +    bool check_l4 = false;
> >>         /* Check if the user asked for sv39 explicitly in the device tree */
> >>       cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
> >> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
> >>                   continue;
> >>                 if (!strcmp(mmu_type, "riscv,sv39")) {
> >> +                disable_pgtable_l5();
> >>                   disable_pgtable_l4();
> >>                   return;
> >>               }
> >>   +            if (!strcmp(mmu_type, "riscv,sv48")) {
> >> +                check_l4 = true;
> >> +            }
> >> +
> >
> >
> > If sv48 is set in the device tree, why would you test if it is supported
> > below? I would take it as is, just like for sv39: I'm not sure we want to
> > override this silently and make a wrong device tree work.
> >
>
> Actually I prefer to emit a big WARN when sv48 is set in the device tree
> but unavailable. I think the device tree field shows the page table level
> which developers expect, and the kernel should try its best to boot itself
> even when the cpu doesn't meet that expectation. As for sv39, I think any
> RISC-V 64-bit device's mmu (if it exists) should support it, so we don't
> need to test it.
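>
> Roughly what I have in mind, as a sketch on top of the retry path in this
> patch (a separate flag would be needed to warn only when the dtb explicitly
> asked for sv48, and since set_satp_mode() runs very early the message may
> have to be recorded there and printed later in boot):
>
>     if (hw_satp != identity_satp) {
>         if (!check_l4) {
>             /* Sv57 is not supported by the HW: retry with Sv48 */
>             check_l4 = true;
>             goto retry;
>         }
>         /* Sv48 is not supported either: warn and fall back to Sv39 */
>         pr_warn("satp mode sv48 not supported by the HW, falling back to sv39\n");
>         disable_pgtable_l4();
>     }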
>
>
> >
> >> break;
> >>           }
> >>       }
> >>   -    set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> >> +retry:
> >> +    if (check_l4)
> >> +        disable_pgtable_l5();
> >> +
> >>       create_pgd_mapping(early_pg_dir,
> >> -               set_satp_mode_pmd, (uintptr_t)early_pud,
> >> +               set_satp_mode_pmd,
> >> +               check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
> >>                  PGDIR_SIZE, PAGE_TABLE);
> >> +    if (!check_l4)
> >> +        create_p4d_mapping(early_p4d,
> >> +                set_satp_mode_pmd, (uintptr_t)early_pud,
> >> +                P4D_SIZE, PAGE_TABLE);
> >>       create_pud_mapping(early_pud,
> >>                  set_satp_mode_pmd, (uintptr_t)early_pmd,
> >>                  PUD_SIZE, PAGE_TABLE);
> >> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
> >>       hw_satp = csr_swap(CSR_SATP, 0ULL);
> >>       local_flush_tlb_all();
> >>   -    if (hw_satp != identity_satp)
> >> +    if (hw_satp != identity_satp) {
> >> +        if (!check_l4) {
> >> +            check_l4 = true;
> >> +            goto retry;
> >> +        }
> >>           disable_pgtable_l4();
> >> +    }
> >>         memset(early_pg_dir, 0, PAGE_SIZE);
> >> +    memset(early_p4d, 0, PAGE_SIZE);
> >>       memset(early_pud, 0, PAGE_SIZE);
> >>       memset(early_pmd, 0, PAGE_SIZE);
> >>   }
> >> @@ -766,6 +882,10 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
> >>                  PGDIR_SIZE,
> >>                  IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
> >>   +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
> >> +                   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
> >> +
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
> >>                      (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_early;
> >>       pt_ops.alloc_pud = alloc_pud_early;
> >>       pt_ops.get_pud_virt = get_pud_virt_early;
> >> +    pt_ops.alloc_p4d = alloc_p4d_early;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_early;
> >>   #endif
> >>         kernel_map.virt_addr = KERNEL_LINK_ADDR;
> >> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>                  fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >>     #ifndef __PAGETABLE_PMD_FOLDED
> >> +    /* Setup fixmap P4D and PUD */
> >> +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(fixmap_p4d, FIXADDR_START,
> >> +                   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
> >>       /* Setup fixmap PUD and PMD */
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(fixmap_pud, FIXADDR_START,
> >> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >>       /* Setup trampoline PGD and PMD */
> >>       create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
> >>                  trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> +    if (pgtable_l5_enabled)
> >> +        create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
> >> +                   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
> >>       if (pgtable_l4_enabled)
> >>           create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
> >>                      (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
> >>       pt_ops.alloc_pud = alloc_pud_fixmap;
> >>       pt_ops.get_pud_virt = get_pud_virt_fixmap;
> >> +    pt_ops.alloc_p4d = alloc_p4d_fixmap;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
> >>   #endif
> >>       /* Setup swapper PGD for fixmap */
> >>       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> >> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
> >>       pt_ops.get_pmd_virt = get_pmd_virt_late;
> >>       pt_ops.alloc_pud = alloc_pud_late;
> >>       pt_ops.get_pud_virt = get_pud_virt_late;
> >> +    pt_ops.alloc_p4d = alloc_p4d_late;
> >> +    pt_ops.get_p4d_virt = get_p4d_virt_late;
> >>   #endif
> >>   }
> >>   #else
> >
> >
> > You forgot to handle kasan in this patch. Actually, I'm updating kasan
> > for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix asan-stack
> > clang build") broke it. I'm struggling a bit as the kasan offset that is
> > known at compile time must be the same for sv39, sv48 and sv57, so we
> > have to move the kasan region next to the kernel, but then it is not
> > aligned on pgdir boundaries for sv48 and sv57, so the current kasan
> > population functions must be adapted.
> >
> > Anyway, I would advise you to wait for my updated patchset before
> > tackling kasan for sv57.
>
>
> Thanks for your suggestions. I will handle kasan in the next patchset,
> which should be based on your new patchset.
>
> Thanks,
> Qinglin
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv


