[PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation

潘庆霖 panqinglin2020 at iscas.ac.cn
Mon Nov 29 19:11:43 PST 2021


Hi Alex,


On 2021/11/29 19:20, Alexandre ghiti wrote:
 > Hi Qinglin,
 >
 > On 11/24/21 12:20, panqinglin2020 at iscas.ac.cn wrote:
 >> From: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
 >>
 >> This patch adds Sv57 implementation on the top of Alex's Sv48 patchset.
 >> The mmu configuration will be determined on runtime, according to both
 >> mmu HW support and mmu-type field in the dtb. The kernel will try to
 >> set satp mode one by one from the configuration item to Sv39 in 64bit.
 >>
 >> Signed-off-by: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
 >> ---
 >>   arch/riscv/Kconfig                  |   4 +-
 >>   arch/riscv/include/asm/csr.h        |   1 +
 >>   arch/riscv/include/asm/fixmap.h     |   1 +
 >>   arch/riscv/include/asm/page.h       |   1 +
 >>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
 >>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
 >>   arch/riscv/include/asm/pgtable.h    |   4 +-
 >>   arch/riscv/kernel/cpu.c             |   4 +-
 >>   arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
 >>   9 files changed, 312 insertions(+), 24 deletions(-)
 >>
 >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
 >> index a4cadcd4e785..aac28e96d0e5 100644
 >> --- a/arch/riscv/Kconfig
 >> +++ b/arch/riscv/Kconfig
 >> @@ -151,7 +151,7 @@ config PAGE_OFFSET
 >>       hex
 >>       default 0xC0000000 if 32BIT
 >>       default 0x80000000 if 64BIT && !MMU
 >> -    default 0xffffc00000000000 if 64BIT
 >> +    default 0xff80000000000000 if 64BIT
 >>     config ARCH_FLATMEM_ENABLE
 >>       def_bool !NUMA
 >> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
 >>     config PGTABLE_LEVELS
 >>       int
 >> -    default 4 if 64BIT
 >> +    default 5 if 64BIT
 >>       default 2
 >>     config LOCKDEP_SUPPORT
 >> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
 >> index ae711692eec9..299abdef0cd6 100644
 >> --- a/arch/riscv/include/asm/csr.h
 >> +++ b/arch/riscv/include/asm/csr.h
 >> @@ -47,6 +47,7 @@
 >>   #define SATP_PPN    _AC(0x00000FFFFFFFFFFF, UL)
 >>   #define SATP_MODE_39    _AC(0x8000000000000000, UL)
 >>   #define SATP_MODE_48    _AC(0x9000000000000000, UL)
 >> +#define SATP_MODE_57    _AC(0xa000000000000000, UL)
 >>   #define SATP_ASID_BITS    16
 >>   #define SATP_ASID_SHIFT    44
 >>   #define SATP_ASID_MASK    _AC(0xFFFF, UL)
 >> diff --git a/arch/riscv/include/asm/fixmap.h 
b/arch/riscv/include/asm/fixmap.h
 >> index 58a718573ad6..3cfece8b6568 100644
 >> --- a/arch/riscv/include/asm/fixmap.h
 >> +++ b/arch/riscv/include/asm/fixmap.h
 >> @@ -25,6 +25,7 @@ enum fixed_addresses {
 >>       FIX_PTE,
 >>       FIX_PMD,
 >>       FIX_PUD,
 >> +    FIX_P4D,
 >>       FIX_TEXT_POKE1,
 >>       FIX_TEXT_POKE0,
 >>       FIX_EARLYCON_MEM_BASE,
 >> diff --git a/arch/riscv/include/asm/page.h 
b/arch/riscv/include/asm/page.h
 >> index 63334568a10e..41e0d88234d5 100644
 >> --- a/arch/riscv/include/asm/page.h
 >> +++ b/arch/riscv/include/asm/page.h
 >> @@ -37,6 +37,7 @@
 >>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 
address space so
 >>    * define the PAGE_OFFSET value for SV39.
 >>    */
 >> +#define PAGE_OFFSET_L4        _AC(0xffffc00000000000, UL)
 >>   #define PAGE_OFFSET_L3        _AC(0xffffffe000000000, UL)
 >>   #else
 >>   #define PAGE_OFFSET        _AC(CONFIG_PAGE_OFFSET, UL)
 >> diff --git a/arch/riscv/include/asm/pgalloc.h 
b/arch/riscv/include/asm/pgalloc.h
 >> index 11823004b87a..947f23d7b6af 100644
 >> --- a/arch/riscv/include/asm/pgalloc.h
 >> +++ b/arch/riscv/include/asm/pgalloc.h
 >> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct 
mm_struct *mm, p4d_t *p4d,
 >>       }
 >>   }
 >>   +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, 
p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        unsigned long pfn = virt_to_pfn(p4d);
 >> +
 >> +        set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
 >> +    }
 >> +}
 >> +
 >> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
 >> +                     p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        unsigned long pfn = virt_to_pfn(p4d);
 >> +
 >> +        set_pgd_safe(pgd,
 >> +                 __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
 >> +    }
 >> +}
 >> +
 >>   #define pud_alloc_one pud_alloc_one
 >>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned 
long addr)
 >>   {
 >> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, 
pud_t *pud)
 >>   }
 >>     #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
 >> +
 >> +#define p4d_alloc_one p4d_alloc_one
 >> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned 
long addr)
 >> +{
 >> +    if (pgtable_l5_enabled) {
 >> +        gfp_t gfp = GFP_PGTABLE_USER;
 >> +
 >> +        if (mm == &init_mm)
 >> +            gfp = GFP_PGTABLE_KERNEL;
 >> +        return (p4d_t *)get_zeroed_page(gfp);
 >> +    }
 >> +
 >> +    return NULL;
 >> +}
 >> +
 >> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
 >> +{
 >> +    BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
 >> +    free_page((unsigned long)p4d);
 >> +}
 >> +
 >> +#define p4d_free p4d_free
 >> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 >> +{
 >> +    if (pgtable_l5_enabled)
 >> +        __p4d_free(mm, p4d);
 >> +}
 >> +
 >> +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
 >>   #endif /* __PAGETABLE_PMD_FOLDED */
 >>     static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 >> diff --git a/arch/riscv/include/asm/pgtable-64.h 
b/arch/riscv/include/asm/pgtable-64.h
 >> index bbbdd66e5e2f..a01386d4094f 100644
 >> --- a/arch/riscv/include/asm/pgtable-64.h
 >> +++ b/arch/riscv/include/asm/pgtable-64.h
 >> @@ -9,16 +9,24 @@
 >>   #include <linux/const.h>
 >>     extern bool pgtable_l4_enabled;
 >> +extern bool pgtable_l5_enabled;
 >>     #define PGDIR_SHIFT_L3  30
 >>   #define PGDIR_SHIFT_L4  39
 >> +#define PGDIR_SHIFT_L5  48
 >>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
 >>   -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : 
PGDIR_SHIFT_L3)
 >> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
 >> +        (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
 >
 >
 > This syntax is very cumbersome, the best I could come up with is the 
following macro:
 >
 > #define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5): 
(pgtable_l4_enabled ? (l4): (l3))
 >
 > And I'm wondering if a single variable that contains the number of 
page table levels would not be better actually, any idea?
 >

Yes, it is a good idea to use a macro like pgtable_level. But a variable 
containing
the number of page table levels may be better. What about using such 
variable
for array index like this:

static int64_t page_table_level = 5; // set sv57 defaultly
#define PGTABLE_LEVEL_OFFSET 3
static int64_t pgdir_shift[3] = {PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, 
PGDIR_SHIFT_L5};
#define PGDIR_SHIFT pgdir_shift[page_table_level - PGTABLE_LEVEL_OFFSET]

 >
 >>
 >>   #endif /* __PAGETABLE_PMD_FOLDED */
 >> @@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
 >>   #endif /* CONFIG_STRICT_KERNEL_RWX */
 >>     #ifdef CONFIG_64BIT
 >> +static void __init disable_pgtable_l5(void)
 >> +{
 >> +    pgtable_l5_enabled = false;
 >> +    kernel_map.page_offset = PAGE_OFFSET_L4;
 >> +    satp_mode = SATP_MODE_48;
 >> +}
 >> +
 >>   static void __init disable_pgtable_l4(void)
 >>   {
 >>       pgtable_l4_enabled = false;
 >> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
 >>   static __init void set_satp_mode(uintptr_t dtb_pa)
 >>   {
 >>       u64 identity_satp, hw_satp;
 >> -    uintptr_t set_satp_mode_pmd;
 >> +    uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & 
PMD_MASK;
 >>       int cpus_node;
 >> +    bool check_l4 = false;
 >>         /* Check if the user asked for sv39 explicitly in the device 
tree */
 >>       cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
 >> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 >>                   continue;
 >>                 if (!strcmp(mmu_type, "riscv,sv39")) {
 >> +                disable_pgtable_l5();
 >>                   disable_pgtable_l4();
 >>                   return;
 >>               }
 >>   +            if (!strcmp(mmu_type, "riscv,sv48")) {
 >> +                check_l4 = true;
 >> +            }
 >> +
 >
 >
 > If sv48 is set in the device tree, why would you test if it is 
supported below? I would take it as is, just like for sv39, I'm not sure 
we want to override this silently and make a wrong device tree work.
 >

Actually I prefer to emit a big WARN when sv48 is set in device tree
but unavailable. I think the device tree field shows the page table level
which developers expect, and the kernel should try it best to boot itself
even when the cpu doesn't meet that expectation ? As for sv39, I think any
RISC-V 64-bit device's mmu (if exists) should support it, so we don't need
to test it


 >
 >> break;
 >>           }
 >>       }
 >>   -    set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
 >> +retry:
 >> +    if (check_l4)
 >> +        disable_pgtable_l5();
 >> +
 >>       create_pgd_mapping(early_pg_dir,
 >> -               set_satp_mode_pmd, (uintptr_t)early_pud,
 >> +               set_satp_mode_pmd,
 >> +               check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
 >>                  PGDIR_SIZE, PAGE_TABLE);
 >> +    if (!check_l4)
 >> +        create_p4d_mapping(early_p4d,
 >> +                set_satp_mode_pmd, (uintptr_t)early_pud,
 >> +                P4D_SIZE, PAGE_TABLE);
 >>       create_pud_mapping(early_pud,
 >>                  set_satp_mode_pmd, (uintptr_t)early_pmd,
 >>                  PUD_SIZE, PAGE_TABLE);
 >> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
 >>       hw_satp = csr_swap(CSR_SATP, 0ULL);
 >>       local_flush_tlb_all();
 >>   -    if (hw_satp != identity_satp)
 >> +    if (hw_satp != identity_satp) {
 >> +        if (!check_l4) {
 >> +            check_l4 = true;
 >> +            goto retry;
 >> +        }
 >>           disable_pgtable_l4();
 >> +    }
 >>         memset(early_pg_dir, 0, PAGE_SIZE);
 >> +    memset(early_p4d, 0, PAGE_SIZE);
 >>       memset(early_pud, 0, PAGE_SIZE);
 >>       memset(early_pmd, 0, PAGE_SIZE);
 >>   }
 >> @@ -766,6 +882,10 @@ static void __init 
create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
 >>                  PGDIR_SIZE,
 >>                  IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
 >>   +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
 >> +                   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
 >> +
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
 >>                      (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
 >> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_early;
 >>       pt_ops.alloc_pud = alloc_pud_early;
 >>       pt_ops.get_pud_virt = get_pud_virt_early;
 >> +    pt_ops.alloc_p4d = alloc_p4d_early;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_early;
 >>   #endif
 >>         kernel_map.virt_addr = KERNEL_LINK_ADDR;
 >> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>                  fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 >>     #ifndef __PAGETABLE_PMD_FOLDED
 >> +    /* Setup fixmap P4D and PUD */
 >> +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(fixmap_p4d, FIXADDR_START,
 >> +                   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
 >>       /* Setup fixmap PUD and PMD */
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(fixmap_pud, FIXADDR_START,
 >> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 >>       /* Setup trampoline PGD and PMD */
 >>       create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
 >>                  trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
 >> +    if (pgtable_l5_enabled)
 >> +        create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
 >> +                   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
 >>       if (pgtable_l4_enabled)
 >>           create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
 >>                      (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
 >> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
 >>       pt_ops.alloc_pud = alloc_pud_fixmap;
 >>       pt_ops.get_pud_virt = get_pud_virt_fixmap;
 >> +    pt_ops.alloc_p4d = alloc_p4d_fixmap;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
 >>   #endif
 >>       /* Setup swapper PGD for fixmap */
 >>       create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
 >> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
 >>       pt_ops.get_pmd_virt = get_pmd_virt_late;
 >>       pt_ops.alloc_pud = alloc_pud_late;
 >>       pt_ops.get_pud_virt = get_pud_virt_late;
 >> +    pt_ops.alloc_p4d = alloc_p4d_late;
 >> +    pt_ops.get_p4d_virt = get_p4d_virt_late;
 >>   #endif
 >>   }
 >>   #else
 >
 >
 > You forgot to handle kasan in this patch. Actually, I'm updating 
kasan for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix 
asan-stack clang build") broke it. I'm struggling a bit as the kasan 
offset that is known at compile time must be the same for sv39, s48 and 
sv57, so we have to move the kasan region next to the kernel, but then 
it is not aligned on pgdir boundaries for sv48 and sv57, so the current 
kasan population functions must be adapted.
 >
 > Anyway, I would advise you to wait for my updated patchset before 
tackling kasan for sv57.


Thanks for your suggestions. I will handle kasan in next patchset which 
should
be based on your new patchset.

Thanks,
Qinglin




More information about the linux-riscv mailing list