[PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation
Alexandre ghiti
alex at ghiti.fr
Mon Dec 6 02:53:24 PST 2021
Hi Qinglin,
On 11/30/21 04:11, 潘庆霖 wrote:
>
> Hi Alex,
>
>
> On 2021/11/29 19:20, Alexandre ghiti wrote:
> > Hi Qinglin,
> >
> > On 11/24/21 12:20, panqinglin2020 at iscas.ac.cn wrote:
> >> From: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
> >>
> >> This patch adds Sv57 implementation on the top of Alex's Sv48
> patchset.
> >> The mmu configuration will be determined on runtime, according to both
> >> mmu HW support and mmu-type field in the dtb. The kernel will try to
> >> set satp mode one by one from the configuration item to Sv39 in 64bit.
> >>
> >> Signed-off-by: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
> >> ---
> >> arch/riscv/Kconfig | 4 +-
> >> arch/riscv/include/asm/csr.h | 1 +
> >> arch/riscv/include/asm/fixmap.h | 1 +
> >> arch/riscv/include/asm/page.h | 1 +
> >> arch/riscv/include/asm/pgalloc.h | 49 ++++++++
> >> arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
> >> arch/riscv/include/asm/pgtable.h | 4 +-
> >> arch/riscv/kernel/cpu.c | 4 +-
> >> arch/riscv/mm/init.c | 169
> +++++++++++++++++++++++++---
> >> 9 files changed, 312 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> >> index a4cadcd4e785..aac28e96d0e5 100644
> >> --- a/arch/riscv/Kconfig
> >> +++ b/arch/riscv/Kconfig
> >> @@ -151,7 +151,7 @@ config PAGE_OFFSET
> >> hex
> >> default 0xC0000000 if 32BIT
> >> default 0x80000000 if 64BIT && !MMU
> >> - default 0xffffc00000000000 if 64BIT
> >> + default 0xff80000000000000 if 64BIT
> >> config ARCH_FLATMEM_ENABLE
> >> def_bool !NUMA
> >> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
> >> config PGTABLE_LEVELS
> >> int
> >> - default 4 if 64BIT
> >> + default 5 if 64BIT
> >> default 2
> >> config LOCKDEP_SUPPORT
> >> diff --git a/arch/riscv/include/asm/csr.h
> b/arch/riscv/include/asm/csr.h
> >> index ae711692eec9..299abdef0cd6 100644
> >> --- a/arch/riscv/include/asm/csr.h
> >> +++ b/arch/riscv/include/asm/csr.h
> >> @@ -47,6 +47,7 @@
> >> #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
> >> #define SATP_MODE_39 _AC(0x8000000000000000, UL)
> >> #define SATP_MODE_48 _AC(0x9000000000000000, UL)
> >> +#define SATP_MODE_57 _AC(0xa000000000000000, UL)
> >> #define SATP_ASID_BITS 16
> >> #define SATP_ASID_SHIFT 44
> >> #define SATP_ASID_MASK _AC(0xFFFF, UL)
> >> diff --git a/arch/riscv/include/asm/fixmap.h
> b/arch/riscv/include/asm/fixmap.h
> >> index 58a718573ad6..3cfece8b6568 100644
> >> --- a/arch/riscv/include/asm/fixmap.h
> >> +++ b/arch/riscv/include/asm/fixmap.h
> >> @@ -25,6 +25,7 @@ enum fixed_addresses {
> >> FIX_PTE,
> >> FIX_PMD,
> >> FIX_PUD,
> >> + FIX_P4D,
> >> FIX_TEXT_POKE1,
> >> FIX_TEXT_POKE0,
> >> FIX_EARLYCON_MEM_BASE,
> >> diff --git a/arch/riscv/include/asm/page.h
> b/arch/riscv/include/asm/page.h
> >> index 63334568a10e..41e0d88234d5 100644
> >> --- a/arch/riscv/include/asm/page.h
> >> +++ b/arch/riscv/include/asm/page.h
> >> @@ -37,6 +37,7 @@
> >> * By default, CONFIG_PAGE_OFFSET value corresponds to SV48
> address space so
> >> * define the PAGE_OFFSET value for SV39.
> >> */
> >> +#define PAGE_OFFSET_L4 _AC(0xffffc00000000000, UL)
> >> #define PAGE_OFFSET_L3 _AC(0xffffffe000000000, UL)
> >> #else
> >> #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
> >> diff --git a/arch/riscv/include/asm/pgalloc.h
> b/arch/riscv/include/asm/pgalloc.h
> >> index 11823004b87a..947f23d7b6af 100644
> >> --- a/arch/riscv/include/asm/pgalloc.h
> >> +++ b/arch/riscv/include/asm/pgalloc.h
> >> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct
> mm_struct *mm, p4d_t *p4d,
> >> }
> >> }
> >> +static inline void pgd_populate(struct mm_struct *mm, pgd_t
> *pgd, p4d_t *p4d)
> >> +{
> >> + if (pgtable_l5_enabled) {
> >> + unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> + set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> + }
> >> +}
> >> +
> >> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t
> *pgd,
> >> + p4d_t *p4d)
> >> +{
> >> + if (pgtable_l5_enabled) {
> >> + unsigned long pfn = virt_to_pfn(p4d);
> >> +
> >> + set_pgd_safe(pgd,
> >> + __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> >> + }
> >> +}
> >> +
> >> #define pud_alloc_one pud_alloc_one
> >> static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned
> long addr)
> >> {
> >> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct
> *mm, pud_t *pud)
> >> }
> >> #define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
> >> +
> >> +#define p4d_alloc_one p4d_alloc_one
> >> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned
> long addr)
> >> +{
> >> + if (pgtable_l5_enabled) {
> >> + gfp_t gfp = GFP_PGTABLE_USER;
> >> +
> >> + if (mm == &init_mm)
> >> + gfp = GFP_PGTABLE_KERNEL;
> >> + return (p4d_t *)get_zeroed_page(gfp);
> >> + }
> >> +
> >> + return NULL;
> >> +}
> >> +
> >> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
> >> + free_page((unsigned long)p4d);
> >> +}
> >> +
> >> +#define p4d_free p4d_free
> >> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> >> +{
> >> + if (pgtable_l5_enabled)
> >> + __p4d_free(mm, p4d);
> >> +}
> >> +
> >> +#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
> >> #endif /* __PAGETABLE_PMD_FOLDED */
> >> static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> >> diff --git a/arch/riscv/include/asm/pgtable-64.h
> b/arch/riscv/include/asm/pgtable-64.h
> >> index bbbdd66e5e2f..a01386d4094f 100644
> >> --- a/arch/riscv/include/asm/pgtable-64.h
> >> +++ b/arch/riscv/include/asm/pgtable-64.h
> >> @@ -9,16 +9,24 @@
> >> #include <linux/const.h>
> >> extern bool pgtable_l4_enabled;
> >> +extern bool pgtable_l5_enabled;
> >> #define PGDIR_SHIFT_L3 30
> >> #define PGDIR_SHIFT_L4 39
> >> +#define PGDIR_SHIFT_L5 48
> >> #define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
> >> -#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 :
> PGDIR_SHIFT_L3)
> >> +#define PGDIR_SHIFT (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
> >> + (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))
> >
> >
> > This syntax is very cumbersome, the best I could come up with is the
> following macro:
> >
> > #define pgtable_level(l3, l4, l5) (pgtable_l5_enabled ? (l5):
> (pgtable_l4_enabled ? (l4): (l3))
> >
> > And I'm wondering if a single variable that contains the number of
> page table levels would not be better actually, any idea?
> >
>
> Yes, it is a good idea to use a macro like pgtable_level. But a
> variable containing
> the number of page table levels may be better. What about using such
> variable
> for array index like this:
>
> static int64_t page_table_level = 5; // set sv57 defaultly
> #define PGTABLE_LEVEL_OFFSET 3
> static int64_t pgdir_shift[3] = {PGDIR_SHIFT_L3, PGDIR_SHIFT_L4,
> PGDIR_SHIFT_L5};
> #define PGDIR_SHIFT pgdir_shift[page_table_level - PGTABLE_LEVEL_OFFSET]
I have just updated the sv48 patchset without this suggestion as it
already contains some significant changes. I'm not opposed to your idea,
but I have a preference for the macro as it does not use extra memory,
but do as you want and we'll see what others think too.
Thanks,
Alex
>
> >
> >>
> >> #endif /* __PAGETABLE_PMD_FOLDED */
> >> @@ -627,6 +716,13 @@ static __init pgprot_t
> pgprot_from_va(uintptr_t va)
> >> #endif /* CONFIG_STRICT_KERNEL_RWX */
> >> #ifdef CONFIG_64BIT
> >> +static void __init disable_pgtable_l5(void)
> >> +{
> >> + pgtable_l5_enabled = false;
> >> + kernel_map.page_offset = PAGE_OFFSET_L4;
> >> + satp_mode = SATP_MODE_48;
> >> +}
> >> +
> >> static void __init disable_pgtable_l4(void)
> >> {
> >> pgtable_l4_enabled = false;
> >> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
> >> static __init void set_satp_mode(uintptr_t dtb_pa)
> >> {
> >> u64 identity_satp, hw_satp;
> >> - uintptr_t set_satp_mode_pmd;
> >> + uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) &
> PMD_MASK;
> >> int cpus_node;
> >> + bool check_l4 = false;
> >> /* Check if the user asked for sv39 explicitly in the
> device tree */
> >> cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
> >> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t
> dtb_pa)
> >> continue;
> >> if (!strcmp(mmu_type, "riscv,sv39")) {
> >> + disable_pgtable_l5();
> >> disable_pgtable_l4();
> >> return;
> >> }
> >> + if (!strcmp(mmu_type, "riscv,sv48")) {
> >> + check_l4 = true;
> >> + }
> >> +
> >
> >
> > If sv48 is set in the device tree, why would you test if it is
> supported below? I would take it as is, just like for sv39, I'm not
> sure we want to override this silently and make a wrong device tree work.
> >
>
> Actually I prefer to emit a big WARN when sv48 is set in device tree
> but unavailable. I think the device tree field shows the page table level
> which developers expect, and the kernel should try it best to boot itself
> even when the cpu doesn't meet that expectation ? As for sv39, I think
> any
> RISC-V 64-bit device's mmu (if exists) should support it, so we don't
> need
> to test it
>
>
> >
> >> break;
> >> }
> >> }
> >> - set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> >> +retry:
> >> + if (check_l4)
> >> + disable_pgtable_l5();
> >> +
> >> create_pgd_mapping(early_pg_dir,
> >> - set_satp_mode_pmd, (uintptr_t)early_pud,
> >> + set_satp_mode_pmd,
> >> + check_l4 ? (uintptr_t)early_pud :
> (uintptr_t)early_p4d,
> >> PGDIR_SIZE, PAGE_TABLE);
> >> + if (!check_l4)
> >> + create_p4d_mapping(early_p4d,
> >> + set_satp_mode_pmd, (uintptr_t)early_pud,
> >> + P4D_SIZE, PAGE_TABLE);
> >> create_pud_mapping(early_pud,
> >> set_satp_mode_pmd, (uintptr_t)early_pmd,
> >> PUD_SIZE, PAGE_TABLE);
> >> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t
> dtb_pa)
> >> hw_satp = csr_swap(CSR_SATP, 0ULL);
> >> local_flush_tlb_all();
> >> - if (hw_satp != identity_satp)
> >> + if (hw_satp != identity_satp) {
> >> + if (!check_l4) {
> >> + check_l4 = true;
> >> + goto retry;
> >> + }
> >> disable_pgtable_l4();
> >> + }
> >> memset(early_pg_dir, 0, PAGE_SIZE);
> >> + memset(early_p4d, 0, PAGE_SIZE);
> >> memset(early_pud, 0, PAGE_SIZE);
> >> memset(early_pmd, 0, PAGE_SIZE);
> >> }
> >> @@ -766,6 +882,10 @@ static void __init
> create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
> >> PGDIR_SIZE,
> >> IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
> >> + if (pgtable_l5_enabled)
> >> + create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
> >> + (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
> >> +
> >> if (pgtable_l4_enabled)
> >> create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
> >> (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >> pt_ops.get_pmd_virt = get_pmd_virt_early;
> >> pt_ops.alloc_pud = alloc_pud_early;
> >> pt_ops.get_pud_virt = get_pud_virt_early;
> >> + pt_ops.alloc_p4d = alloc_p4d_early;
> >> + pt_ops.get_p4d_virt = get_p4d_virt_early;
> >> #endif
> >> kernel_map.virt_addr = KERNEL_LINK_ADDR;
> >> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >> fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> #ifndef __PAGETABLE_PMD_FOLDED
> >> + /* Setup fixmap P4D and PUD */
> >> + if (pgtable_l5_enabled)
> >> + create_p4d_mapping(fixmap_p4d, FIXADDR_START,
> >> + (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
> >> /* Setup fixmap PUD and PMD */
> >> if (pgtable_l4_enabled)
> >> create_pud_mapping(fixmap_pud, FIXADDR_START,
> >> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> >> /* Setup trampoline PGD and PMD */
> >> create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
> >> trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> >> + if (pgtable_l5_enabled)
> >> + create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
> >> + (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
> >> if (pgtable_l4_enabled)
> >> create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
> >> (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
> >> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
> >> pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
> >> pt_ops.alloc_pud = alloc_pud_fixmap;
> >> pt_ops.get_pud_virt = get_pud_virt_fixmap;
> >> + pt_ops.alloc_p4d = alloc_p4d_fixmap;
> >> + pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
> >> #endif
> >> /* Setup swapper PGD for fixmap */
> >> create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> >> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
> >> pt_ops.get_pmd_virt = get_pmd_virt_late;
> >> pt_ops.alloc_pud = alloc_pud_late;
> >> pt_ops.get_pud_virt = get_pud_virt_late;
> >> + pt_ops.alloc_p4d = alloc_p4d_late;
> >> + pt_ops.get_p4d_virt = get_p4d_virt_late;
> >> #endif
> >> }
> >> #else
> >
> >
> > You forgot to handle kasan in this patch. Actually, I'm updating
> kasan for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix
> asan-stack clang build") broke it. I'm struggling a bit as the kasan
> offset that is known at compile time must be the same for sv39, s48
> and sv57, so we have to move the kasan region next to the kernel, but
> then it is not aligned on pgdir boundaries for sv48 and sv57, so the
> current kasan population functions must be adapted.
> >
> > Anyway, I would advise you to wait for my updated patchset before
> tackling kasan for sv57.
>
>
> Thanks for your suggestions. I will handle kasan in next patchset
> which should
> be based on your new patchset.
>
> Thanks,
> Qinglin
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
More information about the linux-riscv
mailing list