[PATCH v1 1/2] riscv, mm: Add Sv57 support based on Sv48 implementation

Alexandre Ghiti alex at ghiti.fr
Mon Nov 29 03:20:20 PST 2021


Hi Qinglin,

On 11/24/21 12:20, panqinglin2020 at iscas.ac.cn wrote:
> From: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
>
> This patch adds Sv57 support on top of Alex's Sv48 patchset.
> The mmu configuration is determined at runtime, according to both the
> mmu hardware support and the mmu-type field in the dtb. The kernel
> tries each satp mode in turn, from the configured one down to Sv39,
> on 64-bit.
>
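
Just to restate my understanding of the runtime detection: it relies on
the satp mode field being WARL, so we write the candidate mode and read
it back, roughly (pseudo-code only, fall_back_to_next_mode() is made up;
the real logic is in set_satp_mode() below):

	csr_write(CSR_SATP, identity_satp);	/* try the highest mode first */
	hw_satp = csr_swap(CSR_SATP, 0ULL);	/* read back, MMU off again */
	if (hw_satp != identity_satp)
		fall_back_to_next_mode();	/* Sv57 -> Sv48 -> Sv39 */
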
> Signed-off-by: Qinglin Pan <panqinglin2020 at iscas.ac.cn>
> ---
>   arch/riscv/Kconfig                  |   4 +-
>   arch/riscv/include/asm/csr.h        |   1 +
>   arch/riscv/include/asm/fixmap.h     |   1 +
>   arch/riscv/include/asm/page.h       |   1 +
>   arch/riscv/include/asm/pgalloc.h    |  49 ++++++++
>   arch/riscv/include/asm/pgtable-64.h | 103 ++++++++++++++++-
>   arch/riscv/include/asm/pgtable.h    |   4 +-
>   arch/riscv/kernel/cpu.c             |   4 +-
>   arch/riscv/mm/init.c                | 169 +++++++++++++++++++++++++---
>   9 files changed, 312 insertions(+), 24 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index a4cadcd4e785..aac28e96d0e5 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -151,7 +151,7 @@ config PAGE_OFFSET
>   	hex
>   	default 0xC0000000 if 32BIT
>   	default 0x80000000 if 64BIT && !MMU
> -	default 0xffffc00000000000 if 64BIT
> +	default 0xff80000000000000 if 64BIT
>   
>   config ARCH_FLATMEM_ENABLE
>   	def_bool !NUMA
> @@ -196,7 +196,7 @@ config FIX_EARLYCON_MEM
>   
>   config PGTABLE_LEVELS
>   	int
> -	default 4 if 64BIT
> +	default 5 if 64BIT
>   	default 2
>   
>   config LOCKDEP_SUPPORT
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index ae711692eec9..299abdef0cd6 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -47,6 +47,7 @@
>   #define SATP_PPN	_AC(0x00000FFFFFFFFFFF, UL)
>   #define SATP_MODE_39	_AC(0x8000000000000000, UL)
>   #define SATP_MODE_48	_AC(0x9000000000000000, UL)
> +#define SATP_MODE_57	_AC(0xa000000000000000, UL)
>   #define SATP_ASID_BITS	16
>   #define SATP_ASID_SHIFT	44
>   #define SATP_ASID_MASK	_AC(0xFFFF, UL)
> diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
> index 58a718573ad6..3cfece8b6568 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -25,6 +25,7 @@ enum fixed_addresses {
>   	FIX_PTE,
>   	FIX_PMD,
>   	FIX_PUD,
> +	FIX_P4D,
>   	FIX_TEXT_POKE1,
>   	FIX_TEXT_POKE0,
>   	FIX_EARLYCON_MEM_BASE,
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 63334568a10e..41e0d88234d5 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -37,6 +37,7 @@
>    * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
>    * define the PAGE_OFFSET value for SV39.
>    */
> +#define PAGE_OFFSET_L4		_AC(0xffffc00000000000, UL)
>   #define PAGE_OFFSET_L3		_AC(0xffffffe000000000, UL)
>   #else
>   #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index 11823004b87a..947f23d7b6af 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -59,6 +59,26 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
>   	}
>   }
>   
> +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled) {
> +		unsigned long pfn = virt_to_pfn(p4d);
> +
> +		set_pgd(pgd, __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
> +static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd,
> +				     p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled) {
> +		unsigned long pfn = virt_to_pfn(p4d);
> +
> +		set_pgd_safe(pgd,
> +			     __pgd((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
>   #define pud_alloc_one pud_alloc_one
>   static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
>   {
> @@ -76,6 +96,35 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>   }
>   
>   #define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
> +
> +#define p4d_alloc_one p4d_alloc_one
> +static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
> +{
> +	if (pgtable_l5_enabled) {
> +		gfp_t gfp = GFP_PGTABLE_USER;
> +
> +		if (mm == &init_mm)
> +			gfp = GFP_PGTABLE_KERNEL;
> +		return (p4d_t *)get_zeroed_page(gfp);
> +	}
> +
> +	return NULL;
> +}
> +
> +static inline void __p4d_free(struct mm_struct *mm, p4d_t *p4d)
> +{
> +	BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
> +	free_page((unsigned long)p4d);
> +}
> +
> +#define p4d_free p4d_free
> +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
> +{
> +	if (pgtable_l5_enabled)
> +		__p4d_free(mm, p4d);
> +}
> +
> +#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
>   static inline pgd_t *pgd_alloc(struct mm_struct *mm)
> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index bbbdd66e5e2f..a01386d4094f 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -9,16 +9,24 @@
>   #include <linux/const.h>
>   
>   extern bool pgtable_l4_enabled;
> +extern bool pgtable_l5_enabled;
>   
>   #define PGDIR_SHIFT_L3  30
>   #define PGDIR_SHIFT_L4  39
> +#define PGDIR_SHIFT_L5  48
>   #define PGDIR_SIZE_L3   (_AC(1, UL) << PGDIR_SHIFT_L3)
>   
> -#define PGDIR_SHIFT     (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
> +#define PGDIR_SHIFT     (pgtable_l5_enabled ? PGDIR_SHIFT_L5 : \
> +		(pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3))


This syntax is very cumbersome; the best I could come up with is the
following macro:

#define pgtable_level(l3, l4, l5) \
	(pgtable_l5_enabled ? (l5) : (pgtable_l4_enabled ? (l4) : (l3)))

And I'm wondering whether a single variable holding the number of page
table levels would not actually be better, any idea?
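
For instance, with a single level count (completely untested, and
pgtable_levels is a made-up name):

extern unsigned int pgtable_levels;	/* 3, 4 or 5, fixed at boot */

#define pgtable_level(l3, l4, l5) \
	(pgtable_levels == 5 ? (l5) : (pgtable_levels == 4 ? (l4) : (l3)))

#define PGDIR_SHIFT	pgtable_level(PGDIR_SHIFT_L3, PGDIR_SHIFT_L4, PGDIR_SHIFT_L5)
#define VA_BITS		pgtable_level(39, 48, 57)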


>   /* Size of region mapped by a page global directory */
>   #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
>   #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
>   
> +/* p4d is folded into pgd in case of 4-level page table */
> +#define P4D_SHIFT      39
> +#define P4D_SIZE       (_AC(1, UL) << P4D_SHIFT)
> +#define P4D_MASK       (~(P4D_SIZE - 1))
> +
>   /* pud is folded into pgd in case of 3-level page table */
>   #define PUD_SHIFT      30
>   #define PUD_SIZE       (_AC(1, UL) << PUD_SHIFT)
> @@ -29,6 +37,15 @@ extern bool pgtable_l4_enabled;
>   #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
>   #define PMD_MASK        (~(PMD_SIZE - 1))
>   
> +/* Page 4th Directory entry */
> +typedef struct {
> +	unsigned long p4d;
> +} p4d_t;
> +
> +#define p4d_val(x)	((x).p4d)
> +#define __p4d(x)	((p4d_t) { (x) })
> +#define PTRS_PER_P4D	(PAGE_SIZE / sizeof(p4d_t))
> +
>   /* Page Upper Directory entry */
>   typedef struct {
>   	unsigned long pud;
> @@ -99,6 +116,15 @@ static inline struct page *pud_page(pud_t pud)
>   	return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
>   }
>   
> +#define mm_p4d_folded  mm_p4d_folded
> +static inline bool mm_p4d_folded(struct mm_struct *mm)
> +{
> +	if (pgtable_l5_enabled)
> +		return false;
> +
> +	return true;
> +}
> +
>   #define mm_pud_folded  mm_pud_folded
>   static inline bool mm_pud_folded(struct mm_struct *mm)
>   {
> @@ -128,6 +154,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
>   #define pud_ERROR(e)   \
>   	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
>   
> +#define p4d_ERROR(e)   \
> +	pr_err("%s:%d: bad p4d %016lx.\n", __FILE__, __LINE__, p4d_val(e))
> +
>   static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
>   {
>   	if (pgtable_l4_enabled)
> @@ -166,6 +195,16 @@ static inline void p4d_clear(p4d_t *p4d)
>   		set_p4d(p4d, __p4d(0));
>   }
>   
> +static inline p4d_t pfn_p4d(unsigned long pfn, pgprot_t prot)
> +{
> +	return __p4d((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
> +}
> +
> +static inline unsigned long _p4d_pfn(p4d_t p4d)
> +{
> +	return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
> +}
> +
>   static inline pud_t *p4d_pgtable(p4d_t p4d)
>   {
>   	if (pgtable_l4_enabled)
> @@ -190,4 +229,66 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
>   	return (pud_t *)p4d;
>   }
>   
> +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		*pgdp = pgd;
> +	else
> +		set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) });
> +}
> +
> +static inline int pgd_none(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (pgd_val(pgd) == 0);
> +
> +	return 0;
> +}
> +
> +static inline int pgd_present(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (pgd_val(pgd) & _PAGE_PRESENT);
> +
> +	return 1;
> +}
> +
> +static inline int pgd_bad(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return !pgd_present(pgd);
> +
> +	return 0;
> +}
> +
> +static inline void pgd_clear(pgd_t *pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		set_pgd(pgd, __pgd(0));
> +}
> +
> +static inline p4d_t *pgd_pgtable(pgd_t pgd)
> +{
> +	if (pgtable_l5_enabled)
> +		return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
> +
> +	return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
> +}
> +
> +static inline struct page *pgd_page(pgd_t pgd)
> +{
> +	return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
> +}
> +
> +#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
> +
> +#define p4d_offset p4d_offset
> +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
> +{
> +	if (pgtable_l5_enabled)
> +		return pgd_pgtable(*pgd) + p4d_index(address);
> +
> +	return (p4d_t *)pgd;
> +}
> +
>   #endif /* _ASM_RISCV_PGTABLE_64_H */
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 152fc5454c02..922cc436b36b 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -49,7 +49,8 @@
>    * position vmemmap directly below the VMALLOC region.
>    */
>   #ifdef CONFIG_64BIT
> -#define VA_BITS		(pgtable_l4_enabled ? 48 : 39)
> +#define VA_BITS		(pgtable_l5_enabled ? \
> +				57 : (pgtable_l4_enabled ? 48 : 39))
>   #else
>   #define VA_BITS		32
>   #endif
> @@ -89,7 +90,6 @@
>   
>   #ifndef __ASSEMBLY__
>   
> -#include <asm-generic/pgtable-nop4d.h>
>   #include <asm/page.h>
>   #include <asm/tlbflush.h>
>   #include <linux/mm_types.h>
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index dea9b1c31889..e190bd205376 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -78,7 +78,9 @@ static void print_mmu(struct seq_file *f)
>   #if defined(CONFIG_32BIT)
>   	strncpy(sv_type, "sv32", 5);
>   #elif defined(CONFIG_64BIT)
> -	if (pgtable_l4_enabled)
> +	if (pgtable_l5_enabled)
> +		strncpy(sv_type, "sv57", 5);
> +	else if (pgtable_l4_enabled)
>   		strncpy(sv_type, "sv48", 5);
>   	else
>   		strncpy(sv_type, "sv39", 5);
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 0945ac9ed682..81822db8dd10 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -38,12 +38,15 @@ EXPORT_SYMBOL(kernel_map);
>   #endif
>   
>   #ifdef CONFIG_64BIT
> -u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_48 : SATP_MODE_39;
> +u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? SATP_MODE_57 : SATP_MODE_39;
>   #else
>   u64 satp_mode = SATP_MODE_32;
>   #endif
>   EXPORT_SYMBOL(satp_mode);
>   
> +bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
> +				true : false;
> +EXPORT_SYMBOL(pgtable_l5_enabled);
>   bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL) ?
>   				true : false;
>   EXPORT_SYMBOL(pgtable_l4_enabled);
> @@ -72,6 +75,8 @@ struct pt_alloc_ops {
>   	phys_addr_t (*alloc_pmd)(uintptr_t va);
>   	pud_t *(*get_pud_virt)(phys_addr_t pa);
>   	phys_addr_t (*alloc_pud)(uintptr_t va);
> +	p4d_t *(*get_p4d_virt)(phys_addr_t pa);
> +	phys_addr_t (*alloc_p4d)(uintptr_t va);
>   #endif
>   };
>   
> @@ -285,6 +290,7 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
>   static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
>   
>   pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
> +static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
>   static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
>   static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>   
> @@ -374,6 +380,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
>   #define early_pmd      ((pmd_t *)XIP_FIXUP(early_pmd))
>   #endif /* CONFIG_XIP_KERNEL */
>   
> +static p4d_t trampoline_p4d[PTRS_PER_P4D] __page_aligned_bss;
> +static p4d_t fixmap_p4d[PTRS_PER_P4D] __page_aligned_bss;
> +static p4d_t early_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
> +
> +#ifdef CONFIG_XIP_KERNEL
> +#define trampoline_p4d ((p4d_t *)XIP_FIXUP(trampoline_p4d))
> +#define fixmap_p4d     ((p4d_t *)XIP_FIXUP(fixmap_p4d))
> +#define early_p4d      ((p4d_t *)XIP_FIXUP(early_p4d))
> +#endif /* CONFIG_XIP_KERNEL */
> +
>   static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss;
>   static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss;
>   static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
> @@ -487,6 +503,44 @@ static phys_addr_t alloc_pud_late(uintptr_t va)
>   	return __pa(vaddr);
>   }
>   
> +static p4d_t *__init get_p4d_virt_early(phys_addr_t pa)
> +{
> +	return (p4d_t *)((uintptr_t)pa);
> +}
> +
> +static p4d_t *__init get_p4d_virt_fixmap(phys_addr_t pa)
> +{
> +	clear_fixmap(FIX_P4D);
> +	return (p4d_t *)set_fixmap_offset(FIX_P4D, pa);
> +}
> +
> +static p4d_t *__init get_p4d_virt_late(phys_addr_t pa)
> +{
> +	return (p4d_t *)__va(pa);
> +}
> +
> +static phys_addr_t __init alloc_p4d_early(uintptr_t va)
> +{
> +	/* Only one P4D is available for early mapping */
> +	BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT);
> +
> +	return (uintptr_t)early_p4d;
> +}
> +
> +static phys_addr_t __init alloc_p4d_fixmap(uintptr_t va)
> +{
> +	return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
> +}
> +
> +static phys_addr_t alloc_p4d_late(uintptr_t va)
> +{
> +	unsigned long vaddr;
> +
> +	vaddr = __get_free_page(GFP_KERNEL);
> +	BUG_ON(!vaddr);
> +	return __pa(vaddr);
> +}
> +
>   static void __init create_pud_mapping(pud_t *pudp,
>   				      uintptr_t va, phys_addr_t pa,
>   				      phys_addr_t sz, pgprot_t prot)
> @@ -514,21 +568,55 @@ static void __init create_pud_mapping(pud_t *pudp,
>   	create_pmd_mapping(nextp, va, pa, sz, prot);
>   }
>   
> -#define pgd_next_t		pud_t
> -#define alloc_pgd_next(__va)	(pgtable_l4_enabled ?			\
> -		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va))
> -#define get_pgd_next_virt(__pa)	(pgtable_l4_enabled ?			\
> -		pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa))
> +static void __init create_p4d_mapping(p4d_t *p4dp,
> +				      uintptr_t va, phys_addr_t pa,
> +				      phys_addr_t sz, pgprot_t prot)
> +{
> +	pud_t *nextp;
> +	phys_addr_t next_phys;
> +	uintptr_t p4d_index = p4d_index(va);
> +
> +	if (sz == P4D_SIZE) {
> +		if (p4d_val(p4dp[p4d_index]) == 0)
> +			p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot);
> +		return;
> +	}
> +
> +	if (p4d_val(p4dp[p4d_index]) == 0) {
> +		next_phys = pt_ops.alloc_pud(va);
> +		p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE);
> +		nextp = pt_ops.get_pud_virt(next_phys);
> +		memset(nextp, 0, PAGE_SIZE);
> +	} else {
> +		next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index]));
> +		nextp = pt_ops.get_pud_virt(next_phys);
> +	}
> +
> +	create_pud_mapping(nextp, va, pa, sz, prot);
> +}
> +
> +#define pgd_next_t		p4d_t
> +#define alloc_pgd_next(__va)	(pgtable_l5_enabled ?			\
> +		pt_ops.alloc_p4d(__va) : (pgtable_l4_enabled ?		\
> +		pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)))
> +#define get_pgd_next_virt(__pa)	(pgtable_l5_enabled ?			\
> +		pt_ops.get_p4d_virt(__pa) : (pgd_next_t *)(pgtable_l4_enabled ?	\
> +		pt_ops.get_pud_virt(__pa) : (pud_t *)pt_ops.get_pmd_virt(__pa)))
>   #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot)	\
> +				(pgtable_l5_enabled ?			\
> +		create_p4d_mapping(__nextp, __va, __pa, __sz, __prot) : \
>   				(pgtable_l4_enabled ?			\
> -		create_pud_mapping(__nextp, __va, __pa, __sz, __prot) :	\
> -		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot))
> -#define fixmap_pgd_next		(pgtable_l4_enabled ?			\
> -		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd)
> -#define trampoline_pgd_next	(pgtable_l4_enabled ?			\
> -		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)
> -#define early_dtb_pgd_next	(pgtable_l4_enabled ?			\
> -		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)
> +		create_pud_mapping((pud_t *)__nextp, __va, __pa, __sz, __prot) :	\
> +		create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)))
> +#define fixmap_pgd_next		(pgtable_l5_enabled ?			\
> +		(uintptr_t)fixmap_p4d : (pgtable_l4_enabled ?		\
> +		(uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd))
> +#define trampoline_pgd_next	(pgtable_l5_enabled ?			\
> +		(uintptr_t)trampoline_p4d : (pgtable_l4_enabled ?	\
> +		(uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
> +#define early_dtb_pgd_next	(pgtable_l5_enabled ?			\
> +		(uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ?	\
> +		(uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
>   #else
>   #define pgd_next_t		pte_t
>   #define alloc_pgd_next(__va)	pt_ops.alloc_pte(__va)
> @@ -537,6 +625,7 @@ static void __init create_pud_mapping(pud_t *pudp,
>   	create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
>   #define fixmap_pgd_next		((uintptr_t)fixmap_pte)
>   #define early_dtb_pgd_next	((uintptr_t)early_dtb_pmd)
> +#define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot)
>   #endif /* __PAGETABLE_PMD_FOLDED */
> @@ -627,6 +716,13 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
>   #endif /* CONFIG_STRICT_KERNEL_RWX */
>   
>   #ifdef CONFIG_64BIT
> +static void __init disable_pgtable_l5(void)
> +{
> +	pgtable_l5_enabled = false;
> +	kernel_map.page_offset = PAGE_OFFSET_L4;
> +	satp_mode = SATP_MODE_48;
> +}
> +
>   static void __init disable_pgtable_l4(void)
>   {
>   	pgtable_l4_enabled = false;
> @@ -643,8 +739,9 @@ static void __init disable_pgtable_l4(void)
>   static __init void set_satp_mode(uintptr_t dtb_pa)
>   {
>   	u64 identity_satp, hw_satp;
> -	uintptr_t set_satp_mode_pmd;
> +	uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
>   	int cpus_node;
> +	bool check_l4 = false;
>   
>   	/* Check if the user asked for sv39 explicitly in the device tree */
>   	cpus_node = fdt_path_offset((void *)dtb_pa, "/cpus");
> @@ -658,18 +755,31 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   				continue;
>   
>   			if (!strcmp(mmu_type, "riscv,sv39")) {
> +				disable_pgtable_l5();
>   				disable_pgtable_l4();
>   				return;
>   			}
>   
> +			if (!strcmp(mmu_type, "riscv,sv48")) {
> +				check_l4 = true;
> +			}
> +


If sv48 is set in the device tree, why would you test below whether it
is supported? I would take it as is, just like for sv39: I'm not sure we
want to silently override this and make a wrong device tree work.
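
Something like this, mirroring the sv39 branch just above (untested):

			if (!strcmp(mmu_type, "riscv,sv48")) {
				disable_pgtable_l5();
				return;
			}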


>   			break;
>   		}
>   	}
>   
> -	set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
> +retry:
> +	if (check_l4)
> +		disable_pgtable_l5();
> +
>   	create_pgd_mapping(early_pg_dir,
> -			   set_satp_mode_pmd, (uintptr_t)early_pud,
> +			   set_satp_mode_pmd,
> +			   check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
>   			   PGDIR_SIZE, PAGE_TABLE);
> +	if (!check_l4)
> +		create_p4d_mapping(early_p4d,
> +				set_satp_mode_pmd, (uintptr_t)early_pud,
> +				P4D_SIZE, PAGE_TABLE);
>   	create_pud_mapping(early_pud,
>   			   set_satp_mode_pmd, (uintptr_t)early_pmd,
>   			   PUD_SIZE, PAGE_TABLE);
> @@ -689,10 +799,16 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
>   	hw_satp = csr_swap(CSR_SATP, 0ULL);
>   	local_flush_tlb_all();
>   
> -	if (hw_satp != identity_satp)
> +	if (hw_satp != identity_satp) {
> +		if (!check_l4) {
> +			check_l4 = true;
> +			goto retry;
> +		}
>   		disable_pgtable_l4();
> +	}
>   
>   	memset(early_pg_dir, 0, PAGE_SIZE);
> +	memset(early_p4d, 0, PAGE_SIZE);
>   	memset(early_pud, 0, PAGE_SIZE);
>   	memset(early_pmd, 0, PAGE_SIZE);
>   }
> @@ -766,6 +882,10 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
>   			   PGDIR_SIZE,
>   			   IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
>   
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
> +				   (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
> +
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
>   				   (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
> @@ -802,6 +922,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   	pt_ops.get_pmd_virt = get_pmd_virt_early;
>   	pt_ops.alloc_pud = alloc_pud_early;
>   	pt_ops.get_pud_virt = get_pud_virt_early;
> +	pt_ops.alloc_p4d = alloc_p4d_early;
> +	pt_ops.get_p4d_virt = get_p4d_virt_early;
>   #endif
>   
>   	kernel_map.virt_addr = KERNEL_LINK_ADDR;
> @@ -855,6 +977,10 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   			   fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
>   
>   #ifndef __PAGETABLE_PMD_FOLDED
> +	/* Setup fixmap P4D and PUD */
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(fixmap_p4d, FIXADDR_START,
> +				   (uintptr_t)fixmap_pud, P4D_SIZE, PAGE_TABLE);
>   	/* Setup fixmap PUD and PMD */
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(fixmap_pud, FIXADDR_START,
> @@ -864,6 +990,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>   	/* Setup trampoline PGD and PMD */
>   	create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr,
>   			   trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE);
> +	if (pgtable_l5_enabled)
> +		create_p4d_mapping(trampoline_p4d, kernel_map.virt_addr,
> +				   (uintptr_t)trampoline_pud, P4D_SIZE, PAGE_TABLE);
>   	if (pgtable_l4_enabled)
>   		create_pud_mapping(trampoline_pud, kernel_map.virt_addr,
>   				   (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE);
> @@ -938,6 +1067,8 @@ static void __init setup_vm_final(void)
>   	pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
>   	pt_ops.alloc_pud = alloc_pud_fixmap;
>   	pt_ops.get_pud_virt = get_pud_virt_fixmap;
> +	pt_ops.alloc_p4d = alloc_p4d_fixmap;
> +	pt_ops.get_p4d_virt = get_p4d_virt_fixmap;
>   #endif
>   	/* Setup swapper PGD for fixmap */
>   	create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
> @@ -985,6 +1116,8 @@ static void __init setup_vm_final(void)
>   	pt_ops.get_pmd_virt = get_pmd_virt_late;
>   	pt_ops.alloc_pud = alloc_pud_late;
>   	pt_ops.get_pud_virt = get_pud_virt_late;
> +	pt_ops.alloc_p4d = alloc_p4d_late;
> +	pt_ops.get_p4d_virt = get_p4d_virt_late;
>   #endif
>   }
>   #else


You forgot to handle kasan in this patch. Actually, I'm updating kasan
for the sv48 patchset after commit 54c5639d8f50 ("riscv: Fix asan-stack
clang build") broke it. I'm struggling a bit: the kasan offset, which is
known at compile time, must be the same for sv39, sv48 and sv57, so we
have to move the kasan region next to the kernel. But then it is not
aligned on pgdir boundaries for sv48 and sv57, so the current kasan
population functions must be adapted.
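
For reference, the constraint comes from the generic shadow translation
helper, which bakes the offset in at compile time:

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}

so a single KASAN_SHADOW_OFFSET has to produce a valid shadow region
whichever satp mode we end up with at runtime.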

Anyway, I would advise you to wait for my updated patchset before 
tackling kasan for sv57.

Thanks,

Alex
