[PATCH v2 5/8] riscv: Implement sv48 support

Nick Kossifidis mick at ics.forth.gr
Sat Jun 27 08:30:50 EDT 2020


Στις 2020-06-03 11:11, Alexandre Ghiti έγραψε:
> By adding a new 4th level of page table, give the possibility to 64bit
> kernel to address 2^48 bytes of virtual address: in practice, that 
> roughly
> offers ~160TB of virtual address space to userspace and allows up to 
> 64TB
> of physical memory.
> 
> If the underlying hardware does not support sv48, we will automatically
> fallback to a standard 3-level page table by folding the new PUD level 
> into
> PGDIR level. In order to detect HW capabilities at runtime, we
> use SATP feature that ignores writes with an unsupported mode.
> 
> Signed-off-by: Alexandre Ghiti <alex at ghiti.fr>
> Reviewed-by: Anup Patel <anup at brainfault.org>
> ---
>  arch/riscv/Kconfig                  |   6 +-
>  arch/riscv/include/asm/csr.h        |   3 +-
>  arch/riscv/include/asm/fixmap.h     |   1 +
>  arch/riscv/include/asm/page.h       |  15 +++
>  arch/riscv/include/asm/pgalloc.h    |  36 +++++++
>  arch/riscv/include/asm/pgtable-64.h |  97 ++++++++++++++++-
>  arch/riscv/include/asm/pgtable.h    |  10 +-
>  arch/riscv/kernel/head.S            |   3 +-
>  arch/riscv/mm/context.c             |   2 +-
>  arch/riscv/mm/init.c                | 158 +++++++++++++++++++++++++---
>  10 files changed, 307 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e167f16131f4..3f73f60e9732 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -68,6 +68,7 @@ config RISCV
>  	select ARCH_HAS_GCOV_PROFILE_ALL
>  	select HAVE_COPY_THREAD_TLS
>  	select HAVE_ARCH_KASAN if MMU && 64BIT
> +	select RELOCATABLE if 64BIT
> 
>  config ARCH_MMAP_RND_BITS_MIN
>  	default 18 if 64BIT
> @@ -106,7 +107,7 @@ config PAGE_OFFSET
>  	default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
>  	default 0x80000000 if 64BIT && !MMU
>  	default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
> -	default 0xffffffe000000000 if 64BIT && !MAXPHYSMEM_2GB
> +	default 0xffffc00000000000 if 64BIT && !MAXPHYSMEM_2GB
> 
>  config ARCH_FLATMEM_ENABLE
>  	def_bool y
> @@ -155,8 +156,11 @@ config GENERIC_HWEIGHT
>  config FIX_EARLYCON_MEM
>  	def_bool MMU
> 
> +# On a 64BIT relocatable kernel, the 4-level page table is at runtime 
> folded
> +# on a 3-level page table when sv48 is not supported.
>  config PGTABLE_LEVELS
>  	int
> +	default 4 if 64BIT && RELOCATABLE
>  	default 3 if 64BIT
>  	default 2
> 
> diff --git a/arch/riscv/include/asm/csr.h 
> b/arch/riscv/include/asm/csr.h
> index cec462e198ce..d41536c3f8d4 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -40,11 +40,10 @@
>  #ifndef CONFIG_64BIT
>  #define SATP_PPN	_AC(0x003FFFFF, UL)
>  #define SATP_MODE_32	_AC(0x80000000, UL)
> -#define SATP_MODE	SATP_MODE_32
>  #else
>  #define SATP_PPN	_AC(0x00000FFFFFFFFFFF, UL)
>  #define SATP_MODE_39	_AC(0x8000000000000000, UL)
> -#define SATP_MODE	SATP_MODE_39
> +#define SATP_MODE_48	_AC(0x9000000000000000, UL)
>  #endif
> 
>  /* Exception cause high bit - is an interrupt if set */
> diff --git a/arch/riscv/include/asm/fixmap.h 
> b/arch/riscv/include/asm/fixmap.h
> index 2368d49eb4ef..d891cf9c73c5 100644
> --- a/arch/riscv/include/asm/fixmap.h
> +++ b/arch/riscv/include/asm/fixmap.h
> @@ -27,6 +27,7 @@ enum fixed_addresses {
>  	FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
>  	FIX_PTE,
>  	FIX_PMD,
> +	FIX_PUD,
>  	FIX_TEXT_POKE1,
>  	FIX_TEXT_POKE0,
>  	FIX_EARLYCON_MEM_BASE,
> diff --git a/arch/riscv/include/asm/page.h 
> b/arch/riscv/include/asm/page.h
> index 48bb09b6a9b7..5e77fe7f0d6d 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -31,7 +31,19 @@
>   * When not using MMU this corresponds to the first free page in
>   * physical memory (aligned on a page boundary).
>   */
> +#ifdef CONFIG_RELOCATABLE
> +#define PAGE_OFFSET		__page_offset
> +
> +#ifdef CONFIG_64BIT
> +/*
> + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address 
> space so
> + * define the PAGE_OFFSET value for SV39.
> + */
> +#define PAGE_OFFSET_L3		0xffffffe000000000
> +#endif /* CONFIG_64BIT */
> +#else
>  #define PAGE_OFFSET		_AC(CONFIG_PAGE_OFFSET, UL)
> +#endif /* CONFIG_RELOCATABLE */
> 
>  #define KERN_VIRT_SIZE (-PAGE_OFFSET)
> 
> @@ -102,6 +114,9 @@ extern unsigned long pfn_base;
>  extern unsigned long max_low_pfn;
>  extern unsigned long min_low_pfn;
>  extern unsigned long kernel_virt_addr;
> +#ifdef CONFIG_RELOCATABLE
> +extern unsigned long __page_offset;
> +#endif
> 
>  #define __pa_to_va_nodebug(x)	((void *)((unsigned long) (x) + 
> va_pa_offset))
>  #define linear_mapping_va_to_pa(x)	((unsigned long)(x) - va_pa_offset)
> diff --git a/arch/riscv/include/asm/pgalloc.h 
> b/arch/riscv/include/asm/pgalloc.h
> index 3f601ee8233f..540eaa5a8658 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -36,6 +36,42 @@ static inline void pud_populate(struct mm_struct
> *mm, pud_t *pud, pmd_t *pmd)
> 
>  	set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
>  }
> +
> +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, 
> pud_t *pud)
> +{
> +	if (pgtable_l4_enabled) {
> +		unsigned long pfn = virt_to_pfn(pud);
> +
> +		set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
> +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
> +				     pud_t *pud)
> +{
> +	if (pgtable_l4_enabled) {
> +		unsigned long pfn = virt_to_pfn(pud);
> +
> +		set_p4d_safe(p4d,
> +			     __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
> +	}
> +}
> +
> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long 
> addr)
> +{
> +	if (pgtable_l4_enabled)
> +		return (pud_t *)__get_free_page(
> +				GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
> +	return NULL;
> +}
> +
> +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
> +{
> +	if (pgtable_l4_enabled)
> +		free_page((unsigned long)pud);
> +}
> +
> +#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
>  #endif /* __PAGETABLE_PMD_FOLDED */
> 
>  #define pmd_pgtable(pmd)	pmd_page(pmd)
> diff --git a/arch/riscv/include/asm/pgtable-64.h
> b/arch/riscv/include/asm/pgtable-64.h
> index b15f70a1fdfa..c84c31fbf8da 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -8,16 +8,32 @@
> 
>  #include <linux/const.h>
> 
> -#define PGDIR_SHIFT     30
> +extern bool pgtable_l4_enabled;
> +
> +#define PGDIR_SHIFT     (pgtable_l4_enabled ? 39 : 30)
>  /* Size of region mapped by a page global directory */
>  #define PGDIR_SIZE      (_AC(1, UL) << PGDIR_SHIFT)
>  #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
> 
> +/* pud is folded into pgd in case of 3-level page table */
> +#define PUD_SHIFT	30
> +#define PUD_SIZE	(_AC(1, UL) << PUD_SHIFT)
> +#define PUD_MASK	(~(PUD_SIZE - 1))
> +
>  #define PMD_SHIFT       21
>  /* Size of region mapped by a page middle directory */
>  #define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT)
>  #define PMD_MASK        (~(PMD_SIZE - 1))
> 
> +/* Page Upper Directory entry */
> +typedef struct {
> +	unsigned long pud;
> +} pud_t;
> +
> +#define pud_val(x)      ((x).pud)
> +#define __pud(x)        ((pud_t) { (x) })
> +#define PTRS_PER_PUD    (PAGE_SIZE / sizeof(pud_t))
> +
>  /* Page Middle Directory entry */
>  typedef struct {
>  	unsigned long pmd;
> @@ -60,6 +76,16 @@ static inline void pud_clear(pud_t *pudp)
>  	set_pud(pudp, __pud(0));
>  }
> 
> +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
> +{
> +	return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
> +}
> +
> +static inline unsigned long _pud_pfn(pud_t pud)
> +{
> +	return pud_val(pud) >> _PAGE_PFN_SHIFT;
> +}
> +
>  static inline unsigned long pud_page_vaddr(pud_t pud)
>  {
>  	return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
> @@ -70,6 +96,15 @@ static inline struct page *pud_page(pud_t pud)
>  	return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
>  }
> 
> +#define mm_pud_folded	mm_pud_folded
> +static inline bool mm_pud_folded(struct mm_struct *mm)
> +{
> +	if (pgtable_l4_enabled)
> +		return false;
> +
> +	return true;
> +}
> +
>  #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
> 
>  static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
> @@ -90,4 +125,64 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
>  #define pmd_ERROR(e) \
>  	pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
> 
> +#define pud_ERROR(e)	\
> +	pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
> +
> +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		*p4dp = p4d;
> +	else
> +		set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
> +}
> +
> +static inline int p4d_none(p4d_t p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		return (p4d_val(p4d) == 0);
> +
> +	return 0;
> +}
> +
> +static inline int p4d_present(p4d_t p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		return (p4d_val(p4d) & _PAGE_PRESENT);
> +
> +	return 1;
> +}
> +
> +static inline int p4d_bad(p4d_t p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		return !p4d_present(p4d);
> +
> +	return 0;
> +}
> +
> +static inline void p4d_clear(p4d_t *p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		set_p4d(p4d, __p4d(0));
> +}
> +
> +static inline unsigned long p4d_page_vaddr(p4d_t p4d)
> +{
> +	if (pgtable_l4_enabled)
> +		return (unsigned long)pfn_to_virt(
> +				p4d_val(p4d) >> _PAGE_PFN_SHIFT);
> +
> +	return pud_page_vaddr((pud_t) { p4d_val(p4d) });
> +}
> +
> +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
> +
> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
> +{
> +	if (pgtable_l4_enabled)
> +		return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
> +
> +	return (pud_t *)p4d;
> +}
> +

In my test I had to add
#define pud_offset pud_offset
here; otherwise I got a compilation error because pud_offset is defined a
second time in include/linux/pgtable.h, whose generic version is only
skipped when the pud_offset macro is already defined:

#ifndef pud_offset
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
{
         return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
}
#define pud_offset pud_offset
#endif



More information about the linux-riscv mailing list