[PATCH v2 5/8] riscv: Implement sv48 support
Alex Ghiti
alex at ghiti.fr
Sat Jun 27 10:26:32 EDT 2020
Hi Nick,
Le 6/27/20 à 8:30 AM, Nick Kossifidis a écrit :
> Στις 2020-06-03 11:11, Alexandre Ghiti έγραψε:
>> By adding a new 4th level of page table, give the possibility to 64bit
>> kernel to address 2^48 bytes of virtual address: in practice, that
>> roughly
>> offers ~160TB of virtual address space to userspace and allows up to 64TB
>> of physical memory.
>>
>> If the underlying hardware does not support sv48, we will automatically
>> fallback to a standard 3-level page table by folding the new PUD level
>> into
>> PGDIR level. In order to detect HW capabilities at runtime, we
>> use SATP feature that ignores writes with an unsupported mode.
>>
>> Signed-off-by: Alexandre Ghiti <alex at ghiti.fr>
>> Reviewed-by: Anup Patel <anup at brainfault.org>
>> ---
>> arch/riscv/Kconfig | 6 +-
>> arch/riscv/include/asm/csr.h | 3 +-
>> arch/riscv/include/asm/fixmap.h | 1 +
>> arch/riscv/include/asm/page.h | 15 +++
>> arch/riscv/include/asm/pgalloc.h | 36 +++++++
>> arch/riscv/include/asm/pgtable-64.h | 97 ++++++++++++++++-
>> arch/riscv/include/asm/pgtable.h | 10 +-
>> arch/riscv/kernel/head.S | 3 +-
>> arch/riscv/mm/context.c | 2 +-
>> arch/riscv/mm/init.c | 158 +++++++++++++++++++++++++---
>> 10 files changed, 307 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>> index e167f16131f4..3f73f60e9732 100644
>> --- a/arch/riscv/Kconfig
>> +++ b/arch/riscv/Kconfig
>> @@ -68,6 +68,7 @@ config RISCV
>> select ARCH_HAS_GCOV_PROFILE_ALL
>> select HAVE_COPY_THREAD_TLS
>> select HAVE_ARCH_KASAN if MMU && 64BIT
>> + select RELOCATABLE if 64BIT
>>
>> config ARCH_MMAP_RND_BITS_MIN
>> default 18 if 64BIT
>> @@ -106,7 +107,7 @@ config PAGE_OFFSET
>> default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
>> default 0x80000000 if 64BIT && !MMU
>> default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
>> - default 0xffffffe000000000 if 64BIT && !MAXPHYSMEM_2GB
>> + default 0xffffc00000000000 if 64BIT && !MAXPHYSMEM_2GB
>>
>> config ARCH_FLATMEM_ENABLE
>> def_bool y
>> @@ -155,8 +156,11 @@ config GENERIC_HWEIGHT
>> config FIX_EARLYCON_MEM
>> def_bool MMU
>>
>> +# On a 64BIT relocatable kernel, the 4-level page table is at runtime
>> folded
>> +# on a 3-level page table when sv48 is not supported.
>> config PGTABLE_LEVELS
>> int
>> + default 4 if 64BIT && RELOCATABLE
>> default 3 if 64BIT
>> default 2
>>
>> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
>> index cec462e198ce..d41536c3f8d4 100644
>> --- a/arch/riscv/include/asm/csr.h
>> +++ b/arch/riscv/include/asm/csr.h
>> @@ -40,11 +40,10 @@
>> #ifndef CONFIG_64BIT
>> #define SATP_PPN _AC(0x003FFFFF, UL)
>> #define SATP_MODE_32 _AC(0x80000000, UL)
>> -#define SATP_MODE SATP_MODE_32
>> #else
>> #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
>> #define SATP_MODE_39 _AC(0x8000000000000000, UL)
>> -#define SATP_MODE SATP_MODE_39
>> +#define SATP_MODE_48 _AC(0x9000000000000000, UL)
>> #endif
>>
>> /* Exception cause high bit - is an interrupt if set */
>> diff --git a/arch/riscv/include/asm/fixmap.h
>> b/arch/riscv/include/asm/fixmap.h
>> index 2368d49eb4ef..d891cf9c73c5 100644
>> --- a/arch/riscv/include/asm/fixmap.h
>> +++ b/arch/riscv/include/asm/fixmap.h
>> @@ -27,6 +27,7 @@ enum fixed_addresses {
>> FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
>> FIX_PTE,
>> FIX_PMD,
>> + FIX_PUD,
>> FIX_TEXT_POKE1,
>> FIX_TEXT_POKE0,
>> FIX_EARLYCON_MEM_BASE,
>> diff --git a/arch/riscv/include/asm/page.h
>> b/arch/riscv/include/asm/page.h
>> index 48bb09b6a9b7..5e77fe7f0d6d 100644
>> --- a/arch/riscv/include/asm/page.h
>> +++ b/arch/riscv/include/asm/page.h
>> @@ -31,7 +31,19 @@
>> * When not using MMU this corresponds to the first free page in
>> * physical memory (aligned on a page boundary).
>> */
>> +#ifdef CONFIG_RELOCATABLE
>> +#define PAGE_OFFSET __page_offset
>> +
>> +#ifdef CONFIG_64BIT
>> +/*
>> + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address
>> space so
>> + * define the PAGE_OFFSET value for SV39.
>> + */
>> +#define PAGE_OFFSET_L3 0xffffffe000000000
>> +#endif /* CONFIG_64BIT */
>> +#else
>> #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
>> +#endif /* CONFIG_RELOCATABLE */
>>
>> #define KERN_VIRT_SIZE (-PAGE_OFFSET)
>>
>> @@ -102,6 +114,9 @@ extern unsigned long pfn_base;
>> extern unsigned long max_low_pfn;
>> extern unsigned long min_low_pfn;
>> extern unsigned long kernel_virt_addr;
>> +#ifdef CONFIG_RELOCATABLE
>> +extern unsigned long __page_offset;
>> +#endif
>>
>> #define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) +
>> va_pa_offset))
>> #define linear_mapping_va_to_pa(x) ((unsigned long)(x) -
>> va_pa_offset)
>> diff --git a/arch/riscv/include/asm/pgalloc.h
>> b/arch/riscv/include/asm/pgalloc.h
>> index 3f601ee8233f..540eaa5a8658 100644
>> --- a/arch/riscv/include/asm/pgalloc.h
>> +++ b/arch/riscv/include/asm/pgalloc.h
>> @@ -36,6 +36,42 @@ static inline void pud_populate(struct mm_struct
>> *mm, pud_t *pud, pmd_t *pmd)
>>
>> set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
>> }
>> +
>> +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d,
>> pud_t *pud)
>> +{
>> + if (pgtable_l4_enabled) {
>> + unsigned long pfn = virt_to_pfn(pud);
>> +
>> + set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
>> + }
>> +}
>> +
>> +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
>> + pud_t *pud)
>> +{
>> + if (pgtable_l4_enabled) {
>> + unsigned long pfn = virt_to_pfn(pud);
>> +
>> + set_p4d_safe(p4d,
>> + __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
>> + }
>> +}
>> +
>> +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned
>> long addr)
>> +{
>> + if (pgtable_l4_enabled)
>> + return (pud_t *)__get_free_page(
>> + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_ZERO);
>> + return NULL;
>> +}
>> +
>> +static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>> +{
>> + if (pgtable_l4_enabled)
>> + free_page((unsigned long)pud);
>> +}
>> +
>> +#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
>> #endif /* __PAGETABLE_PMD_FOLDED */
>>
>> #define pmd_pgtable(pmd) pmd_page(pmd)
>> diff --git a/arch/riscv/include/asm/pgtable-64.h
>> b/arch/riscv/include/asm/pgtable-64.h
>> index b15f70a1fdfa..c84c31fbf8da 100644
>> --- a/arch/riscv/include/asm/pgtable-64.h
>> +++ b/arch/riscv/include/asm/pgtable-64.h
>> @@ -8,16 +8,32 @@
>>
>> #include <linux/const.h>
>>
>> -#define PGDIR_SHIFT 30
>> +extern bool pgtable_l4_enabled;
>> +
>> +#define PGDIR_SHIFT (pgtable_l4_enabled ? 39 : 30)
>> /* Size of region mapped by a page global directory */
>> #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
>> #define PGDIR_MASK (~(PGDIR_SIZE - 1))
>>
>> +/* pud is folded into pgd in case of 3-level page table */
>> +#define PUD_SHIFT 30
>> +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
>> +#define PUD_MASK (~(PUD_SIZE - 1))
>> +
>> #define PMD_SHIFT 21
>> /* Size of region mapped by a page middle directory */
>> #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
>> #define PMD_MASK (~(PMD_SIZE - 1))
>>
>> +/* Page Upper Directory entry */
>> +typedef struct {
>> + unsigned long pud;
>> +} pud_t;
>> +
>> +#define pud_val(x) ((x).pud)
>> +#define __pud(x) ((pud_t) { (x) })
>> +#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
>> +
>> /* Page Middle Directory entry */
>> typedef struct {
>> unsigned long pmd;
>> @@ -60,6 +76,16 @@ static inline void pud_clear(pud_t *pudp)
>> set_pud(pudp, __pud(0));
>> }
>>
>> +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
>> +{
>> + return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
>> +}
>> +
>> +static inline unsigned long _pud_pfn(pud_t pud)
>> +{
>> + return pud_val(pud) >> _PAGE_PFN_SHIFT;
>> +}
>> +
>> static inline unsigned long pud_page_vaddr(pud_t pud)
>> {
>> return (unsigned long)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
>> @@ -70,6 +96,15 @@ static inline struct page *pud_page(pud_t pud)
>> return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
>> }
>>
>> +#define mm_pud_folded mm_pud_folded
>> +static inline bool mm_pud_folded(struct mm_struct *mm)
>> +{
>> + if (pgtable_l4_enabled)
>> + return false;
>> +
>> + return true;
>> +}
>> +
>> #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
>>
>> static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
>> @@ -90,4 +125,64 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
>> #define pmd_ERROR(e) \
>> pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
>>
>> +#define pud_ERROR(e) \
>> + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
>> +
>> +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + *p4dp = p4d;
>> + else
>> + set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
>> +}
>> +
>> +static inline int p4d_none(p4d_t p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + return (p4d_val(p4d) == 0);
>> +
>> + return 0;
>> +}
>> +
>> +static inline int p4d_present(p4d_t p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + return (p4d_val(p4d) & _PAGE_PRESENT);
>> +
>> + return 1;
>> +}
>> +
>> +static inline int p4d_bad(p4d_t p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + return !p4d_present(p4d);
>> +
>> + return 0;
>> +}
>> +
>> +static inline void p4d_clear(p4d_t *p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + set_p4d(p4d, __p4d(0));
>> +}
>> +
>> +static inline unsigned long p4d_page_vaddr(p4d_t p4d)
>> +{
>> + if (pgtable_l4_enabled)
>> + return (unsigned long)pfn_to_virt(
>> + p4d_val(p4d) >> _PAGE_PFN_SHIFT);
>> +
>> + return pud_page_vaddr((pud_t) { p4d_val(p4d) });
>> +}
>> +
>> +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
>> +
>> +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
>> +{
>> + if (pgtable_l4_enabled)
>> + return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
>> +
>> + return (pud_t *)p4d;
>> +}
>> +
>
> In my test I had to put
> #define pud_offset pud_offset
> here or else I got a compilation error due to pud_offset being redefined
> on include/linux/pgtable.h:
>
> #ifndef pud_offset
> static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
> {
> return (pud_t *)p4d_page_vaddr(*p4d) + pud_index(address);
> }
> #define pud_offset pud_offset
> #endif
Yes, the rebase on 5.8-rc2 requires that and removing pmd_offset definition.
Alex
More information about the linux-riscv
mailing list