[PATCH V3 3/3] arm64: Extend early page table code to allow for larger kernels
Steve Capper
steve.capper at arm.com
Wed Jan 3 08:20:11 PST 2018
On Tue, Jan 02, 2018 at 10:01:29PM +0000, Ard Biesheuvel wrote:
> Hi Steve,
Hi Ard,
>
> On 2 January 2018 at 15:12, Steve Capper <steve.capper at arm.com> wrote:
> > Currently the early assembler page table code assumes that precisely
> > 1xpgd, 1xpud, 1xpmd are sufficient to represent the early kernel text
> > mappings.
> >
> > Unfortunately this is rarely the case when running with a 16KB granule,
> > and we also run into limits with 4KB granule when building much larger
> > kernels.
> >
> > This patch re-writes the early page table logic to compute indices of
> > mappings for each level of page table, and if multiple indices are
> > required, the next-level page table is scaled up accordingly.
> >
> > Also the required size of the swapper_pg_dir is computed at link time
> > to cover the mapping [KIMAGE_ADDR + VOFFSET, _end]. When KASLR is
> > enabled, an extra page is set aside for each level that may require extra
> > entries at runtime.
> >
> > Signed-off-by: Steve Capper <steve.capper at arm.com>
> >
> > ---
> > Changed in V3:
> > Corrected KASLR computation
> > Rebased against arm64/for-next/core, particularly Kristina's 52-bit
> > PA series.
> > ---
> > arch/arm64/include/asm/kernel-pgtable.h | 47 ++++++++++-
> > arch/arm64/include/asm/pgtable.h | 1 +
> > arch/arm64/kernel/head.S | 145 +++++++++++++++++++++++---------
> > arch/arm64/kernel/vmlinux.lds.S | 1 +
> > arch/arm64/mm/mmu.c | 3 +-
> > 5 files changed, 157 insertions(+), 40 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
> > index 77a27af01371..82386e860dd2 100644
> > --- a/arch/arm64/include/asm/kernel-pgtable.h
> > +++ b/arch/arm64/include/asm/kernel-pgtable.h
> > @@ -52,7 +52,52 @@
> > #define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
> > #endif
> >
> > -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
> > +
> > +/*
> > + * If KASLR is enabled, then an offset K is added to the kernel address
> > + * space. The bottom 21 bits of this offset are zero to guarantee 2MB
> > + * alignment for PA and VA.
> > + *
> > + * For each pagetable level of the swapper, we know that the shift will
> > + * be larger than 21 (for the 4KB granule case we use section maps thus
> > + * the smallest shift is actually 30) thus there is the possibility that
> > + * KASLR can increase the number of pagetable entries by 1, so we make
> > + * room for this extra entry.
> > + *
> > + * Note KASLR cannot increase the number of required entries for a level
> > + * by more than one because it increments both the virtual start and end
> > + * addresses equally (the extra entry comes from the case where the end
> > + * address is just pushed over a boundary and the start address isn't).
> > + */
> > +
> > +#ifdef CONFIG_RANDOMIZE_BASE
> > +#define EARLY_KASLR (1)
> > +#else
> > +#define EARLY_KASLR (0)
> > +#endif
> > +
> > +#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \
> > + - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
> > +
> > +#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
> > +
> > +#if SWAPPER_PGTABLE_LEVELS > 3
> > +#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT))
> > +#else
> > +#define EARLY_PUDS(vstart, vend) (0)
> > +#endif
> > +
> > +#if SWAPPER_PGTABLE_LEVELS > 2
> > +#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT))
> > +#else
> > +#define EARLY_PMDS(vstart, vend) (0)
> > +#endif
> > +
> > +#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \
> > + + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
> > + + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
> > + + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
> > +#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
> > #define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
> >
> > #ifdef CONFIG_ARM64_SW_TTBR0_PAN
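(For anyone else following along: EARLY_ENTRIES() above just counts how many
slots the range [vstart, vend] touches at a given level. A quick worked
example, with made-up numbers:

	/*
	 * shift == 30 (1GB per entry), vstart == 3GB + 1MB, vend == 4GB + 1MB,
	 * no KASLR:
	 *
	 *     (vend >> 30) - (vstart >> 30) + 1  ==  4 - 3 + 1  ==  2
	 *
	 * i.e. the range touches two entries at this level, so the next level
	 * needs two pages. A KASLR displacement moves vstart and vend equally,
	 * so at most it pushes vend (but not vstart) over one more boundary,
	 * hence the single extra EARLY_KASLR entry.
	 */
)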
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index bfa237e892f1..54b0a8398055 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -706,6 +706,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
> > #endif
> >
> > extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
> > +extern pgd_t swapper_pg_end[];
> > extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
> > extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
> >
> > diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> > index 66f01869e97c..539e2642ed41 100644
> > --- a/arch/arm64/kernel/head.S
> > +++ b/arch/arm64/kernel/head.S
> > @@ -191,44 +191,110 @@ ENDPROC(preserve_boot_args)
> > .endm
> >
> > /*
> > - * Macro to populate the PGD (and possibily PUD) for the corresponding
> > - * block entry in the next level (tbl) for the given virtual address.
> > + * Macro to populate page table entries, these entries can be pointers to the next level
> > + * or last level entries pointing to physical memory.
> > *
> > - * Preserves: tbl, next, virt
> > - * Corrupts: ptrs_per_pgd, tmp1, tmp2
> > + * tbl: page table address
> > + * rtbl: pointer to page table or physical memory
> > + * index: start index to write
> > + * eindex: end index to write - [index, eindex] written to
> > + * flags: flags for pagetable entry to or in
> > + * inc: increment to rtbl between each entry
> > + * tmp1: temporary variable
> > + *
> > + * Preserves: tbl, eindex, flags, inc
> > + * Corrupts: index, tmp1
> > + * Returns: rtbl
> > */
> > - .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2
> > - create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
> > -#if SWAPPER_PGTABLE_LEVELS > 3
> > - mov \ptrs_per_pgd, PTRS_PER_PUD
> > - create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
> > -#endif
> > -#if SWAPPER_PGTABLE_LEVELS > 2
> > - mov \ptrs_per_pgd, PTRS_PER_PTE
> > - create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
> > -#endif
> > + .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
> > +9999: phys_to_pte \rtbl, \tmp1
>
> I know this is existing code, but you could take the opportunity to
> replace this label with
>
> .L\@ :
>
> (and update the branch instruction accordingly) so that we don't have
> to rely on the uniqueness of '9999'
Thanks, I must confess I was unaware of that technique and had to look
it up; I will incorporate this into the patch.
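Something along these lines, I think (untested sketch, keeping the
phys_to_pte operand order used elsewhere in this series):

	.macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
.L\@:	phys_to_pte	\rtbl, \tmp1
	orr	\tmp1, \tmp1, \flags		// tmp1 = table entry
	str	\tmp1, [\tbl, \index, lsl #3]
	add	\rtbl, \rtbl, \inc		// rtbl = pa next level
	add	\index, \index, #1
	cmp	\index, \eindex
	b.ls	.L\@
	.endm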
>
> > + orr \tmp1, \tmp1, \flags // tmp1 = table entry
> > + str \tmp1, [\tbl, \index, lsl #3]
> > + add \rtbl, \rtbl, \inc // rtbl = pa next level
> > + add \index, \index, #1
> > + cmp \index, \eindex
> > + b.ls 9999b
> > .endm
> >
> > /*
> > - * Macro to populate block entries in the page table for the start..end
> > - * virtual range (inclusive).
> > + * Compute indices of table entries from virtual address range. If multiple entries
> > + * were needed in the previous page table level then the next page table level is assumed
> > + * to be composed of multiple pages. (This effectively scales the end index).
> > + *
> > + * vstart: virtual address of start of range
> > + * vend: virtual address of end of range
> > + * shift: shift used to transform virtual address into index
> > + * ptrs: number of entries in page table
> > + * istart: index in table corresponding to vstart
> > + * iend: index in table corresponding to vend
> > + * count: On entry: how many entries required in previous level, scales our end index
> > + * On exit: returns how many entries required for next page table level
> > *
>
> If you make 'count' the number of /additional/ entries, you no longer
> have to add/sub #1 each time.
Agreed, I'll simplify this.
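So compute_indices would carry the number of *extra* entries instead,
something like this (untested sketch, with \ptrs still in a register here):

	.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
	lsr	\iend, \vend, \shift
	mov	\istart, \ptrs
	sub	\istart, \istart, #1
	and	\iend, \iend, \istart		// iend = (vend >> shift) & (ptrs - 1)
	mov	\istart, \ptrs
	mul	\istart, \istart, \count	// count = extra entries at previous level
	add	\iend, \iend, \istart		// iend += count * ptrs
						// our entries span multiple tables

	lsr	\istart, \vstart, \shift
	mov	\count, \ptrs
	sub	\count, \count, #1
	and	\istart, \istart, \count	// istart = (vstart >> shift) & (ptrs - 1)

	sub	\count, \iend, \istart		// count = extra entries for this level
	.endm

with map_memory then starting from "mov \count, #0" as you suggest below.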
>
> > - * Preserves: tbl, flags
> > - * Corrupts: phys, start, end, tmp, pstate
> > + * Preserves: vstart, vend, shift, ptrs
> > + * Returns: istart, iend, count
> > */
> > - .macro create_block_map, tbl, flags, phys, start, end, tmp
> > - lsr \start, \start, #SWAPPER_BLOCK_SHIFT
> > - and \start, \start, #PTRS_PER_PTE - 1 // table index
> > - bic \phys, \phys, #SWAPPER_BLOCK_SIZE - 1
> > - lsr \end, \end, #SWAPPER_BLOCK_SHIFT
> > - and \end, \end, #PTRS_PER_PTE - 1 // table end index
> > -9999: phys_to_pte \phys, \tmp
> > - orr \tmp, \tmp, \flags // table entry
> > - str \tmp, [\tbl, \start, lsl #3] // store the entry
> > - add \start, \start, #1 // next entry
> > - add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
> > - cmp \start, \end
> > - b.ls 9999b
> > + .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
> > + lsr \iend, \vend, \shift
> > + mov \istart, \ptrs
> > + sub \istart, \istart, #1
> > + and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
> > + mov \istart, \ptrs
> > + sub \count, \count, #1
> > + mul \istart, \istart, \count
> > + add \iend, \iend, \istart // iend += (count - 1) * ptrs
> > + // our entries span multiple tables
> > +
> > + lsr \istart, \vstart, \shift
> > + mov \count, \ptrs
> > + sub \count, \count, #1
> > + and \istart, \istart, \count
> > +
> > + sub \count, \iend, \istart
> > + add \count, \count, #1
> > + .endm
> > +
>
> You can simplify this macro by using an immediate for \ptrs. Please
> see the diff below [whitespace mangling courtesy of Gmail]
Thanks, I like the simplification a lot. For 52-bit kernel VAs, though, I
will need PTRS_PER_PGD to stay a variable rather than a compile-time immediate.
For 52-bit kernel PAs one can just use the maximum number of ptrs available:
for a 48-bit PA the leading address bits will always be zero, thus we
derive the same PGD indices from both the 48-bit and 52-bit PTRS_PER_PGD.
For kernel VAs, because the leading address bits are all ones, we need to use a
PTRS_PER_PGD corresponding to the VA size.
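A rough illustration, assuming 64KB pages, so PGDIR_SHIFT == 42 and
PTRS_PER_PGD == 64 for 48-bit versus 1024 for 52-bit:

	/*
	 * 48-bit PA:  pa[51:48] == 0, so
	 *     (pa >> 42) & (1024 - 1)  ==  (pa >> 42) & (64 - 1)
	 * i.e. masking with the 52-bit PTRS_PER_PGD - 1 yields the same PGD
	 * index for the idmap.
	 *
	 * Kernel VA (VA_BITS == 48):  va[63:48] are all ones, so
	 *     (va >> 42) & (1024 - 1)  !=  (va >> 42) & (64 - 1)
	 * i.e. the mask has to correspond to the actual VA size.
	 */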
>
> > +/*
> > + * Map memory for specified virtual address range. Each level of page table needed supports
> > + * multiple entries. If a level requires n entries the next page table level is assumed to be
> > + * formed from n pages.
> > + *
> > + * tbl: location of page table
> > + * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
> > + * vstart: start address to map
> > + * vend: end address to map - we map [vstart, vend]
> > + * flags: flags to use to map last level entries
> > + * phys: physical address corresponding to vstart - physical memory is contiguous
> > + * pgds: the number of pgd entries
> > + *
> > + * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
> > + * Preserves: vstart, vend, flags
> > + * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
> > + */
> > + .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
> > + add \rtbl, \tbl, #PAGE_SIZE
> > + mov \sv, \rtbl
> > + mov \count, #1
>
> #0 if you make \count the number of additional entries.
>
> In any case, for the series:
>
> Tested-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
> Reviewed-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
Thanks!
>
> > + compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
> > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
> > + mov \tbl, \sv
> > + mov \sv, \rtbl
> > +
> > +#if SWAPPER_PGTABLE_LEVELS > 3
> > + compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
> > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
> > + mov \tbl, \sv
> > + mov \sv, \rtbl
> > +#endif
> > +
> > +#if SWAPPER_PGTABLE_LEVELS > 2
> > + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
> > + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
> > + mov \tbl, \sv
> > +#endif
> > +
> > + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
> > + bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
> > + populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
> > .endm
> >
> > /*
> > @@ -246,14 +312,16 @@ __create_page_tables:
> > * dirty cache lines being evicted.
> > */
> > adrp x0, idmap_pg_dir
> > - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
> > + adrp x1, swapper_pg_end
> > + sub x1, x1, x0
> > bl __inval_dcache_area
> >
> > /*
> > * Clear the idmap and swapper page tables.
> > */
> > adrp x0, idmap_pg_dir
> > - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
> > + adrp x1, swapper_pg_end
> > + sub x1, x1, x0
> > 1: stp xzr, xzr, [x0], #16
> > stp xzr, xzr, [x0], #16
> > stp xzr, xzr, [x0], #16
> > @@ -318,10 +386,10 @@ __create_page_tables:
> > #endif
> > 1:
> > ldr_l x4, idmap_ptrs_per_pgd
> > - create_pgd_entry x0, x3, x4, x5, x6
> > mov x5, x3 // __pa(__idmap_text_start)
> > adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
> > - create_block_map x0, x7, x3, x5, x6, x4
> > +
> > + map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
> >
> > /*
> > * Map the kernel image (starting with PHYS_OFFSET).
> > @@ -330,12 +398,12 @@ __create_page_tables:
> > mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
> > add x5, x5, x23 // add KASLR displacement
> > mov x4, PTRS_PER_PGD
> > - create_pgd_entry x0, x5, x4, x3, x6
> > adrp x6, _end // runtime __pa(_end)
> > adrp x3, _text // runtime __pa(_text)
> > sub x6, x6, x3 // _end - _text
> > add x6, x6, x5 // runtime __va(_end)
> > - create_block_map x0, x7, x3, x5, x6, x4
> > +
> > + map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
> >
> > /*
> > * Since the page tables have been populated with non-cacheable
> > @@ -343,7 +411,8 @@ __create_page_tables:
> > * tables again to remove any speculatively loaded cache lines.
> > */
> > adrp x0, idmap_pg_dir
> > - ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
> > + adrp x1, swapper_pg_end
> > + sub x1, x1, x0
> > dmb sy
> > bl __inval_dcache_area
> >
> > diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> > index 4c7112a47469..0221aca6493d 100644
> > --- a/arch/arm64/kernel/vmlinux.lds.S
> > +++ b/arch/arm64/kernel/vmlinux.lds.S
> > @@ -230,6 +230,7 @@ SECTIONS
> > #endif
> > swapper_pg_dir = .;
> > . += SWAPPER_DIR_SIZE;
> > + swapper_pg_end = .;
> >
> > __pecoff_data_size = ABSOLUTE(. - __initdata_begin);
> > _end = .;
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index 4071602031ed..fdac11979bae 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -644,7 +644,8 @@ void __init paging_init(void)
> > * allocated with it.
> > */
> > memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
> > - SWAPPER_DIR_SIZE - PAGE_SIZE);
> > + __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
> > + - PAGE_SIZE);
> > }
> >
> > /*
> > --
> > 2.11.0
> >
>
>
> ------8<---------
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 539e2642ed41..0432dd8d083c 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -235,9 +235,7 @@ ENDPROC(preserve_boot_args)
> */
> .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
> lsr \iend, \vend, \shift
> - mov \istart, \ptrs
> - sub \istart, \istart, #1
> - and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
> + and \iend, \iend, \ptrs - 1 // iend = (vend >> shift) & (ptrs - 1)
> mov \istart, \ptrs
> sub \count, \count, #1
> mul \istart, \istart, \count
> @@ -245,9 +243,7 @@ ENDPROC(preserve_boot_args)
> // our entries span multiple tables
>
> lsr \istart, \vstart, \shift
> - mov \count, \ptrs
> - sub \count, \count, #1
> - and \istart, \istart, \count
> + and \istart, \istart, \ptrs - 1
>
> sub \count, \iend, \istart
> add \count, \count, #1
> @@ -376,6 +372,7 @@ __create_page_tables:
>
> mov x4, EXTRA_PTRS
> create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> + .set .Lidmap_ptrs_per_pgd, PTRS_PER_PGD
> #else
> /*
> * If VA_BITS == 48, we don't have to configure an additional
> @@ -383,13 +380,13 @@ __create_page_tables:
> */
> mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
> str_l x4, idmap_ptrs_per_pgd, x5
> + .set .Lidmap_ptrs_per_pgd, 1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
> #endif
> 1:
> - ldr_l x4, idmap_ptrs_per_pgd
> mov x5, x3 // __pa(__idmap_text_start)
> adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
>
> - map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
> + map_memory x0, x1, x3, x6, x7, x3, .Lidmap_ptrs_per_pgd, x10, x11, x12, x13, x14
>
> /*
> * Map the kernel image (starting with PHYS_OFFSET).
> @@ -397,13 +394,12 @@ __create_page_tables:
> adrp x0, swapper_pg_dir
> mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
> add x5, x5, x23 // add KASLR displacement
> - mov x4, PTRS_PER_PGD
> adrp x6, _end // runtime __pa(_end)
> adrp x3, _text // runtime __pa(_text)
> sub x6, x6, x3 // _end - _text
> add x6, x6, x5 // runtime __va(_end)
>
> - map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
> + map_memory x0, x1, x5, x6, x7, x3, PTRS_PER_PGD, x10, x11, x12, x13, x14
>
> /*
> * Since the page tables have been populated with non-cacheable
Cheers,
--
Steve