[PATCH 09/10] arm64: allow ID map to be extended to 52 bits

Marc Zyngier marc.zyngier at arm.com
Fri Dec 15 07:37:46 PST 2017


On 13/12/17 17:07, Kristina Martsenko wrote:
> Currently, when using VA_BITS < 48, if the ID map text happens to be
> placed in physical memory above VA_BITS, we increase the VA size (up to
> 48) and create a new table level, in order to map in the ID map text.
> This is okay because the system always supports 48 bits of VA.
> 
> This patch extends the code such that if the system supports 52 bits of
> VA, and the ID map text is placed that high up, then we increase the VA
> size accordingly, up to 52.
> 
> One difference from the current implementation is that so far the
> condition of VA_BITS < 48 has meant that the top level table is always
> "full", with the maximum number of entries, and an extra table level is
> always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
> level table is not full, and we simply need to increase the number of
> entries in it, instead of creating a new table level.
> 
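Just to spell out the 64k page arithmetic behind the paragraph above
(my own sketch, using the generic arm64 page-table definitions rather
than anything introduced by this patch):

	/*
	 * With 64k pages, each table level resolves PAGE_SHIFT - 3 = 13 bits.
	 *
	 * VA_BITS = 42: PGDIR_SHIFT = 29, PTRS_PER_PGD = 1 << (42 - 29) = 8192
	 *	-> the top-level table is already full, so reaching higher
	 *	   physical addresses needs an extra level (existing behaviour).
	 *
	 * VA_BITS = 48: PGDIR_SHIFT = 42, PTRS_PER_PGD = 1 << (48 - 42) = 64
	 *	-> the top level is only partly used; a 52-bit range needs
	 *	   1 << (52 - 42) = 1024 entries, which still fits in a single
	 *	   64k table, so only the entry count grows.
	 */
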
> Signed-off-by: Kristina Martsenko <kristina.martsenko at arm.com>
> ---
>  arch/arm/include/asm/kvm_mmu.h       |  5 +++
>  arch/arm64/include/asm/assembler.h   |  2 -
>  arch/arm64/include/asm/kvm_mmu.h     |  7 +++-
>  arch/arm64/include/asm/mmu_context.h | 14 ++++++-
>  arch/arm64/kernel/head.S             | 76 +++++++++++++++++++++---------------
>  arch/arm64/kvm/hyp-init.S            | 17 ++++----
>  arch/arm64/mm/mmu.c                  |  1 +
>  virt/kvm/arm/mmu.c                   | 12 +++---
>  8 files changed, 83 insertions(+), 51 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 8dbec683638b..8c5643e2eea4 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void)
>  	return false;
>  }
>  
> +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
> +{
> +	return PTRS_PER_PGD;
> +}
> +
>  static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
>  				       pgd_t *hyp_pgd,
>  				       pgd_t *merged_hyp_pgd,
> diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
> index 2058fd864bfb..11719b11f4a7 100644
> --- a/arch/arm64/include/asm/assembler.h
> +++ b/arch/arm64/include/asm/assembler.h
> @@ -344,10 +344,8 @@ alternative_endif
>   * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
>   */
>  	.macro	tcr_set_idmap_t0sz, valreg, tmpreg
> -#ifndef CONFIG_ARM64_VA_BITS_48
>  	ldr_l	\tmpreg, idmap_t0sz
>  	bfi	\valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
> -#endif
>  	.endm
>  
>  /*
> diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
> index b3f7b68b042d..8d663ca0d50c 100644
> --- a/arch/arm64/include/asm/kvm_mmu.h
> +++ b/arch/arm64/include/asm/kvm_mmu.h
> @@ -273,7 +273,12 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
>  
>  static inline bool __kvm_cpu_uses_extended_idmap(void)
>  {
> -	return __cpu_uses_extended_idmap();
> +	return __cpu_uses_extended_idmap_table();
> +}
> +
> +static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
> +{
> +	return idmap_ptrs_per_pgd;
>  }
>  
>  /*
> diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
> index accc2ff32a0e..043eed856b55 100644
> --- a/arch/arm64/include/asm/mmu_context.h
> +++ b/arch/arm64/include/asm/mmu_context.h
> @@ -63,11 +63,21 @@ static inline void cpu_set_reserved_ttbr0(void)
>   * physical memory, in which case it will be smaller.
>   */
>  extern u64 idmap_t0sz;
> +extern u64 idmap_ptrs_per_pgd;
>  
>  static inline bool __cpu_uses_extended_idmap(void)
>  {
> -	return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
> -		unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
> +	return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
> +}
> +
> +/*
> + * True if the extended ID map requires an extra level of translation table
> + * to be configured.
> + */
> +static inline bool __cpu_uses_extended_idmap_table(void)
> +{
> +	return __cpu_uses_extended_idmap() &&
> +		(idmap_ptrs_per_pgd == PTRS_PER_PGD);
>  }
>  
>  /*
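If I read the two helpers right, the distinction is (sketch only):

	/*
	 * __cpu_uses_extended_idmap():       idmap_t0sz was lowered, i.e. the
	 *	idmap covers more than VA_BITS.
	 * __cpu_uses_extended_idmap_table(): additionally, idmap_ptrs_per_pgd
	 *	was left at PTRS_PER_PGD (the default top level was already
	 *	full), so an extra table level was configured instead of just
	 *	widening the top-level table.
	 */
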
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index 64e09f207d1d..465f70328ba0 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -172,7 +172,7 @@ ENDPROC(preserve_boot_args)
>   *	ptrs:	#imm pointers per table page
>   *
>   * Preserves:	virt
> - * Corrupts:	tmp1, tmp2
> + * Corrupts:	ptrs, tmp1, tmp2
>   * Returns:	tbl -> next level table page address
>   */
>  	.macro	create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
> @@ -180,7 +180,8 @@ ENDPROC(preserve_boot_args)
>  	phys_to_pte \tmp1, \tmp2
>  	orr	\tmp2, \tmp2, #PMD_TYPE_TABLE	// address of next table and entry type
>  	lsr	\tmp1, \virt, #\shift
> -	and	\tmp1, \tmp1, #\ptrs - 1	// table index
> +	sub	\ptrs, \ptrs, #1
> +	and	\tmp1, \tmp1, \ptrs		// table index
>  	str	\tmp2, [\tbl, \tmp1, lsl #3]
>  	add	\tbl, \tbl, #PAGE_SIZE		// next level table page
>  	.endm
> @@ -190,15 +191,17 @@ ENDPROC(preserve_boot_args)
>   * block entry in the next level (tbl) for the given virtual address.
>   *
>   * Preserves:	tbl, next, virt
> - * Corrupts:	tmp1, tmp2
> + * Corrupts:	ptrs_per_pgd, tmp1, tmp2
>   */
> -	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
> -	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
> +	.macro	create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2
> +	create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
>  #if SWAPPER_PGTABLE_LEVELS > 3
> -	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
> +	mov	\ptrs_per_pgd, PTRS_PER_PUD
> +	create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
>  #endif
>  #if SWAPPER_PGTABLE_LEVELS > 2
> -	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
> +	mov	\ptrs_per_pgd, PTRS_PER_PTE
> +	create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
>  #endif
>  	.endm
>  
> @@ -262,26 +265,13 @@ __create_page_tables:
>  	adrp	x0, idmap_pg_dir
>  	adrp	x3, __idmap_text_start		// __pa(__idmap_text_start)
>  
> -#ifndef CONFIG_ARM64_VA_BITS_48
> -#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
> -#define EXTRA_PTRS	(1 << (48 - EXTRA_SHIFT))
> -
> -	/*
> -	 * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
> -	 * created that covers system RAM if that is located sufficiently high
> -	 * in the physical address space. So for the ID map, use an extended
> -	 * virtual range in that case, by configuring an additional translation
> -	 * level.
> -	 * First, we have to verify our assumption that the current value of
> -	 * VA_BITS was chosen such that all translation levels are fully
> -	 * utilised, and that lowering T0SZ will always result in an additional
> -	 * translation level to be configured.
> -	 */
> -#if VA_BITS != EXTRA_SHIFT
> -#error "Mismatch between VA_BITS and page size/number of translation levels"
> -#endif
> -
>  	/*
> +	 * VA_BITS may be too small to allow for an ID mapping to be created
> +	 * that covers system RAM if that is located sufficiently high in the
> +	 * physical address space. So for the ID map, use an extended virtual
> +	 * range in that case, and configure an additional translation level
> +	 * if needed.
> +	 *
>  	 * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
>  	 * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
>  	 * this number conveniently equals the number of leading zeroes in
> @@ -290,18 +280,41 @@ __create_page_tables:
>  	adrp	x5, __idmap_text_end
>  	clz	x5, x5
>  	cmp	x5, TCR_T0SZ(VA_BITS)	// default T0SZ small enough?
> -	b.ge	1f			// .. then skip additional level
> +	b.ge	1f			// .. then skip VA range extension
>  
>  	adr_l	x6, idmap_t0sz
>  	str	x5, [x6]
>  	dmb	sy
>  	dc	ivac, x6		// Invalidate potentially stale cache line
>  
> -	create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
> -1:
> +#if (VA_BITS < 48)
> +#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
> +#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
> +
> +	/*
> +	 * If VA_BITS < 48, we have to configure an additional table level.
> +	 * First, we have to verify our assumption that the current value of
> +	 * VA_BITS was chosen such that all translation levels are fully
> +	 * utilised, and that lowering T0SZ will always result in an additional
> +	 * translation level to be configured.
> +	 */
> +#if VA_BITS != EXTRA_SHIFT
> +#error "Mismatch between VA_BITS and page size/number of translation levels"
>  #endif
>  
> -	create_pgd_entry x0, x3, x5, x6
> +	mov	x4, EXTRA_PTRS
> +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> +#else
> +	/*
> +	 * If VA_BITS == 48, we don't have to configure an additional
> +	 * translation level, but the top-level table has more entries.
> +	 */
> +	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
> +	str_l	x4, idmap_ptrs_per_pgd, x5
> +#endif
> +1:
> +	ldr_l	x4, idmap_ptrs_per_pgd
> +	create_pgd_entry x0, x3, x4, x5, x6
>  	mov	x5, x3				// __pa(__idmap_text_start)
>  	adr_l	x6, __idmap_text_end		// __pa(__idmap_text_end)
>  	create_block_map x0, x7, x3, x5, x6, x4
> @@ -312,7 +325,8 @@ __create_page_tables:
>  	adrp	x0, swapper_pg_dir
>  	mov_q	x5, KIMAGE_VADDR + TEXT_OFFSET	// compile time __va(_text)
>  	add	x5, x5, x23			// add KASLR displacement
> -	create_pgd_entry x0, x5, x3, x6
> +	mov	x4, PTRS_PER_PGD
> +	create_pgd_entry x0, x5, x4, x3, x6
>  	adrp	x6, _end			// runtime __pa(_end)
>  	adrp	x3, _text			// runtime __pa(_text)
>  	sub	x6, x6, x3			// _end - _text
> diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
> index a99718f32af9..c2ebe4e992df 100644
> --- a/arch/arm64/kvm/hyp-init.S
> +++ b/arch/arm64/kvm/hyp-init.S
> @@ -72,24 +72,23 @@ __do_hyp_init:
>  	mov	x5, #TCR_EL2_RES1
>  	orr	x4, x4, x5
>  
> -#ifndef CONFIG_ARM64_VA_BITS_48
>  	/*
> -	 * If we are running with VA_BITS < 48, we may be running with an extra
> -	 * level of translation in the ID map. This is only the case if system
> -	 * RAM is out of range for the currently configured page size and number
> -	 * of translation levels, in which case we will also need the extra
> -	 * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
> +	 * The ID map may be configured to use an extended virtual address
> +	 * range. This is only the case if system RAM is out of range for the
> +	 * currently configured page size and VA_BITS, in which case we will
> +	 * also need the extended virtual range for the HYP ID map, or we won't
> +	 * be able to enable the EL2 MMU.
>  	 *
>  	 * However, at EL2, there is only one TTBR register, and we can't switch
>  	 * between translation tables *and* update TCR_EL2.T0SZ at the same
> -	 * time. Bottom line: we need the extra level in *both* our translation
> -	 * tables.
> +	 * time. Bottom line: we need to use the extended range with *both* our
> +	 * translation tables.
>  	 *
>  	 * So use the same T0SZ value we use for the ID map.
>  	 */
>  	ldr_l	x5, idmap_t0sz
>  	bfi	x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
> -#endif
> +
>  	/*
>  	 * Set the PS bits in TCR_EL2.
>  	 */
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 0c631a17ae1d..baa34418c3bf 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -50,6 +50,7 @@
>  #define NO_CONT_MAPPINGS	BIT(1)
>  
>  u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
> +u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
>  
>  u64 kimage_voffset __ro_after_init;
>  EXPORT_SYMBOL(kimage_voffset);
> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
> index b36945d49986..876caf531d32 100644
> --- a/virt/kvm/arm/mmu.c
> +++ b/virt/kvm/arm/mmu.c
> @@ -623,7 +623,7 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
>  	return 0;
>  }
>  
> -static int __create_hyp_mappings(pgd_t *pgdp,
> +static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
>  				 unsigned long start, unsigned long end,
>  				 unsigned long pfn, pgprot_t prot)
>  {
> @@ -636,7 +636,7 @@ static int __create_hyp_mappings(pgd_t *pgdp,
>  	addr = start & PAGE_MASK;
>  	end = PAGE_ALIGN(end);
>  	do {
> -		pgd = pgdp + pgd_index(addr);
> +		pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
>  
>  		if (pgd_none(*pgd)) {
>  			pud = pud_alloc_one(NULL, addr);
> @@ -699,8 +699,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
>  		int err;
>  
>  		phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
> -		err = __create_hyp_mappings(hyp_pgd, virt_addr,
> -					    virt_addr + PAGE_SIZE,
> +		err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
> +					    virt_addr, virt_addr + PAGE_SIZE,
>  					    __phys_to_pfn(phys_addr),
>  					    prot);
>  		if (err)
> @@ -731,7 +731,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
>  	if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
>  		return -EINVAL;
>  
> -	return __create_hyp_mappings(hyp_pgd, start, end,
> +	return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
>  				     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
>  }
>  
> @@ -1737,7 +1737,7 @@ static int kvm_map_idmap_text(pgd_t *pgd)
>  	int err;
>  
>  	/* Create the idmap in the boot page tables */
> -	err = 	__create_hyp_mappings(pgd,
> +	err = 	__create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
>  				      hyp_idmap_start, hyp_idmap_end,
>  				      __phys_to_pfn(hyp_idmap_start),
>  				      PAGE_HYP_EXEC);
> 

I wonder if you could push these changes into __create_hyp_mappings()
instead of changing all the call sites:

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index b36945d49986..0d44fb2bd9c6 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -629,14 +629,17 @@ static int __create_hyp_mappings(pgd_t *pgdp,
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	unsigned long addr, next;
+	unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD;
 	int err = 0;
 
+	if (pgdp != hyp_pgd)
+		ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd();
+
 	mutex_lock(&kvm_hyp_pgd_mutex);
 	addr = start & PAGE_MASK;
 	end = PAGE_ALIGN(end);
 	do {
-		pgd = pgdp + pgd_index(addr);
+		pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
 
 		if (pgd_none(*pgd)) {
 			pud = pud_alloc_one(NULL, addr);

with a suitable comment explaining why this is needed?
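Something like this above the "pgdp != hyp_pgd" check would probably do
(wording is only a sketch):

	/*
	 * If we were handed the boot idmap pgd rather than the runtime
	 * hyp pgd, it may be using an extended VA range to reach idmap
	 * text placed out of reach of VA_BITS, and may therefore have
	 * more pgd pointers than PTRS_PER_PGD. Index it accordingly.
	 */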

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...


