[PATCH] KVM: arm64: Use helpers to classify exception types reported via ESR

Mark Rutland mark.rutland at arm.com
Tue Nov 28 06:27:55 PST 2023


On Tue, Nov 28, 2023 at 03:04:01PM +0100, Ard Biesheuvel wrote:
> From: Ard Biesheuvel <ardb at kernel.org>
> 
> Currently, we rely on the fact that exceptions can be trivially
> classified by applying a mask/value pair to the syndrome value reported
> via the ESR register, but this will no longer be true once we enable
> support for 5 level paging.
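
(Side note for readers less familiar with the FSC encoding: with FEAT_LPA2 a
level -1 translation fault is reported with FSC 0b101011, so the existing
'(esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT' pattern no longer matches it.
A rough userspace sketch of that, reusing the ESR_ELx_FSC_* values from esr.h
purely for illustration, not as a proposed change:)

	#include <stdbool.h>
	#include <stdio.h>

	#define ESR_ELx_FSC_TYPE	0x3cUL	/* classification mask */
	#define ESR_ELx_FSC_FAULT	0x04UL	/* translation fault, level 0 */

	/* the pre-LPA2 mask/value style of classification */
	static bool is_translation_fault(unsigned long esr)
	{
		return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
	}

	int main(void)
	{
		/* levels 0..3 live in the 2 LSBs: FSC 0x04..0x07 all match */
		printf("level 3:  %d\n", is_translation_fault(0x07));
		/* level -1 is FSC 0x2b; 0x2b & 0x3c == 0x28, so no match */
		printf("level -1: %d\n", is_translation_fault(0x2b));
		return 0;
	}
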
> 
> So introduce a couple of helpers that encapsulate this mask/value pair
> matching, and wire them up in the code. No functional change intended;
> the actual handling of translation level -1 will be added in a
> subsequent patch.
> 
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
> Cc: Marc Zyngier <maz at kernel.org>
> Cc: Oliver Upton <oliver.upton at linux.dev>
> Cc: Ryan Roberts <ryan.roberts at arm.com>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
> Upon Marc's request, I have separated this from my LPA2 series so it
> can be taken via the KVM tree directly (the next rev of my LPA2 series
> is going to depend on it in any case).

One minor comment below, but either way:

Acked-by: Mark Rutland <mark.rutland at arm.com>

>  arch/arm64/include/asm/esr.h            | 15 ++++++++
>  arch/arm64/include/asm/kvm_emulate.h    | 36 +++++++++-----------
>  arch/arm64/kvm/hyp/include/hyp/fault.h  |  2 +-
>  arch/arm64/kvm/hyp/include/hyp/switch.h |  2 +-
>  arch/arm64/kvm/mmu.c                    | 35 +++++++++----------
>  5 files changed, 50 insertions(+), 40 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
> index ae35939f395b..450bf8fc908e 100644
> --- a/arch/arm64/include/asm/esr.h
> +++ b/arch/arm64/include/asm/esr.h
> @@ -392,6 +392,21 @@ static inline bool esr_is_data_abort(unsigned long esr)
>  	return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
>  }
>  
> +static inline bool esr_is_translation_fault(unsigned long esr)
> +{
> +	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
> +}
> +
> +static inline bool esr_is_permission_fault(unsigned long esr)
> +{
> +	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM;
> +}
> +
> +static inline bool esr_is_access_flag_fault(unsigned long esr)
> +{
> +	return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS;
> +}

Since these don't look at the EC field, it might be worth having 'fsc' in the
name to indicate that they are only meaningful when the ESR actually contains
a valid FSC field.

i.e. s/esr_is/esr_fsc_is/
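
e.g. for the first helper above, that would look like (same body, just the
clearer name):

	static inline bool esr_fsc_is_translation_fault(unsigned long esr)
	{
		return (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_FAULT;
	}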

Mark.

> +
>  const char *esr_get_class_string(unsigned long esr);
>  #endif /* __ASSEMBLY */
>  
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index d4f1e9cdd554..9b141452b6d7 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -404,24 +404,25 @@ static __always_inline u8 kvm_vcpu_trap_get_fault(const struct kvm_vcpu *vcpu)
>  	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC;
>  }
>  
> -static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vcpu)
> +static inline
> +bool kvm_vcpu_trap_is_permission_fault(const struct kvm_vcpu *vcpu)
>  {
> -	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
> +	return esr_is_permission_fault(kvm_vcpu_get_esr(vcpu));
>  }
>  
> -static __always_inline s8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
> +static inline
> +bool kvm_vcpu_trap_is_translation_fault(const struct kvm_vcpu *vcpu)
>  {
> -	/*
> -	 * Note: With the introduction of FEAT_LPA2 an extra level of
> -	 * translation (level -1) is added. This level (obviously) doesn't
> -	 * follow the previous convention of encoding the 4 levels in the 2 LSBs
> -	 * of the FSC so this function breaks if the fault is for level -1.
> -	 *
> -	 * However, stage2 tables always use concatenated tables for first level
> -	 * lookup and therefore it is guaranteed that the level will be between
> -	 * 0 and 3, and this function continues to work.
> -	 */
> -	return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
> +	return esr_is_translation_fault(kvm_vcpu_get_esr(vcpu));
> +}
> +
> +static inline
> +u64 kvm_vcpu_trap_get_perm_fault_granule(const struct kvm_vcpu *vcpu)
> +{
> +	unsigned long esr = kvm_vcpu_get_esr(vcpu);
> +
> +	BUG_ON(!esr_is_permission_fault(esr));
> +	return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(esr & ESR_ELx_FSC_LEVEL));
>  }
>  
>  static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
> @@ -464,12 +465,7 @@ static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
>  		 * first), then a permission fault to allow the flags
>  		 * to be set.
>  		 */
> -		switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
> -		case ESR_ELx_FSC_PERM:
> -			return true;
> -		default:
> -			return false;
> -		}
> +		return kvm_vcpu_trap_is_permission_fault(vcpu);
>  	}
>  
>  	if (kvm_vcpu_trap_is_iabt(vcpu))
> diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h
> index 9ddcfe2c3e57..18b0ffbc40a2 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/fault.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h
> @@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
>  	 */
>  	if (!(esr & ESR_ELx_S1PTW) &&
>  	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
> -	     (esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
> +	     esr_is_permission_fault(esr))) {
>  		if (!__translate_far_to_hpfar(far, &hpfar))
>  			return false;
>  	} else {
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index f99d8af0b9af..f44fb11307fb 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -591,7 +591,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
>  	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
>  		bool valid;
>  
> -		valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
> +		valid = kvm_vcpu_trap_is_translation_fault(vcpu) &&
>  			kvm_vcpu_dabt_isvalid(vcpu) &&
>  			!kvm_vcpu_abt_issea(vcpu) &&
>  			!kvm_vcpu_abt_iss1tw(vcpu);
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 986a2e6fb900..bdf2a1f0f6a9 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1376,7 +1376,7 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
>  
>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			  struct kvm_memory_slot *memslot, unsigned long hva,
> -			  unsigned long fault_status)
> +			  bool fault_is_perm)
>  {
>  	int ret = 0;
>  	bool write_fault, writable, force_pte = false;
> @@ -1390,17 +1390,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  	gfn_t gfn;
>  	kvm_pfn_t pfn;
>  	bool logging_active = memslot_is_logging(memslot);
> -	s8 fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
>  	long vma_pagesize, fault_granule;
>  	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
>  	struct kvm_pgtable *pgt;
>  
> -	fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
> +	if (fault_is_perm)
> +		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
>  	write_fault = kvm_is_write_fault(vcpu);
>  	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
>  	VM_BUG_ON(write_fault && exec_fault);
>  
> -	if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
> +	if (fault_is_perm && !write_fault && !exec_fault) {
>  		kvm_err("Unexpected L2 read permission error\n");
>  		return -EFAULT;
>  	}
> @@ -1411,8 +1411,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  	 * only exception to this is when dirty logging is enabled at runtime
>  	 * and a write fault needs to collapse a block entry into a table.
>  	 */
> -	if (fault_status != ESR_ELx_FSC_PERM ||
> -	    (logging_active && write_fault)) {
> +	if (!fault_is_perm || (logging_active && write_fault)) {
>  		ret = kvm_mmu_topup_memory_cache(memcache,
>  						 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
>  		if (ret)
> @@ -1529,8 +1528,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  	 * backed by a THP and thus use block mapping if possible.
>  	 */
>  	if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
> -		if (fault_status ==  ESR_ELx_FSC_PERM &&
> -		    fault_granule > PAGE_SIZE)
> +		if (fault_is_perm && fault_granule > PAGE_SIZE)
>  			vma_pagesize = fault_granule;
>  		else
>  			vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
> @@ -1543,7 +1541,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  		}
>  	}
>  
> -	if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
> +	if (!fault_is_perm && !device && kvm_has_mte(kvm)) {
>  		/* Check the VMM hasn't introduced a new disallowed VMA */
>  		if (mte_allowed) {
>  			sanitise_mte_tags(kvm, pfn, vma_pagesize);
> @@ -1569,7 +1567,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
>  	 * kvm_pgtable_stage2_map() should be called to change block size.
>  	 */
> -	if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
> +	if (fault_is_perm && vma_pagesize == fault_granule)
>  		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
>  	else
>  		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
> @@ -1620,7 +1618,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
>   */
>  int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>  {
> -	unsigned long fault_status;
> +	unsigned long esr;
>  	phys_addr_t fault_ipa;
>  	struct kvm_memory_slot *memslot;
>  	unsigned long hva;
> @@ -1628,12 +1626,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>  	gfn_t gfn;
>  	int ret, idx;
>  
> -	fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
> +	esr = kvm_vcpu_get_esr(vcpu);
>  
>  	fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
>  	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
>  
> -	if (fault_status == ESR_ELx_FSC_FAULT) {
> +	if (esr_is_translation_fault(esr)) {
>  		/* Beyond sanitised PARange (which is the IPA limit) */
>  		if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
>  			kvm_inject_size_fault(vcpu);
> @@ -1668,9 +1666,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>  			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
>  
>  	/* Check the stage-2 fault is trans. fault or write fault */
> -	if (fault_status != ESR_ELx_FSC_FAULT &&
> -	    fault_status != ESR_ELx_FSC_PERM &&
> -	    fault_status != ESR_ELx_FSC_ACCESS) {
> +	if (!esr_is_translation_fault(esr) &&
> +	    !esr_is_permission_fault(esr) &&
> +	    !esr_is_access_flag_fault(esr)) {
>  		kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
>  			kvm_vcpu_trap_get_class(vcpu),
>  			(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
> @@ -1732,13 +1730,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>  	/* Userspace should not be able to register out-of-bounds IPAs */
>  	VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->arch.hw_mmu));
>  
> -	if (fault_status == ESR_ELx_FSC_ACCESS) {
> +	if (esr_is_access_flag_fault(esr)) {
>  		handle_access_fault(vcpu, fault_ipa);
>  		ret = 1;
>  		goto out_unlock;
>  	}
>  
> -	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
> +	ret = user_mem_abort(vcpu, fault_ipa, memslot, hva,
> +			     esr_is_permission_fault(esr));
>  	if (ret == 0)
>  		ret = 1;
>  out:
> -- 
> 2.43.0.rc1.413.gea7ed67945-goog
> 
> 


