[PATCH v13 27/48] arm64: RMI: Runtime faulting of memory

Steven Price steven.price at arm.com
Fri Mar 20 09:44:51 PDT 2026


On 19/03/2026 18:41, Wei-Lin Chang wrote:
> On Wed, Mar 18, 2026 at 03:53:51PM +0000, Steven Price wrote:
>> At runtime if the realm guest accesses memory which hasn't yet been
>> mapped then KVM needs to either populate the region or fault the guest.
>>
>> For memory in the lower (protected) region of IPA a fresh page is
>> provided to the RMM which will zero the contents. For memory in the
>> upper (shared) region of IPA, the memory from the memslot is mapped
>> into the realm VM non secure.
>>
>> Signed-off-by: Steven Price <steven.price at arm.com>
>> ---
>> Changes since v12:
>>  * Switch to RMM v2.0 range based APIs.
>> Changes since v11:
>>  * Adapt to upstream changes.
>> Changes since v10:
>>  * RME->RMI renaming.
>>  * Adapt to upstream gmem changes.
>> Changes since v9:
>>  * Fix call to kvm_stage2_unmap_range() in kvm_free_stage2_pgd() to set
>>    may_block to avoid stall warnings.
>>  * Minor coding style fixes.
>> Changes since v8:
>>  * Propagate the may_block flag.
>>  * Minor comments and coding style changes.
>> Changes since v7:
>>  * Remove redundant WARN_ONs for realm_create_rtt_levels() - it will
>>    internally WARN when necessary.
>> Changes since v6:
>>  * Handle PAGE_SIZE being larger than RMM granule size.
>>  * Some minor renaming following review comments.
>> Changes since v5:
>>  * Reduce use of struct page in preparation for supporting the RMM
>>    having a different page size to the host.
>>  * Handle a race when delegating a page where another CPU has faulted on
>>    the same page (and already delegated the physical page) but not yet
>>    mapped it. In this case simply return to the guest to either use the
>>    mapping from the other CPU (or refault if the race is lost).
>>  * The changes to populate_par_region() are moved into the previous
>>    patch where they belong.
>> Changes since v4:
>>  * Code cleanup following review feedback.
>>  * Drop the PTE_SHARED bit when creating unprotected page table entries.
>>    This is now set by the RMM and the host has no control of it and the
>>    spec requires the bit to be set to zero.
>> Changes since v2:
>>  * Avoid leaking memory if failing to map it in the realm.
>>  * Correctly mask RTT based on LPA2 flag (see rtt_get_phys()).
>>  * Adapt to changes in previous patches.
>> ---
>>  arch/arm64/include/asm/kvm_emulate.h |   8 ++
>>  arch/arm64/include/asm/kvm_rmi.h     |  12 ++
>>  arch/arm64/kvm/mmu.c                 | 139 ++++++++++++++++--
>>  arch/arm64/kvm/rmi.c                 | 206 +++++++++++++++++++++++++++
>>  4 files changed, 351 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
>> index d194d91fbc2a..0734c4a65174 100644
>> --- a/arch/arm64/include/asm/kvm_emulate.h
>> +++ b/arch/arm64/include/asm/kvm_emulate.h
>> @@ -706,6 +706,14 @@ static inline bool kvm_realm_is_created(struct kvm *kvm)
>>  	return kvm_is_realm(kvm) && kvm_realm_state(kvm) != REALM_STATE_NONE;
>>  }
>>  
>> +static inline gpa_t kvm_gpa_from_fault(struct kvm *kvm, phys_addr_t ipa)
> 
> I think we should use gpa_t as the type for ipa.

This is just matching the usage in mmu.c, e.g. user_mem_abort() has
fault_ipa as a phys_addr_t.

>> +{
>> +	if (!kvm_is_realm(kvm))
>> +		return ipa;
>> +
>> +	return ipa & ~BIT(kvm->arch.realm.ia_bits - 1);
>> +}
>> +
>>  static inline bool vcpu_is_rec(struct kvm_vcpu *vcpu)
>>  {
>>  	return kvm_is_realm(vcpu->kvm);
>> diff --git a/arch/arm64/include/asm/kvm_rmi.h b/arch/arm64/include/asm/kvm_rmi.h
>> index bf663bb240c4..38208be3c602 100644
>> --- a/arch/arm64/include/asm/kvm_rmi.h
>> +++ b/arch/arm64/include/asm/kvm_rmi.h
>> @@ -6,6 +6,7 @@
>>  #ifndef __ASM_KVM_RMI_H
>>  #define __ASM_KVM_RMI_H
>>  
>> +#include <asm/kvm_pgtable.h>
>>  #include <asm/rmi_smc.h>
>>  
>>  /**
>> @@ -105,6 +106,17 @@ void kvm_realm_unmap_range(struct kvm *kvm,
>>  			   unsigned long size,
>>  			   bool unmap_private,
>>  			   bool may_block);
>> +int realm_map_protected(struct kvm *kvm,
>> +			unsigned long base_ipa,
>> +			kvm_pfn_t pfn,
>> +			unsigned long size,
>> +			struct kvm_mmu_memory_cache *memcache);
>> +int realm_map_non_secure(struct realm *realm,
>> +			 unsigned long ipa,
>> +			 kvm_pfn_t pfn,
>> +			 unsigned long size,
>> +			 enum kvm_pgtable_prot prot,
>> +			 struct kvm_mmu_memory_cache *memcache);
>>  
>>  static inline bool kvm_realm_is_private_address(struct realm *realm,
>>  						unsigned long addr)
>> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
>> index bad93938acdb..73c18c2861a2 100644
>> --- a/arch/arm64/kvm/mmu.c
>> +++ b/arch/arm64/kvm/mmu.c
>> @@ -334,8 +334,15 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
>>  
>>  	lockdep_assert_held_write(&kvm->mmu_lock);
>>  	WARN_ON(size & ~PAGE_MASK);
>> -	WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap),
>> -				   may_block));
>> +
>> +	if (kvm_is_realm(kvm)) {
>> +		kvm_realm_unmap_range(kvm, start, size, !only_shared,
>> +				      may_block);
>> +	} else {
>> +		WARN_ON(stage2_apply_range(mmu, start, end,
>> +					   KVM_PGT_FN(kvm_pgtable_stage2_unmap),
>> +					   may_block));
>> +	}
>>  }
>>  
>>  void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
>> @@ -355,7 +362,10 @@ static void stage2_flush_memslot(struct kvm *kvm,
>>  	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
>>  	phys_addr_t end = addr + PAGE_SIZE * memslot->npages;
>>  
>> -	kvm_stage2_flush_range(&kvm->arch.mmu, addr, end);
>> +	if (kvm_is_realm(kvm))
>> +		kvm_realm_unmap_range(kvm, addr, end - addr, false, true);
>> +	else
>> +		kvm_stage2_flush_range(&kvm->arch.mmu, addr, end);
>>  }
>>  
>>  /**
>> @@ -1081,6 +1091,10 @@ void stage2_unmap_vm(struct kvm *kvm)
>>  	struct kvm_memory_slot *memslot;
>>  	int idx, bkt;
>>  
>> +	/* For realms this is handled by the RMM so nothing to do here */
>> +	if (kvm_is_realm(kvm))
>> +		return;
>> +
>>  	idx = srcu_read_lock(&kvm->srcu);
>>  	mmap_read_lock(current->mm);
>>  	write_lock(&kvm->mmu_lock);
>> @@ -1106,6 +1120,9 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
>>  	if (kvm_is_realm(kvm) &&
>>  	    (kvm_realm_state(kvm) != REALM_STATE_DEAD &&
>>  	     kvm_realm_state(kvm) != REALM_STATE_NONE)) {
>> +		struct realm *realm = &kvm->arch.realm;
>> +
>> +		kvm_stage2_unmap_range(mmu, 0, BIT(realm->ia_bits - 1), true);
>>  		write_unlock(&kvm->mmu_lock);
>>  		kvm_realm_destroy_rtts(kvm);
>>  
>> @@ -1516,6 +1533,29 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
>>  	return vma->vm_flags & VM_MTE_ALLOWED;
>>  }
>>  
>> +static int realm_map_ipa(struct kvm *kvm, phys_addr_t ipa,
>> +			 kvm_pfn_t pfn, unsigned long map_size,
>> +			 enum kvm_pgtable_prot prot,
>> +			 struct kvm_mmu_memory_cache *memcache)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +
>> +	/*
>> +	 * Write permission is required for now even though it's possible to
>> +	 * map unprotected pages (granules) as read-only. It's impossible to
>> +	 * map protected pages (granules) as read-only.
>> +	 */
>> +	if (WARN_ON(!(prot & KVM_PGTABLE_PROT_W)))
>> +		return -EFAULT;
>> +
>> +	ipa = ALIGN_DOWN(ipa, PAGE_SIZE);
>> +	if (!kvm_realm_is_private_address(realm, ipa))
>> +		return realm_map_non_secure(realm, ipa, pfn, map_size, prot,
>> +					    memcache);
>> +
>> +	return realm_map_protected(kvm, ipa, pfn, map_size, memcache);
>> +}
>> +
>>  static bool kvm_vma_is_cacheable(struct vm_area_struct *vma)
>>  {
>>  	switch (FIELD_GET(PTE_ATTRINDX_MASK, pgprot_val(vma->vm_page_prot))) {
>> @@ -1588,6 +1628,7 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_SHARED;
>>  	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
>>  	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
>> +	gpa_t gpa = kvm_gpa_from_fault(vcpu->kvm, fault_ipa);
>>  	unsigned long mmu_seq;
>>  	struct page *page;
>>  	struct kvm *kvm = vcpu->kvm;
>> @@ -1596,6 +1637,29 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	gfn_t gfn;
>>  	int ret;
>>  
>> +	if (kvm_is_realm(vcpu->kvm)) {
>> +		/* check for memory attribute mismatch */
>> +		bool is_priv_gfn = kvm_mem_is_private(kvm, gpa >> PAGE_SHIFT);
>> +		/*
>> +		 * For Realms, the shared address is an alias of the private
>> +		 * PA with the top bit set. Thus is the fault address matches
> 
> Hi,
> 
> Thus is -> Thus if.

Ack

>> +		 * the GPA then it is the private alias.
>> +		 */
>> +		bool is_priv_fault = (gpa == fault_ipa);
>> +
>> +		if (is_priv_gfn != is_priv_fault) {
>> +			kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>> +						      kvm_is_write_fault(vcpu),
>> +						      false,
>> +						      is_priv_fault);
>> +			/*
>> +			 * KVM_EXIT_MEMORY_FAULT requires a return code of
>> +			 * -EFAULT, see the API documentation
>> +			 */
>> +			return -EFAULT;
>> +		}
>> +	}
>> +
>>  	ret = prepare_mmu_memcache(vcpu, true, &memcache);
>>  	if (ret)
>>  		return ret;
>> @@ -1603,7 +1667,7 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	if (nested)
>>  		gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT;
>>  	else
>> -		gfn = fault_ipa >> PAGE_SHIFT;
>> +		gfn = gpa >> PAGE_SHIFT;
>>  
>>  	write_fault = kvm_is_write_fault(vcpu);
>>  	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
>> @@ -1616,7 +1680,7 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  
>>  	ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
>>  	if (ret) {
>> -		kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
>> +		kvm_prepare_memory_fault_exit(vcpu, gpa, PAGE_SIZE,
>>  					      write_fault, exec_fault, false);
>>  		return ret;
>>  	}
>> @@ -1638,15 +1702,25 @@ static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	kvm_fault_lock(kvm);
>>  	if (mmu_invalidate_retry(kvm, mmu_seq)) {
>>  		ret = -EAGAIN;
>> -		goto out_unlock;
>> +		goto out_release_page;
>> +	}
>> +
>> +	if (kvm_is_realm(kvm)) {
>> +		ret = realm_map_ipa(kvm, fault_ipa, pfn,
>> +				    PAGE_SIZE, KVM_PGTABLE_PROT_W, memcache);
>> +		/* if successful don't release the page */
>> +		if (!ret)
>> +			goto out_unlock;
>> +		goto out_release_page;
>>  	}
>>  
>>  	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE,
>>  						 __pfn_to_phys(pfn), prot,
>>  						 memcache, flags);
>>  
>> -out_unlock:
>> +out_release_page:
>>  	kvm_release_faultin_page(kvm, page, !!ret, writable);
>> +out_unlock:
>>  	kvm_fault_unlock(kvm);
>>  
>>  	if (writable && !ret)
>> @@ -1685,6 +1759,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	if (fault_is_perm)
>>  		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
>>  	write_fault = kvm_is_write_fault(vcpu);
>> +
>> +	/*
>> +	 * Realms cannot map protected pages read-only
>> +	 * FIXME: It should be possible to map unprotected pages read-only
>> +	 */
>> +	if (vcpu_is_rec(vcpu))
>> +		write_fault = true;
>> +
>>  	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
>>  	VM_WARN_ON_ONCE(write_fault && exec_fault);
>>  
>> @@ -1779,7 +1861,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  		ipa &= ~(vma_pagesize - 1);
>>  	}
>>  
>> -	gfn = ipa >> PAGE_SHIFT;
>> +	gfn = kvm_gpa_from_fault(kvm, ipa) >> PAGE_SHIFT;
>>  	mte_allowed = kvm_vma_mte_allowed(vma);
>>  
>>  	vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
>> @@ -1855,6 +1937,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  	if (exec_fault && s2_force_noncacheable)
>>  		ret = -ENOEXEC;
>>  
>> +	/*
>> +	 * For now we shouldn't be hitting protected addresses because they are
>> +	 * handled in gmem_abort(). In the future this check may be relaxed to
>> +	 * support e.g. protected devices.
>> +	 */
>> +	if (!ret && vcpu_is_rec(vcpu) &&
>> +	    kvm_gpa_from_fault(kvm, fault_ipa) == fault_ipa)
> 
> Maybe use !shared_ipa_fault() here?

Ack.

Thanks,
Steve

> 
> Thanks,
> Wei-Lin Chang
> 
>> +		ret = -EINVAL;
>> +
>>  	if (ret) {
>>  		kvm_release_page_unused(page);
>>  		return ret;
>> @@ -1939,6 +2030,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>>  		 */
>>  		prot &= ~KVM_NV_GUEST_MAP_SZ;
>>  		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags);
>> +	} else if (kvm_is_realm(kvm)) {
>> +		ret = realm_map_ipa(kvm, fault_ipa, pfn, vma_pagesize,
>> +				    prot, memcache);
>>  	} else {
>>  		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
>>  					     __pfn_to_phys(pfn), prot,
>> @@ -2049,6 +2143,13 @@ int kvm_handle_guest_sea(struct kvm_vcpu *vcpu)
>>  	return 0;
>>  }
>>  
>> +static bool shared_ipa_fault(struct kvm *kvm, phys_addr_t fault_ipa)
>> +{
>> +	gpa_t gpa = kvm_gpa_from_fault(kvm, fault_ipa);
>> +
>> +	return (gpa != fault_ipa);
>> +}
>> +
>>  /**
>>   * kvm_handle_guest_abort - handles all 2nd stage aborts
>>   * @vcpu:	the VCPU pointer
>> @@ -2159,8 +2260,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>  		nested = &nested_trans;
>>  	}
>>  
>> -	gfn = ipa >> PAGE_SHIFT;
>> +	gfn = kvm_gpa_from_fault(vcpu->kvm, ipa) >> PAGE_SHIFT;
>>  	memslot = gfn_to_memslot(vcpu->kvm, gfn);
>> +
>>  	hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable);
>>  	write_fault = kvm_is_write_fault(vcpu);
>>  	if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
>> @@ -2203,7 +2305,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>  		 * of the page size.
>>  		 */
>>  		ipa |= FAR_TO_FIPA_OFFSET(kvm_vcpu_get_hfar(vcpu));
>> -		ret = io_mem_abort(vcpu, ipa);
>> +		ret = io_mem_abort(vcpu, kvm_gpa_from_fault(vcpu->kvm, ipa));
>>  		goto out_unlock;
>>  	}
>>  
>> @@ -2219,7 +2321,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>>  	VM_WARN_ON_ONCE(kvm_vcpu_trap_is_permission_fault(vcpu) &&
>>  			!write_fault && !kvm_vcpu_trap_is_exec_fault(vcpu));
>>  
>> -	if (kvm_slot_has_gmem(memslot))
>> +	if (kvm_slot_has_gmem(memslot) && !shared_ipa_fault(vcpu->kvm, fault_ipa))
>>  		ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
>>  				 esr_fsc_is_permission_fault(esr));
>>  	else
>> @@ -2256,6 +2358,10 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
>>  	if (!kvm->arch.mmu.pgt)
>>  		return false;
>>  
>> +	/* We don't support aging for Realms */
>> +	if (kvm_is_realm(kvm))
>> +		return true;
>> +
>>  	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
>>  						   range->start << PAGE_SHIFT,
>>  						   size, true);
>> @@ -2272,6 +2378,10 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
>>  	if (!kvm->arch.mmu.pgt)
>>  		return false;
>>  
>> +	/* We don't support aging for Realms */
>> +	if (kvm_is_realm(kvm))
>> +		return true;
>> +
>>  	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
>>  						   range->start << PAGE_SHIFT,
>>  						   size, false);
>> @@ -2438,10 +2548,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
>>  		return -EFAULT;
>>  
>>  	/*
>> -	 * Only support guest_memfd backed memslots with mappable memory, since
>> -	 * there aren't any CoCo VMs that support only private memory on arm64.
>> +	 * Only support guest_memfd backed memslots with mappable memory,
>> +	 * unless the guest is a CCA realm guest.
>>  	 */
>> -	if (kvm_slot_has_gmem(new) && !kvm_memslot_is_gmem_only(new))
>> +	if (kvm_slot_has_gmem(new) && !kvm_memslot_is_gmem_only(new) &&
>> +	    !kvm_is_realm(kvm))
>>  		return -EINVAL;
>>  
>>  	hva = new->userspace_addr;
>> diff --git a/arch/arm64/kvm/rmi.c b/arch/arm64/kvm/rmi.c
>> index d5fee203824b..30292814b1ec 100644
>> --- a/arch/arm64/kvm/rmi.c
>> +++ b/arch/arm64/kvm/rmi.c
>> @@ -837,6 +837,212 @@ static int realm_create_protected_data_page(struct kvm *kvm,
>>  	return ret;
>>  }
>>  
>> +static int fold_rtt(struct realm *realm, unsigned long addr, int level)
>> +{
>> +	phys_addr_t rtt_addr;
>> +	int ret;
>> +
>> +	ret = realm_rtt_fold(realm, addr, level, &rtt_addr);
>> +	if (ret)
>> +		return ret;
>> +
>> +	free_rtt(rtt_addr);
>> +
>> +	return 0;
>> +}
>> +
>> +static unsigned long addr_range_desc(unsigned long phys, unsigned long size)
>> +{
>> +	unsigned long out = 0;
>> +
>> +	switch (size) {
>> +	case P4D_SIZE:
>> +		out = 0 | (1 << 2);
>> +		break;
>> +	case PUD_SIZE:
>> +		out = 1 | (1 << 2);
>> +		break;
>> +	case PMD_SIZE:
>> +		out = 2 | (1 << 2);
>> +		break;
>> +	case PAGE_SIZE:
>> +		out = 3 | (1 << 2);
>> +		break;
>> +	default:
>> +		/*
>> +		 * Only support mapping at the page level granularity when
>> +		 * it's an unusual length. This should get us back onto a larger
>> +		 * block size for the subsequent mappings.
>> +		 */
>> +		out = 3 | ((MIN(size >> PAGE_SHIFT, PTRS_PER_PTE - 1)) << 2);
>> +		break;
>> +	}
>> +
>> +	WARN_ON(phys & ~PAGE_MASK);
>> +
>> +	out |= phys & PAGE_MASK;
>> +
>> +	return out;
>> +}
>> +
>> +int realm_map_protected(struct kvm *kvm,
>> +			unsigned long ipa,
>> +			kvm_pfn_t pfn,
>> +			unsigned long map_size,
>> +			struct kvm_mmu_memory_cache *memcache)
>> +{
>> +	struct realm *realm = &kvm->arch.realm;
>> +	phys_addr_t phys = __pfn_to_phys(pfn);
>> +	phys_addr_t rd = virt_to_phys(realm->rd);
>> +	unsigned long base_ipa = ipa;
>> +	unsigned long ipa_top = ipa + map_size;
>> +	int map_level = IS_ALIGNED(map_size, RMM_L2_BLOCK_SIZE) ?
>> +			RMM_RTT_BLOCK_LEVEL : RMM_RTT_MAX_LEVEL;
>> +	int ret = 0;
>> +
>> +	if (WARN_ON(!IS_ALIGNED(map_size, PAGE_SIZE) ||
>> +		    !IS_ALIGNED(ipa, map_size)))
>> +		return -EINVAL;
>> +
>> +	if (map_level < RMM_RTT_MAX_LEVEL) {
>> +		/*
>> +		 * A temporary RTT is needed during the map, precreate it,
>> +		 * however if there is an error (e.g. missing parent tables)
>> +		 * this will be handled below.
>> +		 */
>> +		realm_create_rtt_levels(realm, ipa, map_level,
>> +					RMM_RTT_MAX_LEVEL, memcache);
>> +	}
>> +
>> +	if (delegate_range(phys, map_size)) {
>> +		/*
>> +		 * It's likely we raced with another VCPU on the same
>> +		 * fault. Assume the other VCPU has handled the fault
>> +		 * and return to the guest.
>> +		 */
>> +		return 0;
>> +	}
>> +
>> +	while (ipa < ipa_top) {
>> +		unsigned long flags = RMI_ADDR_TYPE_SINGLE;
>> +		unsigned long range_desc = addr_range_desc(phys, ipa_top - ipa);
>> +		unsigned long out_top;
>> +
>> +		ret = rmi_rtt_data_map(rd, ipa, ipa_top, flags, range_desc,
>> +				       &out_top);
>> +
>> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
>> +			/* Create missing RTTs and retry */
>> +			int level = RMI_RETURN_INDEX(ret);
>> +
>> +			WARN_ON(level == RMM_RTT_MAX_LEVEL);
>> +			ret = realm_create_rtt_levels(realm, ipa, level,
>> +						      RMM_RTT_MAX_LEVEL,
>> +						      memcache);
>> +			if (ret)
>> +				goto err_undelegate;
>> +
>> +			ret = rmi_rtt_data_map(rd, ipa, ipa_top, flags,
>> +					       range_desc, &out_top);
>> +		}
>> +
>> +		if (WARN_ON(ret))
>> +			goto err_undelegate;
>> +
>> +		phys += out_top - ipa;
>> +		ipa = out_top;
>> +	}
>> +
>> +	if (map_size == RMM_L2_BLOCK_SIZE) {
>> +		ret = fold_rtt(realm, base_ipa, map_level + 1);
>> +		if (WARN_ON(ret))
>> +			goto err;
>> +	}
>> +
>> +	return 0;
>> +
>> +err_undelegate:
>> +	if (WARN_ON(undelegate_range(phys, map_size))) {
>> +		/* Page can't be returned to NS world so is lost */
>> +		get_page(phys_to_page(phys));
>> +	}
>> +err:
>> +	realm_unmap_private_range(kvm, base_ipa, ipa, true);
>> +	return -ENXIO;
>> +}
>> +
>> +int realm_map_non_secure(struct realm *realm,
>> +			 unsigned long ipa,
>> +			 kvm_pfn_t pfn,
>> +			 unsigned long size,
>> +			 enum kvm_pgtable_prot prot,
>> +			 struct kvm_mmu_memory_cache *memcache)
>> +{
>> +	unsigned long attr;
>> +	phys_addr_t rd = virt_to_phys(realm->rd);
>> +	phys_addr_t phys = __pfn_to_phys(pfn);
>> +	unsigned long offset;
>> +	/* TODO: Support block mappings */
>> +	int map_level = RMM_RTT_MAX_LEVEL;
>> +	int map_size = rmi_rtt_level_mapsize(map_level);
>> +	int ret = 0;
>> +
>> +	if (WARN_ON(!IS_ALIGNED(size, PAGE_SIZE) ||
>> +		    !IS_ALIGNED(ipa, size)))
>> +		return -EINVAL;
>> +
>> +	switch (prot & (KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC)) {
>> +	case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
>> +		return -EINVAL;
>> +	case KVM_PGTABLE_PROT_DEVICE:
>> +		attr = PTE_S2_MEMATTR(MT_S2_FWB_DEVICE_nGnRE);
>> +		break;
>> +	case KVM_PGTABLE_PROT_NORMAL_NC:
>> +		attr = PTE_S2_MEMATTR(MT_S2_FWB_NORMAL_NC);
>> +		break;
>> +	default:
>> +		attr = PTE_S2_MEMATTR(MT_S2_FWB_NORMAL);
>> +	}
>> +
>> +	for (offset = 0; offset < size; offset += map_size) {
>> +		/*
>> +		 * realm_map_ipa() enforces that the memory is writable,
>> +		 * so for now we permit both read and write.
>> +		 */
>> +		unsigned long desc = kvm_phys_to_pte(phys) | attr |
>> +				     KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R |
>> +				     KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W;
>> +		ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc);
>> +
>> +		if (RMI_RETURN_STATUS(ret) == RMI_ERROR_RTT) {
>> +			/* Create missing RTTs and retry */
>> +			int level = RMI_RETURN_INDEX(ret);
>> +
>> +			ret = realm_create_rtt_levels(realm, ipa, level,
>> +						      map_level, memcache);
>> +			if (ret)
>> +				return -ENXIO;
>> +
>> +			ret = rmi_rtt_map_unprotected(rd, ipa, map_level, desc);
>> +		}
>> +		/*
>> +		 * RMI_ERROR_RTT can be reported for two reasons: either the
>> +		 * RTT tables are not there, or there is an RTTE already
>> +		 * present for the address.  The above call to create RTTs
>> +		 * handles the first case, and in the second case this
>> +		 * indicates that another thread has already populated the RTTE
>> +		 * for us, so we can ignore the error and continue.
>> +		 */
>> +		if (ret && RMI_RETURN_STATUS(ret) != RMI_ERROR_RTT)
>> +			return -ENXIO;
>> +
>> +		ipa += map_size;
>> +		phys += map_size;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  static int populate_region_cb(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
>>  			      struct page *src_page, void *opaque)
>>  {
>> -- 
>> 2.43.0
>>




More information about the linux-arm-kernel mailing list