[PATCH v3 1/6] kvm: determine memory type from VMA

ankita at nvidia.com ankita at nvidia.com
Wed Apr 5 11:01:29 PDT 2023


From: Ankit Agrawal <ankita at nvidia.com>

Each VM stores the requires pgprots for its mappings in the
vma->pgprot. Based on this we can determine the desired MT_DEVICE_*
for the VMA directly, and do not have to guess based on heuristics
based on pfn_is_map_memory().

There are the following kinds of pgprot available to userspace and their
corresponding type:
pgprot_noncached -> MT_DEVICE_nGnRnE
pgprot_writecombine -> MT_NORMAL_NC
pgprot_device -> MT_DEVICE_nGnRE
pgprot_tagged -> MT_NORMAL_TAGGED

Decode the relevant MT_* types in use and translate them into the
corresponding KVM_PGTABLEPROT_*:

 - MT_DEVICE_nGnRE -> KVM_PGTABLE_PROT_DEVICE_nGnRE (device)
 - MT_DEVICE_nGnRnE -> KVM_PGTABLE_PROT_DEVICE_nGnRnE (noncached)
 - MT_NORMAL/_TAGGED/_NC -> 0

The selection of 0 for the S2 KVM_PGTABLE_PROT_DEVICE_nGnRnE is based
on [2].

Also worth noting is the result of the stage-1 and stage-2. Ref [3]
If FWB not set, then the combination is the one that is more restrictive.
The sequence from lowest restriction to the highest:
DEVICE_nGnRnE -> DEVICE_nGnRE -> NORMAL/_TAGGED/_NC
If FWB is set, then stage-2 mapping type overrides the stage-1 [1].

This solves a problem where KVM cannot preserve the MT_NORMAL memory
type for non-struct page backed memory into the S2 mapping. Instead
the VMA creator determines the MT type and the S2 will follow it.

[1] https://developer.arm.com/documentation/102376/0100/Combining-Stage-1-and-Stage-2-attributes
[2] ARMv8 reference manual: https://developer.arm.com/documentation/ddi0487/gb/ Section D5.5.3, Table D5-38
[3] ARMv8 reference manual: https://developer.arm.com/documentation/ddi0487/gb/ Table G5-20 on page G5-6330

Signed-off-by: Ankit Agrawal <ankita at nvidia.com>
---
 arch/arm64/include/asm/kvm_pgtable.h |  8 +++++---
 arch/arm64/include/asm/memory.h      |  6 ++++--
 arch/arm64/kvm/hyp/pgtable.c         | 16 +++++++++++-----
 arch/arm64/kvm/mmu.c                 | 27 ++++++++++++++++++++++-----
 4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 4cd6762bda80..d3166b6e6329 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -150,7 +150,8 @@ enum kvm_pgtable_stage2_flags {
  * @KVM_PGTABLE_PROT_X:		Execute permission.
  * @KVM_PGTABLE_PROT_W:		Write permission.
  * @KVM_PGTABLE_PROT_R:		Read permission.
- * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
+ * @KVM_PGTABLE_PROT_DEVICE_nGnRE:	Device nGnRE attributes.
+ * @KVM_PGTABLE_PROT_DEVICE_nGnRnE:	Device nGnRnE attributes.
  * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
  * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
  * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
@@ -161,7 +162,8 @@ enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_W			= BIT(1),
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
-	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+	KVM_PGTABLE_PROT_DEVICE_nGnRE		= BIT(3),
+	KVM_PGTABLE_PROT_DEVICE_nGnRnE		= BIT(4),
 
 	KVM_PGTABLE_PROT_SW0			= BIT(55),
 	KVM_PGTABLE_PROT_SW1			= BIT(56),
@@ -178,7 +180,7 @@ enum kvm_pgtable_prot {
 #define PAGE_HYP		KVM_PGTABLE_PROT_RW
 #define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
 #define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
-#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
+#define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE_nGnRE)
 
 typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
 					   enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 78e5163836a0..4ebbc4b1ba4d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -147,14 +147,16 @@
  * Memory types for Stage-2 translation
  */
 #define MT_S2_NORMAL		0xf
+#define MT_S2_DEVICE_nGnRnE 0x0
 #define MT_S2_DEVICE_nGnRE	0x1
 
 /*
  * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
  * Stage-2 enforces Normal-WB and Device-nGnRE
  */
-#define MT_S2_FWB_NORMAL	6
-#define MT_S2_FWB_DEVICE_nGnRE	1
+#define MT_S2_FWB_NORMAL	0x6
+#define MT_S2_FWB_DEVICE_nGnRnE 0x0
+#define MT_S2_FWB_DEVICE_nGnRE	0x1
 
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER	(PUD_SHIFT)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 3d61bd3e591d..7a8238b41590 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -355,7 +355,7 @@ struct hyp_map_data {
 
 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
 {
-	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
+	bool device = prot & KVM_PGTABLE_PROT_DEVICE_nGnRE;
 	u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL;
 	kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS;
@@ -636,14 +636,20 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
 static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
 				kvm_pte_t *ptep)
 {
-	bool device = prot & KVM_PGTABLE_PROT_DEVICE;
-	kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
-			    KVM_S2_MEMATTR(pgt, NORMAL);
 	u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
+	kvm_pte_t attr;
+
+	if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRE)
+		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
+	else if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRnE)
+		attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRnE);
+	else
+		attr = KVM_S2_MEMATTR(pgt, NORMAL);
 
 	if (!(prot & KVM_PGTABLE_PROT_X))
 		attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
-	else if (device)
+	else if (prot & KVM_PGTABLE_PROT_DEVICE_nGnRE ||
+		 prot & KVM_PGTABLE_PROT_DEVICE_nGnRnE)
 		return -EINVAL;
 
 	if (prot & KVM_PGTABLE_PROT_R)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7113587222ff..8d63aa951c33 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -897,7 +897,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 	int ret = 0;
 	struct kvm_mmu_memory_cache cache = { .gfp_zero = __GFP_ZERO };
 	struct kvm_pgtable *pgt = kvm->arch.mmu.pgt;
-	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE |
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_DEVICE_nGnRE |
 				     KVM_PGTABLE_PROT_R |
 				     (writable ? KVM_PGTABLE_PROT_W : 0);
 
@@ -1186,6 +1186,15 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+/*
+ * Determine the memory region cacheability from VMA's pgprot. This
+ * is used to set the stage 2 PTEs.
+ */
+static unsigned long mapping_type(pgprot_t page_prot)
+{
+	return ((pgprot_val(page_prot) & PTE_ATTRINDX_MASK) >> 2);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1368,10 +1377,18 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault)
 		prot |= KVM_PGTABLE_PROT_X;
 
-	if (device)
-		prot |= KVM_PGTABLE_PROT_DEVICE;
-	else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
-		prot |= KVM_PGTABLE_PROT_X;
+	switch (mapping_type(vma->vm_page_prot)) {
+	case MT_DEVICE_nGnRE:
+		prot |= KVM_PGTABLE_PROT_DEVICE_nGnRE;
+		break;
+	case MT_DEVICE_nGnRnE:
+		prot |= KVM_PGTABLE_PROT_DEVICE_nGnRnE;
+		break;
+		/* MT_NORMAL/_TAGGED/_NC */
+	default:
+		if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
+			prot |= KVM_PGTABLE_PROT_X;
+	}
 
 	/*
 	 * Under the premise of getting a FSC_PERM fault, we just need to relax
-- 
2.17.1




More information about the linux-arm-kernel mailing list