[RFC][PATCH v2 07/13] iommu/arm-smmu-v3: Add shared VMID support for NESTING

Nicolin Chen nicolinc at nvidia.com
Mon Aug 30 19:59:17 PDT 2021


A VMID can be shared among iommu domains being attached to the same
Virtual Machine in order to improve utilization of TLB cache.

This patch implements ->set_nesting_vmid() and ->get_nesting_vmid()
to set/get s2_cfg->vmid for nesting cases, and then changes to reuse
the VMID.

Signed-off-by: Nicolin Chen <nicolinc at nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 65 +++++++++++++++++++--
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  1 +
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a388e318f86e..c0ae117711fa 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2051,7 +2051,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
 		mutex_unlock(&arm_smmu_asid_lock);
 	} else {
 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
-		if (cfg->vmid)
+		if (cfg->vmid && !atomic_dec_return(&smmu->vmid_refcnts[cfg->vmid]))
 			arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid);
 	}
 
@@ -2121,17 +2121,28 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
 				       struct arm_smmu_master *master,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
-	int vmid;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
 
-	vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
-	if (vmid < 0)
-		return vmid;
+	/*
+	 * For a nested case where there are multiple passthrough devices to a
+	 * VM, they share a commond VMID, allocated when the first passthrough
+	 * device is attached to the VM. So the cfg->vmid might be already set
+	 * in arm_smmu_set_nesting_vmid(), reported from the hypervisor. In this
+	 * case, simply reuse the shared VMID and increase its refcount.
+	 */
+	if (!cfg->vmid) {
+		int vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits);
+
+		if (vmid < 0)
+			return vmid;
+		cfg->vmid = (u16)vmid;
+	}
+
+	atomic_inc(&smmu->vmid_refcnts[cfg->vmid]);
 
 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
-	cfg->vmid	= (u16)vmid;
 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
@@ -2731,6 +2742,44 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
 	return ret;
 }
 
+static int arm_smmu_set_nesting_vmid(struct iommu_domain *domain, u32 vmid)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+	int ret = 0;
+
+	if (vmid == IOMMU_VMID_INVALID)
+		return -EINVAL;
+
+	mutex_lock(&smmu_domain->init_mutex);
+	if (smmu_domain->smmu || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
+		ret = -EPERM;
+	else
+		s2_cfg->vmid = vmid;
+	mutex_unlock(&smmu_domain->init_mutex);
+
+	return ret;
+}
+
+static int arm_smmu_get_nesting_vmid(struct iommu_domain *domain, u32 *vmid)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
+	int ret = 0;
+
+	if (!vmid)
+		return -EINVAL;
+
+	mutex_lock(&smmu_domain->init_mutex);
+	if (smmu_domain->smmu || smmu_domain->stage != ARM_SMMU_DOMAIN_NESTED)
+		ret = -EPERM;
+	else
+		*vmid = s2_cfg->vmid;
+	mutex_unlock(&smmu_domain->init_mutex);
+
+	return ret;
+}
+
 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 {
 	return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -2845,6 +2894,8 @@ static struct iommu_ops arm_smmu_ops = {
 	.release_device		= arm_smmu_release_device,
 	.device_group		= arm_smmu_device_group,
 	.enable_nesting		= arm_smmu_enable_nesting,
+	.set_nesting_vmid	= arm_smmu_set_nesting_vmid,
+	.get_nesting_vmid	= arm_smmu_get_nesting_vmid,
 	.of_xlate		= arm_smmu_of_xlate,
 	.get_resv_regions	= arm_smmu_get_resv_regions,
 	.put_resv_regions	= generic_iommu_put_resv_regions,
@@ -3530,6 +3581,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	/* ASID/VMID sizes */
 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
+	smmu->vmid_refcnts = devm_kcalloc(smmu->dev, 1 << smmu->vmid_bits,
+					  sizeof(*smmu->vmid_refcnts), GFP_KERNEL);
 
 	/* IDR1 */
 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 4cb136f07914..ea2c61d52df8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -664,6 +664,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_MAX_VMIDS		(1 << 16)
 	unsigned int			vmid_bits;
 	DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS);
+	atomic_t			*vmid_refcnts;
 
 	unsigned int			ssid_bits;
 	unsigned int			sid_bits;
-- 
2.17.1




More information about the linux-arm-kernel mailing list