[ARM IOMMU] IOMMU framework concurrency issue

Jason Gunthorpe jgg at ziepe.ca
Wed Oct 18 09:19:18 PDT 2023


On Wed, Oct 18, 2023 at 04:34:20PM +0100, Robin Murphy wrote:
> On 2023-10-17 17:33, Jason Gunthorpe wrote:
> >
> > eg make sure the iommu driver is fully registered before allowing any
> > concurrent probes. Once the iommu driver is registered it will be able
> > to catch the bus notifiers and serialize things properly.
> 
> Ugh, I think I see at least how this happens for devices which *don't* have
> an IOMMU - because iommu_init_device() has to transiently allocate
> dev->iommu in order to call ops->probe_device in order to discover
> that the

Hmm! Is it essential, though? That ordering was copy-and-pasted from
before; I didn't study it closely when I copied it.
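
As far as I can tell the ordering Robin is describing is roughly this
today (heavily trimmed, so treat it as a sketch rather than the literal
code):

	if (!dev_iommu_get(dev))	/* dev->iommu becomes visible here.. */
		return -ENOMEM;

	if (!try_module_get(ops->owner)) {
		ret = -EINVAL;
		goto err_free;
	}

	iommu_dev = ops->probe_device(dev);	/* ..before we know the
						   device has an IOMMU */
	if (IS_ERR(iommu_dev)) {
		ret = PTR_ERR(iommu_dev);
		goto err_module_put;
	}
	dev->iommu->iommu_dev = iommu_dev;
	[..]
err_module_put:
	module_put(ops->owner);
err_free:
	dev_iommu_free(dev);		/* ..and is freed again on failure */
	return ret;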

I only checked some drivers, but something like the diff below looks like
it could resolve the situation you described. Zhenhua, is that your
situation, a non-probed device?

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 47172f1084d8fd..580e74afdb0765 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -386,6 +386,16 @@ static u32 dev_iommu_get_max_pasids(struct device *dev)
 	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
 }
 
+void dev_iommu_priv_set(struct device *dev, void *priv)
+{
+	struct dev_iommu *dev_iommu;
+
+	dev_iommu = dev_iommu_get(dev);
+	if (WARN_ON(!dev_iommu))
+		return; // FIXME handle failure in drivers
+	dev_iommu->priv = priv;
+}
+
 /*
  * Init the dev->iommu and dev->iommu_group in the struct device and get the
  * driver probed
@@ -393,12 +403,10 @@ static u32 dev_iommu_get_max_pasids(struct device *dev)
 static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
 {
 	struct iommu_device *iommu_dev;
+	struct dev_iommu *dev_iommu;
 	struct iommu_group *group;
 	int ret;
 
-	if (!dev_iommu_get(dev))
-		return -ENOMEM;
-
 	if (!try_module_get(ops->owner)) {
 		ret = -EINVAL;
 		goto err_free;
@@ -409,7 +417,14 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
 		ret = PTR_ERR(iommu_dev);
 		goto err_module_put;
 	}
-	dev->iommu->iommu_dev = iommu_dev;
+
+	dev_iommu = dev_iommu_get(dev);
+	if (WARN_ON(!dev_iommu)) {
+		ret = -ENOMEM;
+		goto err_release;
+	}
+
+	dev_iommu->iommu_dev = iommu_dev;
 
 	ret = iommu_device_link(iommu_dev, dev);
 	if (ret)
@@ -424,9 +439,9 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
 	}
 	dev->iommu_group = group;
 
-	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+	dev_iommu->max_pasids = dev_iommu_get_max_pasids(dev);
 	if (ops->is_attach_deferred)
-		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
+		dev_iommu->attach_deferred = ops->is_attach_deferred(dev);
 	return 0;
 
 err_unlink:
@@ -438,7 +453,11 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
 	module_put(ops->owner);
 err_free:
 	dev->iommu->iommu_dev = NULL;
-	dev_iommu_free(dev);
+	/*
+	 * If probe_device allocated a dev->iommu and things failed later,
+	 * just leave it in place. We don't yet have robust locking here,
+	 * and there could be concurrent users of dev->iommu.
+	 */
 	return ret;
 }
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 68c9be9293e4c0..5c25c378a13ece 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -713,10 +713,7 @@ static inline void *dev_iommu_priv_get(struct device *dev)
 		return NULL;
 }
 
-static inline void dev_iommu_priv_set(struct device *dev, void *priv)
-{
-	dev->iommu->priv = priv;
-}
+void dev_iommu_priv_set(struct device *dev, void *priv);
 
 int iommu_probe_device(struct device *dev);
 

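FWIW, the driver-side pattern the exported helper has to keep working is
the usual one sketched below. This is a made-up example driver - the
names (foo_probe_device, foo_master, foo_get_for_dev, ..) are purely
illustrative, not any real driver:

struct foo_device {
	struct iommu_device iommu;
	/* hw instance state.. */
};

struct foo_master {
	struct foo_device *foo;
	/* per-endpoint state.. */
};

static struct iommu_device *foo_probe_device(struct device *dev)
{
	/* However the driver locates its instance; made-up helper */
	struct foo_device *foo = foo_get_for_dev(dev);
	struct foo_master *master;

	if (!foo)
		return ERR_PTR(-ENODEV);

	master = kzalloc(sizeof(*master), GFP_KERNEL);
	if (!master)
		return ERR_PTR(-ENOMEM);
	master->foo = foo;

	/*
	 * With the patch above this call may be the thing that allocates
	 * dev->iommu, and that allocation can fail - which is what the
	 * FIXME about teaching drivers to handle failure refers to.
	 */
	dev_iommu_priv_set(dev, master);

	return &foo->iommu;
}

IOW the priv pointer is set from inside ->probe_device(), which is why
dev_iommu_priv_set() has to be able to allocate dev->iommu on demand once
dev_iommu_get() moves after the ops->probe_device() call.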