[PATCH 09/10] iommu: Complete the locking for dev->iommu_group

Jason Gunthorpe jgg at nvidia.com
Tue Jul 18 12:05:57 PDT 2023


Revise the locking for dev->iommu_group so that it has three safe ways to
access it:

 - It is read by a probe'd device driver. So long as a device driver is
   probed the dev->iommu_group will be guaranteed stable without further
   locking.

 - Read under the device_lock(), this primarily protects against
   parallel probe of the same device, and parallel probe/remove

 - Read/Write under the global dev_iommu_group_lock. This is used during
   probe time discovery of groups. Device drivers will scan unlocked
   portions of the device tree to locate an already existing group. These
   scans can access the dev->iommu_group under the global lock to single
   thread determining and installing the group. This ensures that groups
   are reliably formed.

Narrow the scope of the global dev_iommu_group_lock to be only during the
dev->iommu_group setup, and not for the entire probing.

Prior patches removed the various races inherent to the probe process by
consolidating all the work under the group->mutex. In this configuration
it is fine if two devices race to the group_device step of a new
iommu_group, the group->mutex locking will ensure the group_device and
domain setup part remains properly ordered.

Add the missing locking on the remove paths. For iommu_deinit_device() it
is necessary to hold the dev_iommu_group_lock due to possible races during
probe error unwind.

Fully lock the iommu_group_add/remove_device() path so we can use lockdep
assertions. Other than lockdep this is redundant, VFIO no-iommu doesn't
use group clustering.

For iommu_release_device() it is redundant, as we expect no external
references to the struct device by this point, but it is harmless so
add the missing lock to allow lockdep assertions to work.

This resolves the remarks of the comment in __iommu_probe_device().

Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
---
 drivers/iommu/iommu.c | 54 +++++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 1e0c5d9a0370fb..0f75428be79d4d 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -369,14 +369,17 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
 	if (ret)
 		goto err_release;
 
+	mutex_lock(&dev_iommu_group_lock);
 	group = ops->device_group(dev);
 	if (WARN_ON_ONCE(group == NULL))
 		group = ERR_PTR(-EINVAL);
 	if (IS_ERR(group)) {
+		mutex_unlock(&dev_iommu_group_lock);
 		ret = PTR_ERR(group);
 		goto err_unlink;
 	}
 	dev->iommu_group = group;
+	mutex_unlock(&dev_iommu_group_lock);
 
 	dev->iommu->iommu_dev = iommu_dev;
 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
@@ -434,7 +437,9 @@ static void iommu_deinit_device(struct device *dev)
 	}
 
 	/* Caller must put iommu_group */
+	mutex_lock(&dev_iommu_group_lock);
 	dev->iommu_group = NULL;
+	mutex_unlock(&dev_iommu_group_lock);
 	module_put(ops->owner);
 	dev_iommu_free(dev);
 }
@@ -449,13 +454,11 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	if (!ops)
 		return -ENODEV;
 	/*
-	 * Serialise to avoid races between IOMMU drivers registering in
-	 * parallel and/or the "replay" calls from ACPI/OF code via client
-	 * driver probe. Once the latter have been cleaned up we should
-	 * probably be able to use device_lock() here to minimise the scope,
-	 * but for now enforcing a simple global ordering is fine.
+	 * Allow __iommu_probe_device() to be safely called in parallel,
+	 * both dev->iommu_group and the initial setup of dev->iommu are
+	 * protected this way.
 	 */
-	mutex_lock(&dev_iommu_group_lock);
+	device_lock(dev);
 
 	/* Device is probed already if in a group */
 	if (dev->iommu_group) {
@@ -501,7 +504,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 			list_add_tail(&group->entry, group_list);
 	}
 	mutex_unlock(&group->mutex);
-	mutex_unlock(&dev_iommu_group_lock);
+	device_unlock(dev);
 
 	if (dev_is_pci(dev))
 		iommu_dma_set_pci_32bit_workaround(dev);
@@ -516,8 +519,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	mutex_unlock(&group->mutex);
 	iommu_group_put(group);
 out_unlock:
-	mutex_unlock(&dev_iommu_group_lock);
-
+	device_unlock(dev);
 	return ret;
 }
 
@@ -566,6 +568,7 @@ static void __iommu_group_remove_device(struct device *dev)
 	struct iommu_group *group = dev->iommu_group;
 	struct group_device *device;
 
+	device_lock_assert(dev);
 	mutex_lock(&group->mutex);
 	for_each_group_device(group, device) {
 		if (device->dev != dev)
@@ -590,14 +593,23 @@ static void __iommu_group_remove_device(struct device *dev)
 
 static void iommu_release_device(struct device *dev)
 {
-	struct iommu_group *group = dev->iommu_group;
+	struct iommu_group *group;
 
+	/*
+	 * This locking for dev->iommu_group is overkill when this is called
+	 * from the BUS_NOTIFY_REMOVED_DEVICE, as we don't expect any other
+	 * threads to have a reference to the device at that point. Keep it
+	 * because this isn't a performance path and helps lockdep analysis.
+	 */
+	device_lock(dev);
+	group = dev->iommu_group;
 	if (group)
 		__iommu_group_remove_device(dev);
 
 	/* Free any fwspec if no iommu_driver was ever attached */
 	if (dev->iommu)
 		dev_iommu_free(dev);
+	device_unlock(dev);
 }
 
 static int __init iommu_set_def_domain_type(char *str)
@@ -1080,6 +1092,8 @@ static struct group_device *iommu_group_alloc_device(struct iommu_group *group,
 	int ret, i = 0;
 	struct group_device *device;
 
+	device_lock_assert(dev);
+
 	device = kzalloc(sizeof(*device), GFP_KERNEL);
 	if (!device)
 		return ERR_PTR(-ENOMEM);
@@ -1141,9 +1155,12 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
 {
 	struct group_device *gdev;
 
+	device_lock(dev);
 	gdev = iommu_group_alloc_device(group, dev);
-	if (IS_ERR(gdev))
+	if (IS_ERR(gdev)) {
+		device_unlock(dev);
 		return PTR_ERR(gdev);
+	}
 
 	iommu_group_ref_get(group);
 	dev->iommu_group = group;
@@ -1151,6 +1168,7 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
 	mutex_lock(&group->mutex);
 	list_add_tail(&gdev->list, &group->devices);
 	mutex_unlock(&group->mutex);
+	device_unlock(dev);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(iommu_group_add_device);
@@ -1164,14 +1182,16 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device);
  */
 void iommu_group_remove_device(struct device *dev)
 {
-	struct iommu_group *group = dev->iommu_group;
+	struct iommu_group *group;
 
-	if (!group)
-		return;
+	device_lock(dev);
+	group = dev->iommu_group;
+	if (group) {
+		dev_info(dev, "Removing from iommu group %d\n", group->id);
+		__iommu_group_remove_device(dev);
+	}
+	device_unlock(dev);
 
-	dev_info(dev, "Removing from iommu group %d\n", group->id);
-
-	__iommu_group_remove_device(dev);
 }
 EXPORT_SYMBOL_GPL(iommu_group_remove_device);
 
-- 
2.41.0




More information about the Linux-rockchip mailing list