[PATCH v4 06/24] iommu: Defer iommu_group free via kfree_rcu()

Nicolin Chen nicolinc at nvidia.com
Mon May 18 20:38:49 PDT 2026


dev->iommu_group will be read in ISR context to look up a group_device
for fault reporting, where a mutex cannot be taken. For that read to be
safe, two things are needed:

 (1) The iommu_group memory that dev->iommu_group points to must outlive
     any in-flight RCU read-side critical section. Add an rcu_head to
     iommu_group and switch iommu_group_release() to call kfree_rcu().

 (2) The publication of dev->iommu_group must pair with rcu_dereference()
     at the upcoming reader (which cannot take the mutex and holds only
     rcu_read_lock()), so the writers must use rcu_assign_pointer().

Existing readers do not use rcu_dereference(); they retain their current
synchronization model. Apply a __rcu __force cast at the writer sites to
satisfy sparse without forcing every reader to convert.

The new reader added by the subsequent change uses rcu_dereference() only
to reach group->devices for a list lookup; it does not touch group->name
or any other field. The kfree_rcu() here is meant to keep group->devices
alive across the read-side critical section; the other fields do not
affect the reader.

Note: this change alone does not yet make group->devices iteration safe
under rcu_read_lock(); a subsequent change will convert the group_device
list to RCU and switch struct group_device to kfree_rcu().

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Nicolin Chen <nicolinc at nvidia.com>
---
 drivers/iommu/iommu.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index e68c7b142ad5a..6727b6f7797bd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -71,8 +71,12 @@ struct iommu_group {
 	 */
 	unsigned int recovery_cnt;
 	void *owner;
+	struct rcu_head rcu;
 };
 
+#define dev_iommu_group_rcu(dev) \
+	(*((struct iommu_group __rcu __force **)&(dev)->iommu_group))
+
 enum gdev_blocked {
 	BLOCKED_NO = 0, /* Not blocked */
 	BLOCKED_RESETTING, /* PCI reset in flight */
@@ -531,7 +535,7 @@ static int iommu_init_device(struct device *dev)
 		ret = PTR_ERR(group);
 		goto err_unlink;
 	}
-	dev->iommu_group = group;
+	rcu_assign_pointer(dev_iommu_group_rcu(dev), group);
 
 	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
 	if (ops->is_attach_deferred)
@@ -613,7 +617,7 @@ static void iommu_deinit_device(struct device *dev)
 	}
 
 	/* Caller must put iommu_group */
-	dev->iommu_group = NULL;
+	rcu_assign_pointer(dev_iommu_group_rcu(dev), NULL);
 	module_put(ops->owner);
 	dev_iommu_free(dev);
 #ifdef CONFIG_IOMMU_DMA
@@ -772,7 +776,7 @@ static void __iommu_group_remove_device(struct device *dev)
 		if (dev_has_iommu(dev))
 			iommu_deinit_device(dev);
 		else
-			dev->iommu_group = NULL;
+			rcu_assign_pointer(dev_iommu_group_rcu(dev), NULL);
 		break;
 	}
 	mutex_unlock(&group->mutex);
@@ -1059,7 +1063,7 @@ static void iommu_group_release(struct kobject *kobj)
 	WARN_ON(group->blocking_domain);
 
 	kfree(group->name);
-	kfree(group);
+	kfree_rcu(group, rcu);
 }
 
 static const struct kobj_type iommu_group_ktype = {
@@ -1344,7 +1348,7 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
 		return PTR_ERR(gdev);
 
 	iommu_group_ref_get(group);
-	dev->iommu_group = group;
+	rcu_assign_pointer(dev_iommu_group_rcu(dev), group);
 
 	mutex_lock(&group->mutex);
 	list_add_tail(&gdev->list, &group->devices);
-- 
2.43.0




More information about the linux-arm-kernel mailing list