[PATCH v2 00/10] Refine the locking for dev->iommu_group
Jason Gunthorpe
jgg at nvidia.com
Tue Aug 8 07:30:23 PDT 2023
On Tue, Aug 08, 2023 at 04:02:40PM +0200, Marek Szyprowski wrote:
> Hi Jason,
>
> On 08.08.2023 15:25, Jason Gunthorpe wrote:
> > On Tue, Aug 08, 2023 at 03:08:30PM +0200, Marek Szyprowski wrote:
> >>> Any of the drivers that use platform device as the iommu_device will
> >>> have a problem, please try:
> >>>
> >>> https://lore.kernel.org/linux-iommu/ZNIz%2FNVLb6WqqvQx@nvidia.com/
> >> I've checked and it doesn't help in my case. I will soon check why.
> > Oh, I botched it. Forgot that the iommu_device->dev is the sysfs
> > handle not the HW device. Maybe this:
>
> This fixed the early lockup, but then system hangs again a bit later. It
> looks that this device lock in __iommu_probe_device() is really
> problematic,
Yes, I expected we'd hit something like this - I checked alot of call
paths but missed these two. The self-probe is sneaky, but here the
device_lock is held way up the call chain, I just missed it.
The fix is to just annotate that we already hold the lock when calling
iommu_probe_device(), since we know in those cases that we must be
holding it:
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index daa64dd687524b..3fc5e12f2f1c09 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1582,7 +1582,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
* iommu_probe_device() call for dev, replay it to get things in order.
*/
if (!err && dev->bus)
- err = iommu_probe_device(dev);
+ err = iommu_probe_device_locked(dev);
/* Ignore all other errors apart from EPROBE_DEFER */
if (err == -EPROBE_DEFER) {
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 7dbbcffac21930..b867d7f22954e9 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -264,6 +264,7 @@ int iommu_device_register(struct iommu_device *iommu,
return -EBUSY;
iommu->ops = ops;
+ iommu->hwdev = hwdev;
if (hwdev)
iommu->fwnode = dev_fwnode(hwdev);
@@ -273,7 +274,7 @@ int iommu_device_register(struct iommu_device *iommu,
for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
iommu_buses[i]->iommu_ops = ops;
- err = bus_iommu_probe(iommu_buses[i]);
+ err = bus_iommu_probe(iommu_buses[i], iommu);
}
if (err)
iommu_device_unregister(iommu);
@@ -452,24 +453,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
struct group_device *gdev;
int ret;
- if (!ops)
- return -ENODEV;
/*
* Allow __iommu_probe_device() to be safely called in parallel,
* both dev->iommu_group and the initial setup of dev->iommu are
* protected this way.
*/
- device_lock(dev);
+ device_lock_assert(dev);
+
+ if (!ops)
+ return -ENODEV;
/* Device is probed already if in a group */
- if (dev->iommu_group) {
- ret = 0;
- goto out_unlock;
- }
+ if (dev->iommu_group)
+ return 0;
ret = iommu_init_device(dev, ops);
if (ret)
- goto out_unlock;
+ return ret;
group = dev->iommu_group;
gdev = iommu_group_alloc_device(group, dev);
@@ -505,7 +505,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
list_add_tail(&group->entry, group_list);
}
mutex_unlock(&group->mutex);
- device_unlock(dev);
if (dev_is_pci(dev))
iommu_dma_set_pci_32bit_workaround(dev);
@@ -519,12 +518,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
iommu_deinit_device(dev);
mutex_unlock(&group->mutex);
iommu_group_put(group);
-out_unlock:
- device_unlock(dev);
return ret;
}
-int iommu_probe_device(struct device *dev)
+int iommu_probe_device_locked(struct device *dev)
{
const struct iommu_ops *ops;
int ret;
@@ -540,6 +537,16 @@ int iommu_probe_device(struct device *dev)
return 0;
}
+int iommu_probe_device(struct device *dev)
+{
+ int ret;
+
+ device_lock(dev);
+ ret = iommu_probe_device_locked(dev);
+ device_unlock(dev);
+ return ret;
+}
+
static void __iommu_group_free_device(struct iommu_group *group,
struct group_device *grp_dev)
{
@@ -1784,12 +1791,26 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
return group->default_domain;
}
+struct probe_iommu_args {
+ struct list_head *group_list;
+ struct iommu_device *iommu;
+};
+
static int probe_iommu_group(struct device *dev, void *data)
{
- struct list_head *group_list = data;
+ struct probe_iommu_args *args = data;
+ bool need_lock;
int ret;
- ret = __iommu_probe_device(dev, group_list);
+ /* Probing the iommu itself is always done under the device_lock */
+ need_lock = !args->iommu || args->iommu->hwdev != dev;
+
+ if (need_lock)
+ device_lock(dev);
+ ret = __iommu_probe_device(dev, args->group_list);
+ if (need_lock)
+ device_unlock(dev);
+
if (ret == -ENODEV)
ret = 0;
@@ -1858,13 +1879,16 @@ static void iommu_group_do_probe_finalize(struct device *dev)
ops->probe_finalize(dev);
}
-int bus_iommu_probe(const struct bus_type *bus)
+int bus_iommu_probe(const struct bus_type *bus, struct iommu_device *iommu)
{
struct iommu_group *group, *next;
+ struct probe_iommu_args args = {};
LIST_HEAD(group_list);
int ret;
- ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
+ args.group_list = &group_list;
+ args.iommu = iommu;
+ ret = bus_for_each_dev(bus, NULL, &args, probe_iommu_group);
if (ret)
return ret;
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 157b286e36bf3a..b5b7d4bd2cefb9 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -160,7 +160,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
* probe for dev, replay it to get things in order.
*/
if (!err && dev->bus)
- err = iommu_probe_device(dev);
+ err = iommu_probe_device_locked(dev);
/* Ignore all other errors apart from EPROBE_DEFER */
if (err == -EPROBE_DEFER) {
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 97c45f50bf4332..828679abef7503 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1234,6 +1234,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
if (err)
goto out_sysfs;
obj->has_iommu_driver = true;
+ } else {
+ /* Re-probe bus to probe device attached to this IOMMU */
+ obj->iommu.hwdev = &pdev->dev;
+ bus_iommu_probe(&platform_bus_type, &obj->iommu);
}
pm_runtime_enable(obj->dev);
@@ -1242,9 +1246,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
dev_info(&pdev->dev, "%s registered\n", obj->name);
- /* Re-probe bus to probe device attached to this IOMMU */
- bus_iommu_probe(&platform_bus_type);
-
return 0;
out_sysfs:
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index f1e18e81fca78b..96782bfb384462 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -361,6 +361,7 @@ struct iommu_domain_ops {
* @list: Used by the iommu-core to keep a list of registered iommus
* @ops: iommu-ops for talking to this iommu
* @dev: struct device for sysfs handling
+ * @hwdev: The device HW that controls the iommu
* @singleton_group: Used internally for drivers that have only one group
* @max_pasids: number of supported PASIDs
*/
@@ -369,6 +370,7 @@ struct iommu_device {
const struct iommu_ops *ops;
struct fwnode_handle *fwnode;
struct device *dev;
+ struct device *hwdev;
struct iommu_group *singleton_group;
u32 max_pasids;
};
@@ -465,7 +467,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
return dev->iommu->iommu_dev->ops;
}
-extern int bus_iommu_probe(const struct bus_type *bus);
+extern int bus_iommu_probe(const struct bus_type *bus,
+ struct iommu_device *iommu);
extern bool iommu_present(const struct bus_type *bus);
extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
@@ -709,6 +712,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv)
}
int iommu_probe_device(struct device *dev);
+int iommu_probe_device_locked(struct device *dev);
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
More information about the Linux-rockchip
mailing list