[PATCH v2 00/10] Refine the locking for dev->iommu_group
Chen-Yu Tsai
wenst at chromium.org
Tue Aug 8 23:23:37 PDT 2023
On Tue, Aug 8, 2023 at 10:30 PM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> On Tue, Aug 08, 2023 at 04:02:40PM +0200, Marek Szyprowski wrote:
> > Hi Jason,
> >
> > On 08.08.2023 15:25, Jason Gunthorpe wrote:
> > > On Tue, Aug 08, 2023 at 03:08:30PM +0200, Marek Szyprowski wrote:
> > >>> Any of the drivers that use platform device as the iommu_device will
> > >>> have a problem, please try:
> > >>>
> > >>> https://lore.kernel.org/linux-iommu/ZNIz%2FNVLb6WqqvQx@nvidia.com/
> > >> I've checked and it doesn't help in my case. I will soon check why.
> > > Oh, I botched it. Forgot that the iommu_device->dev is the sysfs
> > > handle not the HW device. Maybe this:
> >
> > This fixed the early lockup, but then system hangs again a bit later. It
> > looks that this device lock in __iommu_probe_device() is really
> > problematic,
>
> Yes, I expected we'd hit something like this - I checked alot of call
> paths but missed these two. The self-probe is sneaky, but here the
> device_lock is held way up the call chain, I just missed it.
>
> The fix is to just annotate that we already hold the lock when calling
> iommu_probe_device(), since we know in those cases that we must be
> holding it:
This fixed things for me, so
Tested-by: Chen-Yu Tsai <wenst at chromium.org>
> diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> index daa64dd687524b..3fc5e12f2f1c09 100644
> --- a/drivers/acpi/scan.c
> +++ b/drivers/acpi/scan.c
> @@ -1582,7 +1582,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
> * iommu_probe_device() call for dev, replay it to get things in order.
> */
> if (!err && dev->bus)
> - err = iommu_probe_device(dev);
> + err = iommu_probe_device_locked(dev);
>
> /* Ignore all other errors apart from EPROBE_DEFER */
> if (err == -EPROBE_DEFER) {
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 7dbbcffac21930..b867d7f22954e9 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -264,6 +264,7 @@ int iommu_device_register(struct iommu_device *iommu,
> return -EBUSY;
>
> iommu->ops = ops;
> + iommu->hwdev = hwdev;
> if (hwdev)
> iommu->fwnode = dev_fwnode(hwdev);
>
> @@ -273,7 +274,7 @@ int iommu_device_register(struct iommu_device *iommu,
>
> for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
> iommu_buses[i]->iommu_ops = ops;
> - err = bus_iommu_probe(iommu_buses[i]);
> + err = bus_iommu_probe(iommu_buses[i], iommu);
> }
> if (err)
> iommu_device_unregister(iommu);
> @@ -452,24 +453,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
> struct group_device *gdev;
> int ret;
>
> - if (!ops)
> - return -ENODEV;
> /*
> * Allow __iommu_probe_device() to be safely called in parallel,
> * both dev->iommu_group and the initial setup of dev->iommu are
> * protected this way.
> */
> - device_lock(dev);
> + device_lock_assert(dev);
> +
> + if (!ops)
> + return -ENODEV;
>
> /* Device is probed already if in a group */
> - if (dev->iommu_group) {
> - ret = 0;
> - goto out_unlock;
> - }
> + if (dev->iommu_group)
> + return 0;
>
> ret = iommu_init_device(dev, ops);
> if (ret)
> - goto out_unlock;
> + return ret;
>
> group = dev->iommu_group;
> gdev = iommu_group_alloc_device(group, dev);
> @@ -505,7 +505,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
> list_add_tail(&group->entry, group_list);
> }
> mutex_unlock(&group->mutex);
> - device_unlock(dev);
>
> if (dev_is_pci(dev))
> iommu_dma_set_pci_32bit_workaround(dev);
> @@ -519,12 +518,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
> iommu_deinit_device(dev);
> mutex_unlock(&group->mutex);
> iommu_group_put(group);
> -out_unlock:
> - device_unlock(dev);
> return ret;
> }
>
> -int iommu_probe_device(struct device *dev)
> +int iommu_probe_device_locked(struct device *dev)
> {
> const struct iommu_ops *ops;
> int ret;
> @@ -540,6 +537,16 @@ int iommu_probe_device(struct device *dev)
> return 0;
> }
>
> +int iommu_probe_device(struct device *dev)
> +{
> + int ret;
> +
> + device_lock(dev);
> + ret = iommu_probe_device_locked(dev);
> + device_unlock(dev);
> + return ret;
> +}
> +
> static void __iommu_group_free_device(struct iommu_group *group,
> struct group_device *grp_dev)
> {
> @@ -1784,12 +1791,26 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
> return group->default_domain;
> }
>
> +struct probe_iommu_args {
> + struct list_head *group_list;
> + struct iommu_device *iommu;
> +};
> +
> static int probe_iommu_group(struct device *dev, void *data)
> {
> - struct list_head *group_list = data;
> + struct probe_iommu_args *args = data;
> + bool need_lock;
> int ret;
>
> - ret = __iommu_probe_device(dev, group_list);
> + /* Probing the iommu itself is always done under the device_lock */
> + need_lock = !args->iommu || args->iommu->hwdev != dev;
> +
> + if (need_lock)
> + device_lock(dev);
> + ret = __iommu_probe_device(dev, args->group_list);
> + if (need_lock)
> + device_unlock(dev);
> +
> if (ret == -ENODEV)
> ret = 0;
>
> @@ -1858,13 +1879,16 @@ static void iommu_group_do_probe_finalize(struct device *dev)
> ops->probe_finalize(dev);
> }
>
> -int bus_iommu_probe(const struct bus_type *bus)
> +int bus_iommu_probe(const struct bus_type *bus, struct iommu_device *iommu)
> {
> struct iommu_group *group, *next;
> + struct probe_iommu_args args = {};
> LIST_HEAD(group_list);
> int ret;
>
> - ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
> + args.group_list = &group_list;
> + args.iommu = iommu;
> + ret = bus_for_each_dev(bus, NULL, &args, probe_iommu_group);
> if (ret)
> return ret;
>
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 157b286e36bf3a..b5b7d4bd2cefb9 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -160,7 +160,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
> * probe for dev, replay it to get things in order.
> */
> if (!err && dev->bus)
> - err = iommu_probe_device(dev);
> + err = iommu_probe_device_locked(dev);
>
> /* Ignore all other errors apart from EPROBE_DEFER */
> if (err == -EPROBE_DEFER) {
> diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
> index 97c45f50bf4332..828679abef7503 100644
> --- a/drivers/iommu/omap-iommu.c
> +++ b/drivers/iommu/omap-iommu.c
> @@ -1234,6 +1234,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
> if (err)
> goto out_sysfs;
> obj->has_iommu_driver = true;
> + } else {
> + /* Re-probe bus to probe device attached to this IOMMU */
> + obj->iommu.hwdev = &pdev->dev;
> + bus_iommu_probe(&platform_bus_type, &obj->iommu);
> }
>
> pm_runtime_enable(obj->dev);
> @@ -1242,9 +1246,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
>
> dev_info(&pdev->dev, "%s registered\n", obj->name);
>
> - /* Re-probe bus to probe device attached to this IOMMU */
> - bus_iommu_probe(&platform_bus_type);
> -
> return 0;
>
> out_sysfs:
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index f1e18e81fca78b..96782bfb384462 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -361,6 +361,7 @@ struct iommu_domain_ops {
> * @list: Used by the iommu-core to keep a list of registered iommus
> * @ops: iommu-ops for talking to this iommu
> * @dev: struct device for sysfs handling
> + * @hwdev: The device HW that controls the iommu
> * @singleton_group: Used internally for drivers that have only one group
> * @max_pasids: number of supported PASIDs
> */
> @@ -369,6 +370,7 @@ struct iommu_device {
> const struct iommu_ops *ops;
> struct fwnode_handle *fwnode;
> struct device *dev;
> + struct device *hwdev;
> struct iommu_group *singleton_group;
> u32 max_pasids;
> };
> @@ -465,7 +467,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
> return dev->iommu->iommu_dev->ops;
> }
>
> -extern int bus_iommu_probe(const struct bus_type *bus);
> +extern int bus_iommu_probe(const struct bus_type *bus,
> + struct iommu_device *iommu);
> extern bool iommu_present(const struct bus_type *bus);
> extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
> extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
> @@ -709,6 +712,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv)
> }
>
> int iommu_probe_device(struct device *dev);
> +int iommu_probe_device_locked(struct device *dev);
>
> int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
> int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
>
>
More information about the Linux-rockchip
mailing list