[PATCH v2 00/10] Refine the locking for dev->iommu_group

Chen-Yu Tsai wenst at chromium.org
Tue Aug 8 23:23:37 PDT 2023


On Tue, Aug 8, 2023 at 10:30 PM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> On Tue, Aug 08, 2023 at 04:02:40PM +0200, Marek Szyprowski wrote:
> > Hi Jason,
> >
> > On 08.08.2023 15:25, Jason Gunthorpe wrote:
> > > On Tue, Aug 08, 2023 at 03:08:30PM +0200, Marek Szyprowski wrote:
> > >>> Any of the drivers that use platform device as the iommu_device will
> > >>> have a problem, please try:
> > >>>
> > >>> https://lore.kernel.org/linux-iommu/ZNIz%2FNVLb6WqqvQx@nvidia.com/
> > >> I've checked and it doesn't help in my case. I will soon check why.
> > > Oh, I botched it. Forgot that the iommu_device->dev is the sysfs
> > > handle not the HW device. Maybe this:
> >
> > This fixed the early lockup, but then system hangs again a bit later. It
> > looks that this device lock in __iommu_probe_device() is really
> > problematic,
>
> Yes, I expected we'd hit something like this - I checked alot of call
> paths but missed these two. The self-probe is sneaky, but here the
> device_lock is held way up the call chain, I just missed it.
>
> The fix is to just annotate that we already hold the lock when calling
> iommu_probe_device(), since we know in those cases that we must be
> holding it:

This fixed things for me, so

Tested-by: Chen-Yu Tsai <wenst at chromium.org>

> diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
> index daa64dd687524b..3fc5e12f2f1c09 100644
> --- a/drivers/acpi/scan.c
> +++ b/drivers/acpi/scan.c
> @@ -1582,7 +1582,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
>          * iommu_probe_device() call for dev, replay it to get things in order.
>          */
>         if (!err && dev->bus)
> -               err = iommu_probe_device(dev);
> +               err = iommu_probe_device_locked(dev);
>
>         /* Ignore all other errors apart from EPROBE_DEFER */
>         if (err == -EPROBE_DEFER) {
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 7dbbcffac21930..b867d7f22954e9 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -264,6 +264,7 @@ int iommu_device_register(struct iommu_device *iommu,
>                 return -EBUSY;
>
>         iommu->ops = ops;
> +       iommu->hwdev = hwdev;
>         if (hwdev)
>                 iommu->fwnode = dev_fwnode(hwdev);
>
> @@ -273,7 +274,7 @@ int iommu_device_register(struct iommu_device *iommu,
>
>         for (int i = 0; i < ARRAY_SIZE(iommu_buses) && !err; i++) {
>                 iommu_buses[i]->iommu_ops = ops;
> -               err = bus_iommu_probe(iommu_buses[i]);
> +               err = bus_iommu_probe(iommu_buses[i], iommu);
>         }
>         if (err)
>                 iommu_device_unregister(iommu);
> @@ -452,24 +453,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
>         struct group_device *gdev;
>         int ret;
>
> -       if (!ops)
> -               return -ENODEV;
>         /*
>          * Allow __iommu_probe_device() to be safely called in parallel,
>          * both dev->iommu_group and the initial setup of dev->iommu are
>          * protected this way.
>          */
> -       device_lock(dev);
> +       device_lock_assert(dev);
> +
> +       if (!ops)
> +               return -ENODEV;
>
>         /* Device is probed already if in a group */
> -       if (dev->iommu_group) {
> -               ret = 0;
> -               goto out_unlock;
> -       }
> +       if (dev->iommu_group)
> +               return 0;
>
>         ret = iommu_init_device(dev, ops);
>         if (ret)
> -               goto out_unlock;
> +               return ret;
>
>         group = dev->iommu_group;
>         gdev = iommu_group_alloc_device(group, dev);
> @@ -505,7 +505,6 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
>                         list_add_tail(&group->entry, group_list);
>         }
>         mutex_unlock(&group->mutex);
> -       device_unlock(dev);
>
>         if (dev_is_pci(dev))
>                 iommu_dma_set_pci_32bit_workaround(dev);
> @@ -519,12 +518,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
>         iommu_deinit_device(dev);
>         mutex_unlock(&group->mutex);
>         iommu_group_put(group);
> -out_unlock:
> -       device_unlock(dev);
>         return ret;
>  }
>
> -int iommu_probe_device(struct device *dev)
> +int iommu_probe_device_locked(struct device *dev)
>  {
>         const struct iommu_ops *ops;
>         int ret;
> @@ -540,6 +537,16 @@ int iommu_probe_device(struct device *dev)
>         return 0;
>  }
>
> +int iommu_probe_device(struct device *dev)
> +{
> +       int ret;
> +
> +       device_lock(dev);
> +       ret = iommu_probe_device_locked(dev);
> +       device_unlock(dev);
> +       return ret;
> +}
> +
>  static void __iommu_group_free_device(struct iommu_group *group,
>                                       struct group_device *grp_dev)
>  {
> @@ -1784,12 +1791,26 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group)
>         return group->default_domain;
>  }
>
> +struct probe_iommu_args {
> +       struct list_head *group_list;
> +       struct iommu_device *iommu;
> +};
> +
>  static int probe_iommu_group(struct device *dev, void *data)
>  {
> -       struct list_head *group_list = data;
> +       struct probe_iommu_args *args = data;
> +       bool need_lock;
>         int ret;
>
> -       ret = __iommu_probe_device(dev, group_list);
> +       /* Probing the iommu itself is always done under the device_lock */
> +       need_lock = !args->iommu || args->iommu->hwdev != dev;
> +
> +       if (need_lock)
> +               device_lock(dev);
> +       ret = __iommu_probe_device(dev, args->group_list);
> +       if (need_lock)
> +               device_unlock(dev);
> +
>         if (ret == -ENODEV)
>                 ret = 0;
>
> @@ -1858,13 +1879,16 @@ static void iommu_group_do_probe_finalize(struct device *dev)
>                 ops->probe_finalize(dev);
>  }
>
> -int bus_iommu_probe(const struct bus_type *bus)
> +int bus_iommu_probe(const struct bus_type *bus, struct iommu_device *iommu)
>  {
>         struct iommu_group *group, *next;
> +       struct probe_iommu_args args = {};
>         LIST_HEAD(group_list);
>         int ret;
>
> -       ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group);
> +       args.group_list = &group_list;
> +       args.iommu = iommu;
> +       ret = bus_for_each_dev(bus, NULL, &args, probe_iommu_group);
>         if (ret)
>                 return ret;
>
> diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
> index 157b286e36bf3a..b5b7d4bd2cefb9 100644
> --- a/drivers/iommu/of_iommu.c
> +++ b/drivers/iommu/of_iommu.c
> @@ -160,7 +160,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev,
>          * probe for dev, replay it to get things in order.
>          */
>         if (!err && dev->bus)
> -               err = iommu_probe_device(dev);
> +               err = iommu_probe_device_locked(dev);
>
>         /* Ignore all other errors apart from EPROBE_DEFER */
>         if (err == -EPROBE_DEFER) {
> diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
> index 97c45f50bf4332..828679abef7503 100644
> --- a/drivers/iommu/omap-iommu.c
> +++ b/drivers/iommu/omap-iommu.c
> @@ -1234,6 +1234,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
>                 if (err)
>                         goto out_sysfs;
>                 obj->has_iommu_driver = true;
> +       } else {
> +               /* Re-probe bus to probe device attached to this IOMMU */
> +               obj->iommu.hwdev = &pdev->dev;
> +               bus_iommu_probe(&platform_bus_type, &obj->iommu);
>         }
>
>         pm_runtime_enable(obj->dev);
> @@ -1242,9 +1246,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
>
>         dev_info(&pdev->dev, "%s registered\n", obj->name);
>
> -       /* Re-probe bus to probe device attached to this IOMMU */
> -       bus_iommu_probe(&platform_bus_type);
> -
>         return 0;
>
>  out_sysfs:
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index f1e18e81fca78b..96782bfb384462 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -361,6 +361,7 @@ struct iommu_domain_ops {
>   * @list: Used by the iommu-core to keep a list of registered iommus
>   * @ops: iommu-ops for talking to this iommu
>   * @dev: struct device for sysfs handling
> + * @hwdev: The device HW that controls the iommu
>   * @singleton_group: Used internally for drivers that have only one group
>   * @max_pasids: number of supported PASIDs
>   */
> @@ -369,6 +370,7 @@ struct iommu_device {
>         const struct iommu_ops *ops;
>         struct fwnode_handle *fwnode;
>         struct device *dev;
> +       struct device *hwdev;
>         struct iommu_group *singleton_group;
>         u32 max_pasids;
>  };
> @@ -465,7 +467,8 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
>         return dev->iommu->iommu_dev->ops;
>  }
>
> -extern int bus_iommu_probe(const struct bus_type *bus);
> +extern int bus_iommu_probe(const struct bus_type *bus,
> +                          struct iommu_device *iommu);
>  extern bool iommu_present(const struct bus_type *bus);
>  extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap);
>  extern bool iommu_group_has_isolated_msi(struct iommu_group *group);
> @@ -709,6 +712,7 @@ static inline void dev_iommu_priv_set(struct device *dev, void *priv)
>  }
>
>  int iommu_probe_device(struct device *dev);
> +int iommu_probe_device_locked(struct device *dev);
>
>  int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
>  int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
>
>



More information about the linux-arm-kernel mailing list