[PATCH v3] iommu/arm-smmu-v3: Do not use GFP_KERNEL under as spinlock

Michael Shavit mshavit at google.com
Wed Feb 21 18:37:56 PST 2024


On Thu, Feb 22, 2024 at 8:27 AM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> If the SMMU is configured to use a two level CD table then
> arm_smmu_write_ctx_desc() allocates a CD table leaf internally using
> GFP_KERNEL. Due to recent changes this is being done under a spinlock to
> iterate over the device list - thus it will trigger a sleeping while
> atomic warning:
>
>   arm_smmu_sva_set_dev_pasid()
>     mutex_lock(&sva_lock);
>     __arm_smmu_sva_bind()
>      arm_smmu_mmu_notifier_get()
>       spin_lock_irqsave()
>       arm_smmu_write_ctx_desc()
>         arm_smmu_get_cd_ptr()
>          arm_smmu_alloc_cd_leaf_table()
>           dmam_alloc_coherent(GFP_KERNEL)
>
> This is a 64K high order allocation and really should not be done
> atomically.
>
> At the moment the rework of the SVA to follow the new API is half
> finished. Recently the CD table memory was moved from the domain to the
> master, however we have the confusing situation where the SVA code is
> wrongly using the RID domains device's list to track which CD tables the
> SVA is installed in.
>
> Remove the logic to replicate the CD across all the domain's masters
> during attach. We know which master and which CD table the PASID should be
> installed in.
>
> Right now SVA only works when dma-iommu.c is in control of the RID
> translation, which means we have a single iommu_domain shared across the
> entire group and that iommu_domain is not shared outside the group.
>
> Critically this means that the iommu_group->devices list and RID's
> smmu_domain->devices list describe the same set of masters.
>
> For PCI cases the core code also insists on singleton groups so there is
> only one entry in the smmu_domain->devices list that is equal to the
> master being passed in to arm_smmu_sva_set_dev_pasid().
>
> Only non-PCI cases may have multi-device groups. However, the core code
> will repeat the calls to arm_smmu_sva_set_dev_pasid() across the entire
> iommu_group->devices list.
>
> Instead of having arm_smmu_mmu_notifier_get() indirectly loop over all the
> devices in the group via the RID's smmu_domain, rely on
> __arm_smmu_sva_bind() to be called for each device in the group and
> install the repeated CD entry that way.
>
> This avoids taking the spinlock to access the devices list and permits the
> arm_smmu_write_ctx_desc() to use a sleeping allocation. Leave the
> arm_smmu_mm_release() as a confusing situation, this requires tracking
> attached masters inside the SVA domain.
>
> Removing the loop allows arm_smmu_write_ctx_desc() to be called outside
> the spinlock and thus is safe to use GFP_KERNEL.
>
> Move the clearing of the CD into arm_smmu_sva_remove_dev_pasid() so that
> arm_smmu_mmu_notifier_get/put() remain paired functions.
>
> Fixes: 24503148c545 ("iommu/arm-smmu-v3: Refactor write_ctx_desc")
> Reported-by: Dan Carpenter <dan.carpenter at linaro.org>
> Closes: https://lore.kernel.org/all/4e25d161-0cf8-4050-9aa3-dfa21cd63e56@moroto.mountain/
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
>  .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 38 ++++++-------------
>  1 file changed, 12 insertions(+), 26 deletions(-)
>
> I suggest taking these two patches as well which will further help this code:
>
> https://lore.kernel.org/linux-iommu/12-v5-cd1be8dd9c71+3fa-smmuv3_newapi_p1_jgg@nvidia.com/
> https://lore.kernel.org/linux-iommu/2-v4-e7091cdd9e8d+43b1-smmuv3_newapi_p2_jgg@nvidia.com/
>
> Michael should check it, and Nicolin or Shameer should do a quick sanity check
> on their SVA HW.
>
> v3:
>   - Pull arm_smmu_write_ctx_desc() up into __arm_smmu_sva_bind() and
>     arm_smmu_sva_remove_dev_pasid() to avoid the early exit in
>     arm_smmu_mmu_notifier_get()
> v2: https://lore.kernel.org/all/0-v2-7988c2682634+261-smmu_cd_atomic_jgg@nvidia.com/
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 05722121f00e70..4a27fbdb2d8446 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -292,10 +292,8 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
>                           struct mm_struct *mm)
>  {
>         int ret;
> -       unsigned long flags;
>         struct arm_smmu_ctx_desc *cd;
>         struct arm_smmu_mmu_notifier *smmu_mn;
> -       struct arm_smmu_master *master;
>
>         list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
>                 if (smmu_mn->mn.mm == mm) {
> @@ -325,28 +323,9 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
>                 goto err_free_cd;
>         }
>
> -       spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> -       list_for_each_entry(master, &smmu_domain->devices, domain_head) {
> -               ret = arm_smmu_write_ctx_desc(master, mm_get_enqcmd_pasid(mm),
> -                                             cd);
> -               if (ret) {
> -                       list_for_each_entry_from_reverse(
> -                               master, &smmu_domain->devices, domain_head)
> -                               arm_smmu_write_ctx_desc(
> -                                       master, mm_get_enqcmd_pasid(mm), NULL);
> -                       break;
> -               }
> -       }
> -       spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> -       if (ret)
> -               goto err_put_notifier;
> -
>         list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers);
>         return smmu_mn;
>
> -err_put_notifier:
> -       /* Frees smmu_mn */
> -       mmu_notifier_put(&smmu_mn->mn);
>  err_free_cd:
>         arm_smmu_free_shared_cd(cd);
>         return ERR_PTR(ret);
> @@ -363,9 +342,6 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
>
>         list_del(&smmu_mn->list);
>
> -       arm_smmu_update_ctx_desc_devices(smmu_domain, mm_get_enqcmd_pasid(mm),
> -                                        NULL);
> -
>         /*
>          * If we went through clear(), we've already invalidated, and no
>          * new TLB entry can have been formed.
> @@ -381,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
>         arm_smmu_free_shared_cd(cd);
>  }
>
> -static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
> +static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid,
> +                              struct mm_struct *mm)
>  {
>         int ret;
>         struct arm_smmu_bond *bond;
> @@ -404,9 +381,15 @@ static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
>                 goto err_free_bond;
>         }
>
> +       ret = arm_smmu_write_ctx_desc(master, pasid, bond->smmu_mn->cd);
> +       if (ret)
> +               goto err_put_notifier;
> +
>         list_add(&bond->list, &master->bonds);
>         return 0;
>
> +err_put_notifier:
> +       arm_smmu_mmu_notifier_put(bond->smmu_mn);
>  err_free_bond:
>         kfree(bond);
>         return ret;
> @@ -568,6 +551,9 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
>         struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>
>         mutex_lock(&sva_lock);
> +
> +       arm_smmu_write_ctx_desc(master, id, NULL);
> +
>         list_for_each_entry(t, &master->bonds, list) {
>                 if (t->mm == mm) {
>                         bond = t;
> @@ -590,7 +576,7 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
>         struct mm_struct *mm = domain->mm;
>
>         mutex_lock(&sva_lock);
> -       ret = __arm_smmu_sva_bind(dev, mm);
> +       ret = __arm_smmu_sva_bind(dev, id, mm);
>         mutex_unlock(&sva_lock);
>
>         return ret;
>
> base-commit: c76c50cd425e574df5a071cd7e1805d0764aabff
> --
> 2.43.2
>
Reviewed-by: Michael Shavit <mshavit at google.com>



More information about the linux-arm-kernel mailing list