[PATCH v8 04/14] iommu/arm-smmu-v3: Make changing domains be hitless for ATS
Michael Shavit
mshavit at google.com
Wed Jun 19 03:20:56 PDT 2024
On Tue, Jun 4, 2024 at 8:16 AM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> The core code allows the domain to be changed on the fly without a forced
> stop in BLOCKED/IDENTITY. In this flow the driver should just continually
> maintain the ATS with no change while the STE is updated.
>
> ATS relies on a linked list smmu_domain->devices to keep track of which
> masters have the domain programmed, but this list is also used by
> arm_smmu_share_asid(), unrelated to ats.
>
> Create two new functions to encapsulate this combined logic:
> arm_smmu_attach_prepare()
> <caller generates and sets the STE>
> arm_smmu_attach_commit()
>
> The two functions can sequence both enabling ATS and disabling across
> the STE store. Have every update of the STE use this sequence.
>
> Installing a S1/S2 domain always enables the ATS if the PCIe device
> supports it.
>
> The enable flow is now ordered differently to allow it to be hitless:
>
> 1) Add the master to the new smmu_domain->devices list
> 2) Program the STE
> 3) Enable ATS at PCIe
> 4) Remove the master from the old smmu_domain
>
> This flow ensures that invalidations to either domain will generate an ATC
> invalidation to the device while the STE is being switched. Thus we don't
> need to turn off the ATS anymore for correctness.
>
> The disable flow is the reverse:
> 1) Disable ATS at PCIe
> 2) Program the STE
> 3) Invalidate the ATC
> 4) Remove the master from the old smmu_domain
>
> Move the nr_ats_masters adjustments to be close to the list
> manipulations. It is a count of the number of ATS enabled masters
> currently in the list. This is stricly before and after the STE/CD are
> revised, and done under the list's spin_lock.
>
> This is part of the bigger picture to allow changing the RID domain while
> a PASID is in use. If a SVA PASID is relying on ATS to function then
> changing the RID domain cannot just temporarily toggle ATS off without
> also wrecking the SVA PASID. The new infrastructure here is organized so
> that the PASID attach/detach flows will make use of it as well in
> following patches.
>
> Tested-by: Nicolin Chen <nicolinc at nvidia.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
> .../iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c | 5 +-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 238 +++++++++++++-----
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 6 +-
> 3 files changed, 178 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
> index 315e487fd990eb..a460b71f585789 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c
> @@ -164,7 +164,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste,
> .smmu = &smmu,
> };
>
> - arm_smmu_make_cdtable_ste(ste, &master);
> + arm_smmu_make_cdtable_ste(ste, &master, true);
> }
>
> static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test)
> @@ -231,7 +231,6 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
> {
> struct arm_smmu_master master = {
> .smmu = &smmu,
> - .ats_enabled = ats_enabled,
> };
> struct io_pgtable io_pgtable = {};
> struct arm_smmu_domain smmu_domain = {
> @@ -247,7 +246,7 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste,
> io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3;
> io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4;
>
> - arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain);
> + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain, ats_enabled);
> }
>
> static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test)
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 532fe17f28bfe5..24f42ff39f77a9 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1538,7 +1538,7 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
>
> VISIBLE_IF_KUNIT
> void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> - struct arm_smmu_master *master)
> + struct arm_smmu_master *master, bool ats_enabled)
> {
> struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> struct arm_smmu_device *smmu = master->smmu;
> @@ -1561,7 +1561,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> STRTAB_STE_1_S1STALLD :
> 0) |
> FIELD_PREP(STRTAB_STE_1_EATS,
> - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
> + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
>
> if (smmu->features & ARM_SMMU_FEAT_E2H) {
> /*
> @@ -1591,7 +1591,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
> VISIBLE_IF_KUNIT
> void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
> struct arm_smmu_master *master,
> - struct arm_smmu_domain *smmu_domain)
> + struct arm_smmu_domain *smmu_domain,
> + bool ats_enabled)
> {
> struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
> const struct io_pgtable_cfg *pgtbl_cfg =
> @@ -1608,7 +1609,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
>
> target->data[1] = cpu_to_le64(
> FIELD_PREP(STRTAB_STE_1_EATS,
> - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
> + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
>
> if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
> target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
> @@ -2450,22 +2451,16 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
> return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
> }
>
> -static void arm_smmu_enable_ats(struct arm_smmu_master *master,
> - struct arm_smmu_domain *smmu_domain)
> +static void arm_smmu_enable_ats(struct arm_smmu_master *master)
> {
> size_t stu;
> struct pci_dev *pdev;
> struct arm_smmu_device *smmu = master->smmu;
>
> - /* Don't enable ATS at the endpoint if it's not enabled in the STE */
> - if (!master->ats_enabled)
> - return;
> -
> /* Smallest Translation Unit: log2 of the smallest supported granule */
> stu = __ffs(smmu->pgsize_bitmap);
> pdev = to_pci_dev(master->dev);
>
> - atomic_inc(&smmu_domain->nr_ats_masters);
> /*
> * ATC invalidation of PASID 0 causes the entire ATC to be flushed.
> */
> @@ -2474,22 +2469,6 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master,
> dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
> }
>
> -static void arm_smmu_disable_ats(struct arm_smmu_master *master,
> - struct arm_smmu_domain *smmu_domain)
> -{
> - if (!master->ats_enabled)
> - return;
> -
> - pci_disable_ats(to_pci_dev(master->dev));
> - /*
> - * Ensure ATS is disabled at the endpoint before we issue the
> - * ATC invalidation via the SMMU.
> - */
> - wmb();
> - arm_smmu_atc_inv_master(master);
> - atomic_dec(&smmu_domain->nr_ats_masters);
> -}
> -
> static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
> {
> int ret;
> @@ -2553,46 +2532,182 @@ arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
> return NULL;
> }
>
> -static void arm_smmu_detach_dev(struct arm_smmu_master *master)
> +/*
> + * If the domain uses the smmu_domain->devices list return the arm_smmu_domain
> + * structure, otherwise NULL. These domains track attached devices so they can
> + * issue invalidations.
> + */
> +static struct arm_smmu_domain *
> +to_smmu_domain_devices(struct iommu_domain *domain)
> {
> - struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev);
> + /* The domain can be NULL only when processing the first attach */
> + if (!domain)
> + return NULL;
> + if (domain->type & __IOMMU_DOMAIN_PAGING)
> + return to_smmu_domain(domain);
> + return NULL;
> +}
> +
> +static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
> + struct iommu_domain *domain)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
> struct arm_smmu_master_domain *master_domain;
> - struct arm_smmu_domain *smmu_domain;
> unsigned long flags;
>
> - if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING))
> + if (!smmu_domain)
> return;
>
> - smmu_domain = to_smmu_domain(domain);
> - arm_smmu_disable_ats(master, smmu_domain);
> -
> spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> master_domain = arm_smmu_find_master_domain(smmu_domain, master);
> if (master_domain) {
> list_del(&master_domain->devices_elm);
> kfree(master_domain);
> + if (master->ats_enabled)
> + atomic_dec(&smmu_domain->nr_ats_masters);
> }
> spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +}
>
> - master->ats_enabled = false;
> +struct arm_smmu_attach_state {
> + /* Inputs */
> + struct iommu_domain *old_domain;
> + struct arm_smmu_master *master;
> + /* Resulting state */
> + bool ats_enabled;
> +};
> +
> +/*
> + * Start the sequence to attach a domain to a master. The sequence contains three
> + * steps:
> + * arm_smmu_attach_prepare()
> + * arm_smmu_install_ste_for_dev()
> + * arm_smmu_attach_commit()
> + *
> + * If prepare succeeds then the sequence must be completed. The STE installed
> + * must set the STE.EATS field according to state.ats_enabled.
> + *
> + * ATS is automatically enabled if the underlying device supports it.
> + * disable_ats can inhibit this to support STEs like bypass that don't allow
> + * ATS.
This comment is out of date since disable_ats was removed between v7 and v8.
A nit, but "automatically" is also a little imprecise IMO (almost
sounds like the device is automatically enabling it). How about:
+ * ATS is enabled after the STE is installed if the new domain and
underlying device
+ * supports it. On the other hand, ATS is disabled before installing
the STE if it doesn't
+ * support ATS like bypass domains.
Or something else if that's too redundant with the next paragraph :) .
> + *
> + * The change of the EATS in the STE and the PCI ATS config space is managed by
> + * this sequence to be in the right order such that if PCI ATS is enabled then
> + * STE.ETAS is enabled.
> + *
> + * new_domain can be NULL if the domain being attached does not have a page
> + * table and does not require invalidation tracking, and does not support ATS.
> + */
This is also confusing, new_domain is never NULL. It's
to_smmu_domain_devices(new_domain) that can be null.
> +static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
> + struct iommu_domain *new_domain)
> +{
> + struct arm_smmu_master *master = state->master;
> + struct arm_smmu_master_domain *master_domain;
> + struct arm_smmu_domain *smmu_domain =
> + to_smmu_domain_devices(new_domain);
> + unsigned long flags;
> +
> + /*
> + * arm_smmu_share_asid() must not see two domains pointing to the same
> + * arm_smmu_master_domain contents otherwise it could randomly write one
> + * or the other to the CD.
> + */
> + lockdep_assert_held(&arm_smmu_asid_lock);
> +
> + if (smmu_domain) {
> + /*
> + * The SMMU does not support enabling ATS with bypass/abort.
> + * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
> + * Translation Requests and Translated transactions are denied
> + * as though ATS is disabled for the stream (STE.EATS == 0b00),
> + * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
> + * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be
> + * enabled if we have arm_smmu_domain, those always have page
> + * tables.
> + */
> + state->ats_enabled = arm_smmu_ats_supported(master);
> +
> + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
> + if (!master_domain)
> + return -ENOMEM;
> + master_domain->master = master;
> +
> + /*
> + * During prepare we want the current smmu_domain and new
> + * smmu_domain to be in the devices list before we change any
> + * HW. This ensures that both domains will send ATS
> + * invalidations to the master until we are done.
> + *
> + * It is tempting to make this list only track masters that are
> + * using ATS, but arm_smmu_share_asid() also uses this to change
> + * the ASID of a domain, unrelated to ATS.
> + *
> + * Notice if we are re-attaching the same domain then the list
> + * will have two identical entries and commit will remove only
> + * one of them.
> + */
> + spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> + if (state->ats_enabled)
> + atomic_inc(&smmu_domain->nr_ats_masters);
> + list_add(&master_domain->devices_elm, &smmu_domain->devices);
> + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> + }
> +
> + if (!state->ats_enabled && master->ats_enabled) {
> + pci_disable_ats(to_pci_dev(master->dev));
> + /*
> + * This is probably overkill, but the config write for disabling
> + * ATS should complete before the STE is configured to generate
> + * UR to avoid AER noise.
> + */
> + wmb();
> + }
> + return 0;
> +}
> +
> +/*
> + * Commit is done after the STE/CD are configured with the EATS setting. It
> + * completes synchronizing the PCI device's ATC and finishes manipulating the
> + * smmu_domain->devices list.
> + */
> +static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
> +{
> + struct arm_smmu_master *master = state->master;
> +
> + lockdep_assert_held(&arm_smmu_asid_lock);
> +
> + if (state->ats_enabled && !master->ats_enabled) {
> + arm_smmu_enable_ats(master);
> + } else if (master->ats_enabled) {
> + /*
> + * The translation has changed, flush the ATC. At this point the
> + * SMMU is translating for the new domain and both the old&new
> + * domain will issue invalidations.
> + */
> + arm_smmu_atc_inv_master(master);
> + }
> + master->ats_enabled = state->ats_enabled;
> +
> + arm_smmu_remove_master_domain(master, state->old_domain);
> }
>
> static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> {
> int ret = 0;
> - unsigned long flags;
> struct arm_smmu_ste target;
> struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
> struct arm_smmu_device *smmu;
> struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> - struct arm_smmu_master_domain *master_domain;
> + struct arm_smmu_attach_state state = {
> + .old_domain = iommu_get_domain_for_dev(dev),
> + };
> struct arm_smmu_master *master;
> struct arm_smmu_cd *cdptr;
>
> if (!fwspec)
> return -ENOENT;
>
> - master = dev_iommu_priv_get(dev);
> + state.master = master = dev_iommu_priv_get(dev);
> smmu = master->smmu;
>
> /*
> @@ -2622,11 +2737,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> return -ENOMEM;
> }
>
> - master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
> - if (!master_domain)
> - return -ENOMEM;
> - master_domain->master = master;
> -
> /*
> * Prevent arm_smmu_share_asid() from trying to change the ASID
> * of either the old or new domain while we are working on it.
> @@ -2635,13 +2745,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> */
> mutex_lock(&arm_smmu_asid_lock);
>
> - arm_smmu_detach_dev(master);
> -
> - master->ats_enabled = arm_smmu_ats_supported(master);
> -
> - spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> - list_add(&master_domain->devices_elm, &smmu_domain->devices);
> - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> + ret = arm_smmu_attach_prepare(&state, domain);
> + if (ret) {
> + mutex_unlock(&arm_smmu_asid_lock);
> + return ret;
> + }
>
> switch (smmu_domain->stage) {
> case ARM_SMMU_DOMAIN_S1: {
> @@ -2650,18 +2758,19 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
> arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
> &target_cd);
> - arm_smmu_make_cdtable_ste(&target, master);
> + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled);
> arm_smmu_install_ste_for_dev(master, &target);
> break;
> }
> case ARM_SMMU_DOMAIN_S2:
> - arm_smmu_make_s2_domain_ste(&target, master, smmu_domain);
> + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
> + state.ats_enabled);
> arm_smmu_install_ste_for_dev(master, &target);
> arm_smmu_clear_cd(master, IOMMU_NO_PASID);
> break;
> }
>
> - arm_smmu_enable_ats(master, smmu_domain);
> + arm_smmu_attach_commit(&state);
> mutex_unlock(&arm_smmu_asid_lock);
> return 0;
> }
> @@ -2690,10 +2799,14 @@ void arm_smmu_remove_pasid(struct arm_smmu_master *master,
> arm_smmu_clear_cd(master, pasid);
> }
>
> -static int arm_smmu_attach_dev_ste(struct device *dev,
> - struct arm_smmu_ste *ste)
> +static int arm_smmu_attach_dev_ste(struct iommu_domain *domain,
> + struct device *dev, struct arm_smmu_ste *ste)
> {
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> + struct arm_smmu_attach_state state = {
> + .master = master,
> + .old_domain = iommu_get_domain_for_dev(dev),
> + };
>
> if (arm_smmu_master_sva_enabled(master))
> return -EBUSY;
> @@ -2704,16 +2817,9 @@ static int arm_smmu_attach_dev_ste(struct device *dev,
> */
> mutex_lock(&arm_smmu_asid_lock);
>
> - /*
> - * The SMMU does not support enabling ATS with bypass/abort. When the
> - * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests
> - * and Translated transactions are denied as though ATS is disabled for
> - * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
> - * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
> - */
> - arm_smmu_detach_dev(master);
> -
> + arm_smmu_attach_prepare(&state, domain);
> arm_smmu_install_ste_for_dev(master, ste);
> + arm_smmu_attach_commit(&state);
> mutex_unlock(&arm_smmu_asid_lock);
>
> /*
> @@ -2732,7 +2838,7 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>
> arm_smmu_make_bypass_ste(master->smmu, &ste);
> - return arm_smmu_attach_dev_ste(dev, &ste);
> + return arm_smmu_attach_dev_ste(domain, dev, &ste);
> }
>
> static const struct iommu_domain_ops arm_smmu_identity_ops = {
> @@ -2750,7 +2856,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
> struct arm_smmu_ste ste;
>
> arm_smmu_make_abort_ste(&ste);
> - return arm_smmu_attach_dev_ste(dev, &ste);
> + return arm_smmu_attach_dev_ste(domain, dev, &ste);
> }
>
> static const struct iommu_domain_ops arm_smmu_blocked_ops = {
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 01769b5286a83a..f9b4bfb2e6b723 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -758,10 +758,12 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target);
> void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
> struct arm_smmu_ste *target);
> void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
> - struct arm_smmu_master *master);
> + struct arm_smmu_master *master,
> + bool ats_enabled);
> void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
> struct arm_smmu_master *master,
> - struct arm_smmu_domain *smmu_domain);
> + struct arm_smmu_domain *smmu_domain,
> + bool ats_enabled);
> void arm_smmu_make_sva_cd(struct arm_smmu_cd *target,
> struct arm_smmu_master *master, struct mm_struct *mm,
> u16 asid);
> --
> 2.45.2
>
Reviewed-by: Michael Shavit <mshavit at google.com>
More information about the linux-arm-kernel
mailing list