[PATCH v5 24/27] iommu/arm-smmu-v3: Bring back SVA BTM support
Michael Shavit
mshavit at google.com
Tue Mar 19 10:07:01 PDT 2024
On Tue, Mar 5, 2024 at 7:44 AM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> BTM (Broadcast TLB Maintenance) support is a feature where CPU TLB
> invalidations can be forwarded to the IOMMU to also invalidate the
> IOTLB. For this to work, the CPU and IOMMU ASIDs must be the same.
>
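As an aside for anyone following the thread: the contract here boils
down to pinning the mm's CPU ASID and programming that same value into
the CD. A minimal sketch of the idea, assuming BTM is enabled; the
helper name is mine, not anything in this patch:

static int sketch_bind_sva_asid(struct arm_smmu_domain *smmu_domain,
				struct mm_struct *mm)
{
	/* Pin the CPU ASID so it can't be recycled while the SMMU uses it */
	smmu_domain->cd.asid = arm64_mm_context_get(mm);
	if (!smmu_domain->cd.asid)
		return -ENOSPC;

	/*
	 * With CD.ASID equal to the CPU ASID, broadcast TLB invalidations
	 * issued by the CPU also remove the matching IOTLB entries.
	 */
	return 0;
}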
> Retain the prior SVA design here of keeping the ASID allocator for the
> IOMMU private to SMMU and force SVA domains to set an ASID that matches
> the CPU ASID.
>
> This requires changing the ASID assigned to an S1 domain if it happens
> to overlap with the required CPU ASID. We hold on to the CPU ASID for
> as long as the SVA iommu_domain exists, so an SVA domain conflict is
> not possible.
>
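To illustrate the conflict case: xa_store() returns whatever entry was
previously stored at that index, which is how an S1 domain already
sitting on the required ASID gets discovered and moved aside. A
simplified sketch, eliding the SVA-vs-SVA WARN and the error unwind
from the real code further down:

	struct arm_smmu_domain *old_s1_domain;
	int ret = 0;

	old_s1_domain = xa_store(&smmu->asid_map, smmu_domain->cd.asid,
				 smmu_domain, GFP_KERNEL);
	if (xa_err(old_s1_domain))
		return xa_err(old_s1_domain);
	if (old_s1_domain)
		/* An S1 domain owned this ASID; move it to a fresh one */
		ret = arm_smmu_realloc_s1_domain_asid(smmu, old_s1_domain);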
> With the ASID now per-SMMU we no longer have the problem of two
> per-SMMU iommu_domains needing to share a CPU ASID entry in the
> IOMMU's xarray.
>
> Use the same ASID move algorithm for the S1 domains as before, with
> some streamlining around how the xarray is used. Do not synchronize
> the ASIDs if BTM mode is not supported; just leave the BTM features
> off everywhere.
>
> Audit all the places that touch cd->asid and think carefully about how
> the locking interacts with the move algorithm changing cd->asid. Use
> the xarray's internal locking during xa_alloc() instead of double
> locking. Add a note that concurrent S1 invalidation doesn't fully
> work.
>
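For reference, the lockless readers end up with the shape below
(condensed from the invalidation hunk later in this patch): cd->asid is
sampled exactly once, so a concurrent ASID move produces either the old
or the new ASID, never a mix across two invalidation commands:

	u32 asid = READ_ONCE(smmu_domain->cd.asid);

	if (!size)
		arm_smmu_tlb_inv_asid(smmu_domain->smmu, asid);
	else
		arm_smmu_tlb_inv_range_asid(start, size, asid,
					    PAGE_SIZE, false, smmu_domain);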
> Note that this is all still dead code, ARM_SMMU_FEAT_BTM is never set.
Clearly a lot of work and thought has gone into this patch, but I have
to wonder whether this complexity is worth carrying forward for a
feature that was never enabled... Is moving this to part 3 or dropping
it altogether an option?
>
> Tested-by: Nicolin Chen <nicolinc at nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
> .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 133 ++++++++++++++++--
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 +-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 +-
> 3 files changed, 129 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> index 3acd699433b7d8..0b5aeaa3a85575 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
> @@ -15,12 +15,33 @@
>
> static DEFINE_MUTEX(sva_lock);
>
> -static void __maybe_unused
> -arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
> +static int arm_smmu_realloc_s1_domain_asid(struct arm_smmu_device *smmu,
> + struct arm_smmu_domain *smmu_domain)
> {
> struct arm_smmu_master_domain *master_domain;
> + u32 old_asid = smmu_domain->cd.asid;
> struct arm_smmu_cd target_cd;
> unsigned long flags;
> + int ret;
> +
> + lockdep_assert_held(&smmu->asid_lock);
> +
> + /*
> + * FIXME: The unmap and invalidation path doesn't take any locks but
> + * this is not fully safe. Since updating the CD tables is not atomic
> + * there is always a hole where invalidating only one ASID of two active
> + * ASIDs during unmap will cause the IOTLB to become stale.
> + *
> + * This approach is to hopefully shift the racing CPUs to the new ASID
> + * before we start programming the HW. This increases the chance that
> + * racing IOPTE changes will pick up an invalidation for the new ASID
> + * and we achieve eventual consistency. For the brief period where the
> + * old ASID is still in the CD entries it will become incoherent.
> + */
> + ret = xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid, smmu_domain,
> + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> + if (ret)
> + return ret;
>
> spin_lock_irqsave(&smmu_domain->devices_lock, flags);
> list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) {
> @@ -36,6 +57,10 @@ arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain)
> &target_cd);
> }
> spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
> +
> + /* Clean the ASID we are about to assign to a new translation */
> + arm_smmu_tlb_inv_asid(smmu, old_asid);
> + return 0;
> }
>
> static u64 page_size_to_cd(void)
> @@ -148,12 +173,12 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
> }
>
> if (!smmu_domain->btm_invalidation) {
> + ioasid_t asid = READ_ONCE(smmu_domain->cd.asid);
> +
> if (!size)
> - arm_smmu_tlb_inv_asid(smmu_domain->smmu,
> - smmu_domain->cd.asid);
> + arm_smmu_tlb_inv_asid(smmu_domain->smmu, asid);
> else
> - arm_smmu_tlb_inv_range_asid(start, size,
> - smmu_domain->cd.asid,
> + arm_smmu_tlb_inv_range_asid(start, size, asid,
> PAGE_SIZE, false,
> smmu_domain);
> }
> @@ -182,6 +207,8 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid);
> if (WARN_ON(!cdptr))
> continue;
> +
> + /* An SVA ASID never changes, no asid_lock required */
> arm_smmu_make_sva_cd(&target, master, NULL,
> smmu_domain->cd.asid,
> smmu_domain->btm_invalidation);
> @@ -388,6 +415,8 @@ static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
> * unnecessary invalidation.
> */
> arm_smmu_domain_free_id(smmu_domain);
> + if (smmu_domain->btm_invalidation)
> + arm64_mm_context_put(domain->mm);
>
> /*
> * Actual free is deferred to the SRCU callback
> @@ -401,13 +430,97 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
> .free = arm_smmu_sva_domain_free
> };
>
> +static int arm_smmu_share_asid(struct arm_smmu_device *smmu,
> + struct arm_smmu_domain *smmu_domain,
> + struct mm_struct *mm)
> +{
> + struct arm_smmu_domain *old_s1_domain;
> + int ret;
> +
> + /*
> + * Notice that BTM is never currently enabled, this is all dead code.
> + * The specification cautions:
> + *
> + * Note: Arm expects that SMMU stage 2 address spaces are generally
> + * shared with their respective PE virtual machine stage 2
> + * configuration. If broadcast invalidation is required to be avoided
> + * for a particular SMMU stage 2 address space, Arm recommends that a
> + * hypervisor configures the STE with a VMID that is not allocated for
> + * virtual machine use on the PEs
> + *
> + * However, in Linux, both KVM and SMMU think they own the VMID pool.
> + * Unfortunately the ARM design is problematic for Linux as we do not
> + * currently share the S2 table with KVM. This creates a situation where
> + * the S2 needs to have the same VMID as KVM in order to allow the guest
> + * to use BTM, however we must still invalidate the S2 directly since it
> + * is a different radix tree. What Linux would like is something like
> + * ASET for the STE to disable BTM only for the S2.
> + *
> + * Arguably in a system with BTM the driver should prefer to use a S1
> + * table in all cases except when explicitly asked to create a nesting
> + * parent. Then it should use the VMID of KVM to enable BTM in the
> + * guest. We cannot optimize away the resulting double invalidation of
> + * the S2 :( Or we simply ignore BTM entirely as we are doing now.
> + */
> + if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM))
> + return xa_alloc(&smmu->asid_map, &smmu_domain->cd.asid,
> + smmu_domain,
> + XA_LIMIT(1, (1 << smmu->asid_bits) - 1),
> + GFP_KERNEL);
> +
> + /* At this point the caller ensures we have a mmget() */
> + smmu_domain->cd.asid = arm64_mm_context_get(mm);
> +
> + mutex_lock(&smmu->asid_lock);
> + old_s1_domain = xa_store(&smmu->asid_map, smmu_domain->cd.asid,
> + smmu_domain, GFP_KERNEL);
> + if (xa_err(old_s1_domain)) {
> + ret = xa_err(old_s1_domain);
> + goto out_put_asid;
> + }
> +
> + /*
> + * In BTM mode the CPU ASID and the IOMMU ASID have to be the same.
> + * Unfortunately we run separate allocators for this and the IOMMU
> + * ASID can already have been assigned to a S1 domain. SVA domains
> + * always align to their CPU ASIDs. In this case we change
> + * the S1 domain's ASID, update the CD entry and flush the caches.
> + *
> + * This is a bit tricky, all the places writing to a S1 CD, reading the
> + * S1 ASID, or doing xa_erase must hold the asid_lock or xa_lock to
> + * avoid IOTLB incoherence.
> + */
> + if (old_s1_domain) {
> + if (WARN_ON(old_s1_domain->domain.type == IOMMU_DOMAIN_SVA)) {
> + ret = -EINVAL;
> + goto out_restore_s1;
> + }
> + ret = arm_smmu_realloc_s1_domain_asid(smmu, old_s1_domain);
> + if (ret)
> + goto out_restore_s1;
> + }
> +
> + smmu_domain->btm_invalidation = true;
> +
> + ret = 0;
> + goto out_unlock;
> +
> +out_restore_s1:
> + xa_store(&smmu->asid_map, smmu_domain->cd.asid, old_s1_domain,
> + GFP_KERNEL);
> +out_put_asid:
> + arm64_mm_context_put(mm);
> +out_unlock:
> + mutex_unlock(&smmu->asid_lock);
> + return ret;
> +}
> +
> struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
> struct mm_struct *mm)
> {
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> struct arm_smmu_device *smmu = master->smmu;
> struct arm_smmu_domain *smmu_domain;
> - u32 asid;
> int ret;
>
> smmu_domain = arm_smmu_domain_alloc();
> @@ -418,12 +531,10 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
> smmu_domain->domain.ops = &arm_smmu_sva_domain_ops;
> smmu_domain->smmu = smmu;
>
> - ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
> - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> + ret = arm_smmu_share_asid(smmu, smmu_domain, mm);
> if (ret)
> goto err_free;
>
> - smmu_domain->cd.asid = asid;
> smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops;
> ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm);
> if (ret)
> @@ -433,6 +544,8 @@ struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev,
>
> err_asid:
> arm_smmu_domain_free_id(smmu_domain);
> + if (smmu_domain->btm_invalidation)
> + arm64_mm_context_put(mm);
> err_free:
> kfree(smmu_domain);
> return ERR_PTR(ret);
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 1a72dd63e0ca14..01737a0fc82828 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -1314,6 +1314,8 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
> typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
> &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
>
> + lockdep_assert_held(&master->smmu->asid_lock);
> +
> memset(target, 0, sizeof(*target));
>
> target->data[0] = cpu_to_le64(
> @@ -2089,7 +2091,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
> * careful, 007.
> */
> if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
> - arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
> + arm_smmu_tlb_inv_asid(smmu, READ_ONCE(smmu_domain->cd.asid));
> } else {
> cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
> cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
> @@ -2334,17 +2336,10 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
> static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
> struct arm_smmu_domain *smmu_domain)
> {
> - int ret;
> - u32 asid;
> struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
>
> - /* Prevent SVA from modifying the ASID until it is written to the CD */
> - mutex_lock(&smmu->asid_lock);
> - ret = xa_alloc(&smmu->asid_map, &asid, smmu_domain,
> - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> - cd->asid = (u16)asid;
> - mutex_unlock(&smmu->asid_lock);
> - return ret;
> + return xa_alloc(&smmu->asid_map, &cd->asid, smmu_domain,
> + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
> }
>
> static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index 6becdbae905598..a23593f1830106 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -586,7 +586,7 @@ struct arm_smmu_strtab_l1_desc {
> };
>
> struct arm_smmu_ctx_desc {
> - u16 asid;
> + u32 asid;
> };
>
> struct arm_smmu_l1_ctx_desc {
> --
> 2.43.2
>