[PATCH v3 8/9] iommu/arm-smmu-v3: Support IOMMU_DOMAIN_NESTED
Mostafa Saleh
smostafa at google.com
Wed Oct 30 09:29:24 PDT 2024
Hi Jason,
On Wed, Oct 09, 2024 at 01:23:14PM -0300, Jason Gunthorpe wrote:
> For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting
> as the parent and a user provided STE fragment that defines the CD table
> and related data with addresses translated by the S2 iommu_domain.
>
> The kernel only permits userspace to control certain allowed bits of the
> STE that are safe for user/guest control.
>
> IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2
> translation, but there is no way of knowing which S1 entries refer to a
> range of S2.
>
> For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to
> flush all ASIDs from the VMID after flushing the S2 on any change to the
> S2.
>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
> .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 172 ++++++++++++++++++
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 25 ++-
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 37 ++++
> include/uapi/linux/iommufd.h | 20 ++
> 4 files changed, 250 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> index 3d2671031c9bb5..a9aa7514e65ce4 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> @@ -29,3 +29,175 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
>
> return info;
> }
> +
> +static void arm_smmu_make_nested_cd_table_ste(
> + struct arm_smmu_ste *target, struct arm_smmu_master *master,
> + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
> +{
> + arm_smmu_make_s2_domain_ste(target, master, nested_domain->s2_parent,
> + ats_enabled);
> +
> + target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
> + FIELD_PREP(STRTAB_STE_0_CFG,
> + STRTAB_STE_0_CFG_NESTED));
> + target->data[0] |= nested_domain->ste[0] &
> + ~cpu_to_le64(STRTAB_STE_0_CFG);
> + target->data[1] |= nested_domain->ste[1];
> +}
> +
> +/*
> + * Create a physical STE from the virtual STE that userspace provided when it
> + * created the nested domain. Using the vSTE userspace can request:
> + * - Non-valid STE
> + * - Abort STE
> + * - Bypass STE (install the S2, no CD table)
> + * - CD table STE (install the S2 and the userspace CD table)
> + */
> +static void arm_smmu_make_nested_domain_ste(
> + struct arm_smmu_ste *target, struct arm_smmu_master *master,
> + struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
> +{
> + unsigned int cfg =
> + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
> +
> + /*
> + * Userspace can request a non-valid STE through the nesting interface.
> + * We relay that into an abort physical STE with the intention that
> + * C_BAD_STE for this SID can be generated to userspace.
> + */
> + if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
> + cfg = STRTAB_STE_0_CFG_ABORT;
> +
> + switch (cfg) {
> + case STRTAB_STE_0_CFG_S1_TRANS:
> + arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
> + ats_enabled);
> + break;
> + case STRTAB_STE_0_CFG_BYPASS:
> + arm_smmu_make_s2_domain_ste(
> + target, master, nested_domain->s2_parent, ats_enabled);
> + break;
> + case STRTAB_STE_0_CFG_ABORT:
> + default:
> + arm_smmu_make_abort_ste(target);
> + break;
> + }
> +}
> +
> +static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
> + struct device *dev)
> +{
> + struct arm_smmu_nested_domain *nested_domain =
> + to_smmu_nested_domain(domain);
> + struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> + struct arm_smmu_attach_state state = {
> + .master = master,
> + .old_domain = iommu_get_domain_for_dev(dev),
> + .ssid = IOMMU_NO_PASID,
> + /* Currently invalidation of ATC is not supported */
> + .disable_ats = true,
> + };
> + struct arm_smmu_ste ste;
> + int ret;
> +
> + if (nested_domain->s2_parent->smmu != master->smmu)
> + return -EINVAL;
> + if (arm_smmu_ssids_in_use(&master->cd_table))
> + return -EBUSY;
> +
> + mutex_lock(&arm_smmu_asid_lock);
> + ret = arm_smmu_attach_prepare(&state, domain);
> + if (ret) {
> + mutex_unlock(&arm_smmu_asid_lock);
> + return ret;
> + }
> +
> + arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
> + state.ats_enabled);
> + arm_smmu_install_ste_for_dev(master, &ste);
> + arm_smmu_attach_commit(&state);
> + mutex_unlock(&arm_smmu_asid_lock);
> + return 0;
> +}
> +
> +static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
> +{
> + kfree(to_smmu_nested_domain(domain));
> +}
> +
> +static const struct iommu_domain_ops arm_smmu_nested_ops = {
> + .attach_dev = arm_smmu_attach_dev_nested,
> + .free = arm_smmu_domain_nested_free,
> +};
> +
> +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg)
> +{
> + unsigned int cfg;
> +
> + if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
> + memset(arg->ste, 0, sizeof(arg->ste));
> + return 0;
> + }
> +
> + /* EIO is reserved for invalid STE data. */
> + if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
> + (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
> + return -EIO;
> +
> + cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
> + if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
> + cfg != STRTAB_STE_0_CFG_S1_TRANS)
> + return -EIO;
> + return 0;
> +}
> +
> +struct iommu_domain *
> +arm_smmu_domain_alloc_nesting(struct device *dev, u32 flags,
> + struct iommu_domain *parent,
> + const struct iommu_user_data *user_data)
> +{
> + struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> + struct arm_smmu_nested_domain *nested_domain;
> + struct arm_smmu_domain *smmu_parent;
> + struct iommu_hwpt_arm_smmuv3 arg;
> + int ret;
> +
> + if (flags || !(master->smmu->features & ARM_SMMU_FEAT_NESTING))
> + return ERR_PTR(-EOPNOTSUPP);
> +
> + /*
> + * Must support some way to prevent the VM from bypassing the cache
> + * because VFIO currently does not do any cache maintenance.
> + */
> + if (!arm_smmu_master_canwbs(master))
> + return ERR_PTR(-EOPNOTSUPP);
> +
> + /*
> + * The core code checks that parent was created with
> + * IOMMU_HWPT_ALLOC_NEST_PARENT
> + */
> + smmu_parent = to_smmu_domain(parent);
> + if (smmu_parent->smmu != master->smmu)
> + return ERR_PTR(-EINVAL);
> +
> + ret = iommu_copy_struct_from_user(&arg, user_data,
> + IOMMU_HWPT_DATA_ARM_SMMUV3, ste);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + ret = arm_smmu_validate_vste(&arg);
> + if (ret)
> + return ERR_PTR(ret);
> +
> + nested_domain = kzalloc(sizeof(*nested_domain), GFP_KERNEL_ACCOUNT);
> + if (!nested_domain)
> + return ERR_PTR(-ENOMEM);
> +
> + nested_domain->domain.type = IOMMU_DOMAIN_NESTED;
> + nested_domain->domain.ops = &arm_smmu_nested_ops;
> + nested_domain->s2_parent = smmu_parent;
> + nested_domain->ste[0] = arg.ste[0];
> + nested_domain->ste[1] = arg.ste[1] & ~cpu_to_le64(STRTAB_STE_1_EATS);
> +
> + return &nested_domain->domain;
> +}
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index b4b03206afbf48..eb401a4adfedc8 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -295,6 +295,7 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> case CMDQ_OP_TLBI_NH_ASID:
> cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> fallthrough;
> + case CMDQ_OP_TLBI_NH_ALL:
> case CMDQ_OP_TLBI_S12_VMALL:
> cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> break;
> @@ -2230,6 +2231,15 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
> }
> __arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
>
> + if (smmu_domain->nest_parent) {
Do we need a CMD_SYNC between the two invalidations (the S2 range invalidation above and this TLBI_NH_ALL) to order them?
> + /*
> + * When the S2 domain changes all the nested S1 ASIDs have to be
> + * flushed too.
> + */
> + cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
> + arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
> + }
> +
> /*
> * Unfortunately, this can't be leaf-only since we may have
> * zapped an entire table.
> @@ -2614,8 +2624,7 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
>
> static struct arm_smmu_master_domain *
> arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
> - struct arm_smmu_master *master,
> - ioasid_t ssid)
> + struct arm_smmu_master *master, ioasid_t ssid)
> {
> struct arm_smmu_master_domain *master_domain;
>
> @@ -2644,6 +2653,8 @@ to_smmu_domain_devices(struct iommu_domain *domain)
> if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
> domain->type == IOMMU_DOMAIN_SVA)
> return to_smmu_domain(domain);
> + if (domain->type == IOMMU_DOMAIN_NESTED)
> + return to_smmu_nested_domain(domain)->s2_parent;
> return NULL;
> }
>
> @@ -2716,7 +2727,8 @@ int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
> * enabled if we have arm_smmu_domain, those always have page
> * tables.
> */
> - state->ats_enabled = arm_smmu_ats_supported(master);
> + state->ats_enabled = !state->disable_ats &&
> + arm_smmu_ats_supported(master);
> }
>
> if (smmu_domain) {
> @@ -3107,9 +3119,13 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> struct arm_smmu_domain *smmu_domain;
> int ret;
>
> + if (parent)
> + return arm_smmu_domain_alloc_nesting(dev, flags, parent,
> + user_data);
> +
> if (flags & ~PAGING_FLAGS)
> return ERR_PTR(-EOPNOTSUPP);
> - if (parent || user_data)
> + if (user_data)
> return ERR_PTR(-EOPNOTSUPP);
>
> smmu_domain = arm_smmu_domain_alloc();
> @@ -3122,6 +3138,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> goto err_free;
> }
> smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
> + smmu_domain->nest_parent = true;
> }
>
> smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> index c9e5290e995a64..b5dbf5acbfc4db 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
> @@ -243,6 +243,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
> #define STRTAB_STE_0_CFG_BYPASS 4
> #define STRTAB_STE_0_CFG_S1_TRANS 5
> #define STRTAB_STE_0_CFG_S2_TRANS 6
> +#define STRTAB_STE_0_CFG_NESTED 7
>
> #define STRTAB_STE_0_S1FMT GENMASK_ULL(5, 4)
> #define STRTAB_STE_0_S1FMT_LINEAR 0
> @@ -294,6 +295,15 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
>
> #define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
>
> +/* These bits can be controlled by userspace for STRTAB_STE_0_CFG_NESTED */
> +#define STRTAB_STE_0_NESTING_ALLOWED \
> + cpu_to_le64(STRTAB_STE_0_V | STRTAB_STE_0_CFG | STRTAB_STE_0_S1FMT | \
> + STRTAB_STE_0_S1CTXPTR_MASK | STRTAB_STE_0_S1CDMAX)
> +#define STRTAB_STE_1_NESTING_ALLOWED \
> + cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | \
> + STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | \
> + STRTAB_STE_1_S1STALLD)
> +
> /*
> * Context descriptors.
> *
> @@ -513,6 +523,7 @@ struct arm_smmu_cmdq_ent {
> };
> } cfgi;
>
> + #define CMDQ_OP_TLBI_NH_ALL 0x10
> #define CMDQ_OP_TLBI_NH_ASID 0x11
> #define CMDQ_OP_TLBI_NH_VA 0x12
> #define CMDQ_OP_TLBI_EL2_ALL 0x20
> @@ -814,10 +825,18 @@ struct arm_smmu_domain {
> struct list_head devices;
> spinlock_t devices_lock;
> bool enforce_cache_coherency : 1;
> + bool nest_parent : 1;
>
> struct mmu_notifier mmu_notifier;
> };
>
> +struct arm_smmu_nested_domain {
> + struct iommu_domain domain;
> + struct arm_smmu_domain *s2_parent;
> +
> + __le64 ste[2];
> +};
> +
> /* The following are exposed for testing purposes. */
> struct arm_smmu_entry_writer_ops;
> struct arm_smmu_entry_writer {
> @@ -862,6 +881,12 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
> return container_of(dom, struct arm_smmu_domain, domain);
> }
>
> +static inline struct arm_smmu_nested_domain *
> +to_smmu_nested_domain(struct iommu_domain *dom)
> +{
> + return container_of(dom, struct arm_smmu_nested_domain, domain);
> +}
> +
> extern struct xarray arm_smmu_asid_xa;
> extern struct mutex arm_smmu_asid_lock;
>
> @@ -908,6 +933,7 @@ struct arm_smmu_attach_state {
> struct iommu_domain *old_domain;
> struct arm_smmu_master *master;
> bool cd_needs_ats;
> + bool disable_ats;
> ioasid_t ssid;
> /* Resulting state */
> bool ats_enabled;
> @@ -978,8 +1004,19 @@ tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
>
> #if IS_ENABLED(CONFIG_ARM_SMMU_V3_IOMMUFD)
> void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type);
> +struct iommu_domain *
> +arm_smmu_domain_alloc_nesting(struct device *dev, u32 flags,
> + struct iommu_domain *parent,
> + const struct iommu_user_data *user_data);
> #else
> #define arm_smmu_hw_info NULL
> +static inline struct iommu_domain *
> +arm_smmu_domain_alloc_nesting(struct device *dev, u32 flags,
> + struct iommu_domain *parent,
> + const struct iommu_user_data *user_data)
> +{
> + return ERR_PTR(-EOPNOTSUPP);
> +}
> #endif /* CONFIG_ARM_SMMU_V3_IOMMUFD */
>
> #endif /* _ARM_SMMU_V3_H */
> diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
> index b5c94fecb94ca5..cd4920886ad05e 100644
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -394,14 +394,34 @@ struct iommu_hwpt_vtd_s1 {
> __u32 __reserved;
> };
>
> +/**
> + * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 Context Descriptor Table info
> + * (IOMMU_HWPT_DATA_ARM_SMMUV3)
> + *
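Isn't this supposed to say "Stream Table Entry" rather than "Context Descriptor Table"? The struct carries the first two words of the STE.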
Thanks,
Mostafa
> + * @ste: The first two double words of the user space Stream Table Entry for
> + * a user stage-1 Context Descriptor Table. Must be little-endian.
> + * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
> + * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
> + * - word-1: S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
> + *
> + * -EIO will be returned if @ste is not legal or contains any non-allowed field.
> + * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
> + * nested domain will translate the same as the nesting parent.
> + */
> +struct iommu_hwpt_arm_smmuv3 {
> + __aligned_le64 ste[2];
> +};
> +
> /**
> * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
> * @IOMMU_HWPT_DATA_NONE: no data
> * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
> + * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
> */
> enum iommu_hwpt_data_type {
> IOMMU_HWPT_DATA_NONE = 0,
> IOMMU_HWPT_DATA_VTD_S1 = 1,
> + IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
> };
>
> /**
> --
> 2.46.2
>
More information about the linux-arm-kernel
mailing list