[PATCH v2 3/4] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc
Ryan Roberts
ryan.roberts at arm.com
Tue Apr 23 09:27:09 PDT 2024
On 22/02/2024 09:49, Shameer Kolothum wrote:
> From: Joao Martins <joao.m.martins at oracle.com>
>
> This provides all the infrastructure to enable dirty tracking if the
> hardware has the capability and the domain alloc requests it.
>
> Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING
> as it will only be enabled in a subsequent patch.
>
> Signed-off-by: Joao Martins <joao.m.martins at oracle.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++-----
> include/linux/io-pgtable.h | 4 +
> 2 files changed, 77 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bd30739e3588..058bbb0dbe2e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -43,6 +43,7 @@ MODULE_PARM_DESC(disable_msipolling,
> "Disable MSI-based polling for CMD_SYNC completion.");
>
> static struct iommu_ops arm_smmu_ops;
> +static struct iommu_dirty_ops arm_smmu_dirty_ops;
>
> enum arm_smmu_msi_index {
> EVTQ_MSI_INDEX,
> @@ -86,7 +87,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>
> static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu);
> static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> - struct arm_smmu_device *smmu);
> + struct arm_smmu_device *smmu,
> + bool enable_dirty);
> static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
> static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain *smmu_domain);
>
> @@ -2378,7 +2380,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> int ret;
>
> - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, false);
> if (ret) {
> kfree(smmu_domain);
> return ERR_PTR(ret);
> @@ -2445,10 +2447,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
> }
>
> static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> - struct arm_smmu_device *smmu)
> + struct arm_smmu_device *smmu,
> + bool enable_dirty)
> {
> int ret;
> - unsigned long ias, oas;
> + unsigned long ias;
> enum io_pgtable_fmt fmt;
> struct io_pgtable_cfg pgtbl_cfg;
> struct io_pgtable_ops *pgtbl_ops;
> @@ -2459,31 +2462,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>
> + pgtbl_cfg = (struct io_pgtable_cfg) {
> + .pgsize_bitmap = smmu->pgsize_bitmap,
> + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
> + .tlb = &arm_smmu_flush_ops,
> + .iommu_dev = smmu->dev,
> + };
> +
> switch (smmu_domain->stage) {
> case ARM_SMMU_DOMAIN_S1:
> ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> - ias = min_t(unsigned long, ias, VA_BITS);
> - oas = smmu->ias;
> + pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
I know this isn't changed by this patch, but do we really mean VA_BITS here?
Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
size the kernel is using.
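i.e. something like this (untested sketch; assuming vabits_actual, the
runtime VA size on arm64, is the limit we actually want here):

    pgtbl_cfg.ias = min_t(unsigned long, ias, vabits_actual);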
> + pgtbl_cfg.oas = smmu->ias;
> + if (enable_dirty)
> + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> fmt = ARM_64_LPAE_S1;
> break;
> case ARM_SMMU_DOMAIN_S2:
> - ias = smmu->ias;
> - oas = smmu->oas;
> + pgtbl_cfg.ias = smmu->ias;
> + pgtbl_cfg.oas = smmu->oas;
> fmt = ARM_64_LPAE_S2;
Is it worth adding a WARN_ON(enable_dirty) here?
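Something like this, perhaps, just to catch a future caller asking for
dirty tracking on a stage 2 domain (untested):

    case ARM_SMMU_DOMAIN_S2:
        WARN_ON(enable_dirty);
        pgtbl_cfg.ias = smmu->ias;
        pgtbl_cfg.oas = smmu->oas;
        fmt = ARM_64_LPAE_S2;
        break;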
> break;
> default:
> return -EINVAL;
> }
>
> - pgtbl_cfg = (struct io_pgtable_cfg) {
> - .pgsize_bitmap = smmu->pgsize_bitmap,
> - .ias = ias,
> - .oas = oas,
> - .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
> - .tlb = &arm_smmu_flush_ops,
> - .iommu_dev = smmu->dev,
> - };
> -
> pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> if (!pgtbl_ops)
> return -ENOMEM;
> @@ -2491,7 +2494,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
> smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
> smmu_domain->domain.geometry.force_aperture = true;
> -
> + if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
> + smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
> ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
> if (ret < 0) {
> free_io_pgtable_ops(pgtbl_ops);
> @@ -2811,7 +2815,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
> mutex_lock(&smmu_domain->init_mutex);
>
> if (!smmu_domain->smmu) {
> - ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> + ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> } else if (smmu_domain->smmu != smmu)
> ret = -EINVAL;
>
> @@ -2876,7 +2880,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
>
> mutex_lock(&smmu_domain->init_mutex);
> if (!smmu_domain->smmu)
> - ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> + ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> else if (smmu_domain->smmu != smmu)
> ret = -EINVAL;
> mutex_unlock(&smmu_domain->init_mutex);
> @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> const struct iommu_user_data *user_data)
> {
> struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> - const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> + const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> + IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> + bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
nit: It's called enable_dirty in other places; I think that is more appropriate
here?
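i.e. something like:

    bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;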
> struct arm_smmu_domain *smmu_domain;
> int ret;
>
> @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
> if (user_data)
> return ERR_PTR(-EINVAL);
>
> + if (enforce_dirty &&
> + !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> + return ERR_PTR(-EOPNOTSUPP);
I'm guessing the intention is that only a stage 1 domain will ever be
marked with IOMMU_CAP_DIRTY_TRACKING (there are a few places that
assume/check we are dealing with S1)? But is there a reason why stage 2
can't be supported as well?
> +
> smmu_domain = arm_smmu_domain_alloc();
> if (!smmu_domain)
> return ERR_PTR(-ENOMEM);
> @@ -3221,7 +3231,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>
> smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
> smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
> - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, enforce_dirty);
> if (ret)
> goto err_free;
> return &smmu_domain->domain;
> @@ -3470,6 +3480,42 @@ static void arm_smmu_release_device(struct device *dev)
> kfree(master);
> }
>
> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> + unsigned long iova, size_t size,
> + unsigned long flags,
> + struct iommu_dirty_bitmap *dirty)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> + return -EINVAL;
You've only attached the dirty_ops if it was S1 in the first place, so this
check seems overkill to me.
> +
> + if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> + return -ENODEV;
And here: could this check be moved to where you attach the dirty_ops?
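i.e. hoist both checks to arm_smmu_domain_finalise(), something like
this (untested, reusing the names from this patch):

    if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
        if (WARN_ON_ONCE(!pgtbl_ops->read_and_clear_dirty)) {
            free_io_pgtable_ops(pgtbl_ops);
            return -ENODEV;
        }
        smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
    }

Then the ops themselves wouldn't need to re-validate state that was
already guaranteed when dirty_ops was attached.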
> +
> + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> +}
> +
> +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> + bool enabled)
> +{
> + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> + return -EINVAL;
> +
> + if (WARN_ON_ONCE(!ops))
> + return -ENODEV;
Same comments for the 2 checks.
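With the checks hoisted as above, I'd expect this to reduce to just the
comment and the return (untested sketch):

    static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
                                           bool enabled)
    {
        /*
         * Always enabled; the dirty bitmap is cleared prior to
         * set_dirty_tracking(), so there is nothing to do here.
         */
        return 0;
    }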
> +
> + /*
> + * Always enabled and the dirty bitmap is cleared prior to
> + * set_dirty_tracking().
> + */
> + return 0;
> +}
> +
> static struct iommu_group *arm_smmu_device_group(struct device *dev)
> {
> struct iommu_group *group;
> @@ -3612,6 +3658,11 @@ static struct iommu_ops arm_smmu_ops = {
> }
> };
>
> +static struct iommu_dirty_ops arm_smmu_dirty_ops = {
> + .read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
> + .set_dirty_tracking = arm_smmu_set_dirty_tracking,
> +};
> +
> /* Probing and initialisation functions */
> static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
> struct arm_smmu_queue *q,
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..8e75f944f07a 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
> *
> * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
> * attributes set in the TCR for a non-coherent page-table walker.
> + *
> + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
> */
> #define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
> #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
> #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4)
> #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
> #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
> +#define IO_PGTABLE_QUIRK_ARM_HD BIT(7)
> +
> unsigned long quirks;
> unsigned long pgsize_bitmap;
> unsigned int ias;