[PATCH v2 3/4] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc

Shameerali Kolothum Thodi shameerali.kolothum.thodi at huawei.com
Wed Apr 24 01:27:21 PDT 2024



> -----Original Message-----
> From: Ryan Roberts <ryan.roberts at arm.com>
> Sent: Tuesday, April 23, 2024 5:27 PM
> To: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>;
> iommu at lists.linux.dev; linux-arm-kernel at lists.infradead.org
> Cc: joro at 8bytes.org; jgg at nvidia.com; kevin.tian at intel.com;
> nicolinc at nvidia.com; mshavit at google.com; robin.murphy at arm.com;
> will at kernel.org; joao.m.martins at oracle.com; jiangkunkun at huawei.com;
> zhukeqian1 at huawei.com; linuxarm at huawei.com
> Subject: Re: [PATCH v2 3/4] iommu/arm-smmu-v3: Add support for dirty tracking
> in domain alloc
> 
> On 22/02/2024 09:49, Shameer Kolothum wrote:
> > From: Joao Martins <joao.m.martins at oracle.com>
> >
> > This provides all the infrastructure to enable dirty tracking if the
> > hardware has the capability and domain alloc request for it.
> >
> > Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING
> > as it will finally be enabled in a subsequent patch.
> >
> > Signed-off-by: Joao Martins <joao.m.martins at oracle.com>
> > Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> > ---
> >  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++--
> ---
> >  include/linux/io-pgtable.h                  |  4 +
> >  2 files changed, 77 insertions(+), 22 deletions(-)
> >
> > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > index bd30739e3588..058bbb0dbe2e 100644
> > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> > @@ -43,6 +43,7 @@ MODULE_PARM_DESC(disable_msipolling,
> >  	"Disable MSI-based polling for CMD_SYNC completion.");
> >
> >  static struct iommu_ops arm_smmu_ops;
> > +static struct iommu_dirty_ops arm_smmu_dirty_ops;
> >
> >  enum arm_smmu_msi_index {
> >  	EVTQ_MSI_INDEX,
> > @@ -86,7 +87,8 @@ static struct arm_smmu_option_prop
> arm_smmu_options[] = {
> >
> >  static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device
> *smmu);
> >  static int arm_smmu_domain_finalise(struct arm_smmu_domain
> *smmu_domain,
> > -				    struct arm_smmu_device *smmu);
> > +				    struct arm_smmu_device *smmu,
> > +				    bool enable_dirty);
> >  static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
> >  static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain
> *smmu_domain);
> >
> > @@ -2378,7 +2380,7 @@ static struct iommu_domain
> *arm_smmu_domain_alloc_paging(struct device *dev)
> >  		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> >  		int ret;
> >
> > -		ret = arm_smmu_domain_finalise(smmu_domain, master-
> >smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, master-
> >smmu, false);
> >  		if (ret) {
> >  			kfree(smmu_domain);
> >  			return ERR_PTR(ret);
> > @@ -2445,10 +2447,11 @@ static void arm_smmu_domain_free(struct
> iommu_domain *domain)
> >  }
> >
> >  static int arm_smmu_domain_finalise(struct arm_smmu_domain
> *smmu_domain,
> > -				    struct arm_smmu_device *smmu)
> > +				    struct arm_smmu_device *smmu,
> > +				    bool enable_dirty)
> >  {
> >  	int ret;
> > -	unsigned long ias, oas;
> > +	unsigned long ias;
> >  	enum io_pgtable_fmt fmt;
> >  	struct io_pgtable_cfg pgtbl_cfg;
> >  	struct io_pgtable_ops *pgtbl_ops;
> > @@ -2459,31 +2462,31 @@ static int arm_smmu_domain_finalise(struct
> arm_smmu_domain *smmu_domain,
> >  	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
> >  		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
> >
> > +	pgtbl_cfg = (struct io_pgtable_cfg) {
> > +		.pgsize_bitmap	= smmu->pgsize_bitmap,
> > +		.coherent_walk	= smmu->features &
> ARM_SMMU_FEAT_COHERENCY,
> > +		.tlb		= &arm_smmu_flush_ops,
> > +		.iommu_dev	= smmu->dev,
> > +	};
> > +
> >  	switch (smmu_domain->stage) {
> >  	case ARM_SMMU_DOMAIN_S1:
> >  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> > -		ias = min_t(unsigned long, ias, VA_BITS);
> > -		oas = smmu->ias;
> > +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
> 
> I know this isn't changed by this patch, but do we really mean VA_BITS here?
> Don't we want vabits_actual? I'm guessing we are intending to limit ias to the
> size the kernel is using.

I see Jason has replied to this.

> 
> > +		pgtbl_cfg.oas = smmu->ias;
> > +		if (enable_dirty)
> > +			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
> >  		fmt = ARM_64_LPAE_S1;
> >  		break;
> >  	case ARM_SMMU_DOMAIN_S2:
> > -		ias = smmu->ias;
> > -		oas = smmu->oas;
> > +		pgtbl_cfg.ias = smmu->ias;
> > +		pgtbl_cfg.oas = smmu->oas;
> >  		fmt = ARM_64_LPAE_S2;
> 
> Is it worth adding a WARN_ON(enable_dirty) here?

Not sure it makes a difference as we don’t set the quirk flag here.

> 
> >  		break;
> >  	default:
> >  		return -EINVAL;
> >  	}
> >
> > -	pgtbl_cfg = (struct io_pgtable_cfg) {
> > -		.pgsize_bitmap	= smmu->pgsize_bitmap,
> > -		.ias		= ias,
> > -		.oas		= oas,
> > -		.coherent_walk	= smmu->features &
> ARM_SMMU_FEAT_COHERENCY,
> > -		.tlb		= &arm_smmu_flush_ops,
> > -		.iommu_dev	= smmu->dev,
> > -	};
> > -
> >  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
> >  	if (!pgtbl_ops)
> >  		return -ENOMEM;
> > @@ -2491,7 +2494,8 @@ static int arm_smmu_domain_finalise(struct
> arm_smmu_domain *smmu_domain,
> >  	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
> >  	smmu_domain->domain.geometry.aperture_end = (1UL <<
> pgtbl_cfg.ias) - 1;
> >  	smmu_domain->domain.geometry.force_aperture = true;
> > -
> > +	if (enable_dirty && smmu_domain->stage ==
> ARM_SMMU_DOMAIN_S1)
> > +		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
> >  	ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
> >  	if (ret < 0) {
> >  		free_io_pgtable_ops(pgtbl_ops);
> > @@ -2811,7 +2815,7 @@ static int arm_smmu_attach_dev(struct
> iommu_domain *domain, struct device *dev)
> >  	mutex_lock(&smmu_domain->init_mutex);
> >
> >  	if (!smmu_domain->smmu) {
> > -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> >  	} else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> >
> > @@ -2876,7 +2880,7 @@ static int arm_smmu_s1_set_dev_pasid(struct
> iommu_domain *domain,
> >
> >  	mutex_lock(&smmu_domain->init_mutex);
> >  	if (!smmu_domain->smmu)
> > -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> > +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
> >  	else if (smmu_domain->smmu != smmu)
> >  		ret = -EINVAL;
> >  	mutex_unlock(&smmu_domain->init_mutex);
> > @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev,
> u32 flags,
> >  			   const struct iommu_user_data *user_data)
> >  {
> >  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> > -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> > +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> > +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> > +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> 
> nit: It's called enable_dirty in other places; I think that is more appropriate
> here?

Ok.

> >  	struct arm_smmu_domain *smmu_domain;
> >  	int ret;
> >
> > @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device
> *dev, u32 flags,
> >  	if (user_data)
> >  		return ERR_PTR(-EINVAL);
> >
> > +	if (enforce_dirty &&
> > +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> > +		return ERR_PTR(-EOPNOTSUPP);
> 
> I'm guessing the intention is that only a stage 1 will ever be marked with
> IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we
> are
> dealing with S1)? But is there a reason why stage 2 can't be supported as well?

We don’t have nested support yet. S2 support will be added in the future.
 
> > +
> >  	smmu_domain = arm_smmu_domain_alloc();
> >  	if (!smmu_domain)
> >  		return ERR_PTR(-ENOMEM);
> > @@ -3221,7 +3231,7 @@ arm_smmu_domain_alloc_user(struct device *dev,
> u32 flags,
> >
> >  	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
> >  	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
> > -	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> > +	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu,
> enforce_dirty);
> >  	if (ret)
> >  		goto err_free;
> >  	return &smmu_domain->domain;
> > @@ -3470,6 +3480,42 @@ static void arm_smmu_release_device(struct
> device *dev)
> >  	kfree(master);
> >  }
> >
> > +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> > +					 unsigned long iova, size_t size,
> > +					 unsigned long flags,
> > +					 struct iommu_dirty_bitmap *dirty)
> > +{
> > +	struct arm_smmu_domain *smmu_domain =
> to_smmu_domain(domain);
> > +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> > +
> > +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> > +		return -EINVAL;
> 
> You've only attached the dirty_ops if it was S1 in the first place, so this
> check seems overkill to me.
> 
> > +
> > +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> > +		return -ENODEV;
> 
> And here; could this be moved to where you attach the dirty_ops?

Yes. Jason has also made the same comment on this, and it will be removed in
the next revision.

> > +
> > +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> > +}
> > +
> > +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> > +				       bool enabled)
> > +{
> > +	struct arm_smmu_domain *smmu_domain =
> to_smmu_domain(domain);
> > +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> > +
> > +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> > +		return -EINVAL;
> > +
> > +	if (WARN_ON_ONCE(!ops))
> > +		return -ENODEV;
> 
> Same comments for the 2 checks.

Sure.
 
Thanks,
Shameer


More information about the linux-arm-kernel mailing list