[PATCH v2 3/4] iommu/arm-smmu-v3: Add support for dirty tracking in domain alloc

Ryan Roberts ryan.roberts at arm.com
Tue Apr 23 09:27:09 PDT 2024


On 22/02/2024 09:49, Shameer Kolothum wrote:
> From: Joao Martins <joao.m.martins at oracle.com>
> 
> This provides all the infrastructure to enable dirty tracking if the
> hardware has the capability and the domain allocation requests it.
> 
> Please note, we still report no support for IOMMU_CAP_DIRTY_TRACKING
> as it will finally be enabled in a subsequent patch.
> 
> Signed-off-by: Joao Martins <joao.m.martins at oracle.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 95 ++++++++++++++++-----
>  include/linux/io-pgtable.h                  |  4 +
>  2 files changed, 77 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bd30739e3588..058bbb0dbe2e 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -43,6 +43,7 @@ MODULE_PARM_DESC(disable_msipolling,
>  	"Disable MSI-based polling for CMD_SYNC completion.");
>  
>  static struct iommu_ops arm_smmu_ops;
> +static struct iommu_dirty_ops arm_smmu_dirty_ops;
>  
>  enum arm_smmu_msi_index {
>  	EVTQ_MSI_INDEX,
> @@ -86,7 +87,8 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>  
>  static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu);
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> -				    struct arm_smmu_device *smmu);
> +				    struct arm_smmu_device *smmu,
> +				    bool enable_dirty);
>  static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
>  static void arm_smmu_tlb_inv_all_s2(struct arm_smmu_domain *smmu_domain);
>  
> @@ -2378,7 +2380,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev)
>  		struct arm_smmu_master *master = dev_iommu_priv_get(dev);
>  		int ret;
>  
> -		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, false);
>  		if (ret) {
>  			kfree(smmu_domain);
>  			return ERR_PTR(ret);
> @@ -2445,10 +2447,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
>  }
>  
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
> -				    struct arm_smmu_device *smmu)
> +				    struct arm_smmu_device *smmu,
> +				    bool enable_dirty)
>  {
>  	int ret;
> -	unsigned long ias, oas;
> +	unsigned long ias;
>  	enum io_pgtable_fmt fmt;
>  	struct io_pgtable_cfg pgtbl_cfg;
>  	struct io_pgtable_ops *pgtbl_ops;
> @@ -2459,31 +2462,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
>  		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
>  
> +	pgtbl_cfg = (struct io_pgtable_cfg) {
> +		.pgsize_bitmap	= smmu->pgsize_bitmap,
> +		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> +		.tlb		= &arm_smmu_flush_ops,
> +		.iommu_dev	= smmu->dev,
> +	};
> +
>  	switch (smmu_domain->stage) {
>  	case ARM_SMMU_DOMAIN_S1:
>  		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
> -		ias = min_t(unsigned long, ias, VA_BITS);
> -		oas = smmu->ias;
> +		pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);

I know this isn't changed by this patch, but do we really mean VA_BITS here?
Don't we want vabits_actual? I'm guessing the intent is to limit ias to the
VA size the kernel is actually using at runtime.

> +		pgtbl_cfg.oas = smmu->ias;
> +		if (enable_dirty)
> +			pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
>  		fmt = ARM_64_LPAE_S1;
>  		break;
>  	case ARM_SMMU_DOMAIN_S2:
> -		ias = smmu->ias;
> -		oas = smmu->oas;
> +		pgtbl_cfg.ias = smmu->ias;
> +		pgtbl_cfg.oas = smmu->oas;
>  		fmt = ARM_64_LPAE_S2;

Is it worth adding a WARN_ON(enable_dirty) here?

>  		break;
>  	default:
>  		return -EINVAL;
>  	}
>  
> -	pgtbl_cfg = (struct io_pgtable_cfg) {
> -		.pgsize_bitmap	= smmu->pgsize_bitmap,
> -		.ias		= ias,
> -		.oas		= oas,
> -		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
> -		.tlb		= &arm_smmu_flush_ops,
> -		.iommu_dev	= smmu->dev,
> -	};
> -
>  	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
>  	if (!pgtbl_ops)
>  		return -ENOMEM;
> @@ -2491,7 +2494,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  	smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
>  	smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
>  	smmu_domain->domain.geometry.force_aperture = true;
> -
> +	if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
> +		smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
>  	ret = arm_smmu_domain_alloc_id(smmu, smmu_domain);
>  	if (ret < 0) {
>  		free_io_pgtable_ops(pgtbl_ops);
> @@ -2811,7 +2815,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  	mutex_lock(&smmu_domain->init_mutex);
>  
>  	if (!smmu_domain->smmu) {
> -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
>  	} else if (smmu_domain->smmu != smmu)
>  		ret = -EINVAL;
>  
> @@ -2876,7 +2880,7 @@ static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
>  
>  	mutex_lock(&smmu_domain->init_mutex);
>  	if (!smmu_domain->smmu)
> -		ret = arm_smmu_domain_finalise(smmu_domain, smmu);
> +		ret = arm_smmu_domain_finalise(smmu_domain, smmu, false);
>  	else if (smmu_domain->smmu != smmu)
>  		ret = -EINVAL;
>  	mutex_unlock(&smmu_domain->init_mutex);
> @@ -3193,7 +3197,9 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  			   const struct iommu_user_data *user_data)
>  {
>  	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> -	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT;
> +	const u32 paging_flags = IOMMU_HWPT_ALLOC_NEST_PARENT |
> +				 IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +	bool enforce_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;

nit: It's called enable_dirty in other places; I think that is more appropriate
here?

>  	struct arm_smmu_domain *smmu_domain;
>  	int ret;
>  
> @@ -3206,6 +3212,10 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  	if (user_data)
>  		return ERR_PTR(-EINVAL);
>  
> +	if (enforce_dirty &&
> +	    !device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
> +		return ERR_PTR(-EOPNOTSUPP);

I'm guessing the intention is that only a stage 1 domain will ever be marked with
IOMMU_CAP_DIRTY_TRACKING (there are a few places that assume/check we are
dealing with S1)? But is there a reason why stage 2 can't be supported as well?

> +
>  	smmu_domain = arm_smmu_domain_alloc();
>  	if (!smmu_domain)
>  		return ERR_PTR(-ENOMEM);
> @@ -3221,7 +3231,7 @@ arm_smmu_domain_alloc_user(struct device *dev, u32 flags,
>  
>  	smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
>  	smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
> -	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu);
> +	ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, enforce_dirty);
>  	if (ret)
>  		goto err_free;
>  	return &smmu_domain->domain;
> @@ -3470,6 +3480,42 @@ static void arm_smmu_release_device(struct device *dev)
>  	kfree(master);
>  }
>  
> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
> +					 unsigned long iova, size_t size,
> +					 unsigned long flags,
> +					 struct iommu_dirty_bitmap *dirty)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;

You've only attached the dirty_ops if it was S1 in the first place, so this
check seems overkill to me.

> +
> +	if (WARN_ON_ONCE(!ops || !ops->read_and_clear_dirty))
> +		return -ENODEV;

The same applies here: could this check be moved to where you attach the dirty_ops?

> +
> +	return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
> +}
> +
> +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
> +				       bool enabled)
> +{
> +	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
> +	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
> +
> +	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
> +		return -EINVAL;
> +
> +	if (WARN_ON_ONCE(!ops))
> +		return -ENODEV;

The same comments apply to these 2 checks.

> +
> +	/*
> +	 * Always enabled and the dirty bitmap is cleared prior to
> +	 * set_dirty_tracking().
> +	 */
> +	return 0;
> +}
> +
>  static struct iommu_group *arm_smmu_device_group(struct device *dev)
>  {
>  	struct iommu_group *group;
> @@ -3612,6 +3658,11 @@ static struct iommu_ops arm_smmu_ops = {
>  	}
>  };
>  
> +static struct iommu_dirty_ops arm_smmu_dirty_ops = {
> +	.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
> +	.set_dirty_tracking     = arm_smmu_set_dirty_tracking,
> +};
> +
>  /* Probing and initialisation functions */
>  static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
>  				   struct arm_smmu_queue *q,
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..8e75f944f07a 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -85,6 +85,8 @@ struct io_pgtable_cfg {
>  	 *
>  	 * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
>  	 *	attributes set in the TCR for a non-coherent page-table walker.
> +	 *
> +	 * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
>  	 */
>  	#define IO_PGTABLE_QUIRK_ARM_NS			BIT(0)
>  	#define IO_PGTABLE_QUIRK_NO_PERMS		BIT(1)
> @@ -92,6 +94,8 @@ struct io_pgtable_cfg {
>  	#define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT	BIT(4)
>  	#define IO_PGTABLE_QUIRK_ARM_TTBR1		BIT(5)
>  	#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA		BIT(6)
> +	#define IO_PGTABLE_QUIRK_ARM_HD			BIT(7)
> +
>  	unsigned long			quirks;
>  	unsigned long			pgsize_bitmap;
>  	unsigned int			ias;




More information about the linux-arm-kernel mailing list