[PATCH v7 5/9] iommu/arm-smmu-v3: Make arm_smmu_alloc_cd_ptr()

Mostafa Saleh smostafa at google.com
Fri Apr 19 14:14:21 PDT 2024


Hi Jason,

On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote:
> Only the attach callers can perform an allocation for the CD table entry,
> the other callers must not do so, they do not have the correct locking and
> they cannot sleep. Split up the functions so this is clear.
> 
> arm_smmu_get_cd_ptr() will return pointer to a CD table entry without
> doing any kind of allocation.
> 
> arm_smmu_alloc_cd_ptr() will allocate the table and any required
> leaf.
> 
> A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr()
> once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never
> called in the wrong context.
> 
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++--------
>  1 file changed, 39 insertions(+), 22 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index f3df1ec8d258dc..a0d1237272936f 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = {
>  
>  static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
>  				    struct arm_smmu_device *smmu);
> +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
>  
>  static void parse_driver_options(struct arm_smmu_device *smmu)
>  {
> @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
>  struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
>  					u32 ssid)
>  {
> -	__le64 *l1ptr;
> -	unsigned int idx;
>  	struct arm_smmu_l1_ctx_desc *l1_desc;
> -	struct arm_smmu_device *smmu = master->smmu;
>  	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
>  
> +	if (!cd_table->cdtab)
> +		return NULL;
> +
>  	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
>  		return (struct arm_smmu_cd *)(cd_table->cdtab +
>  					      ssid * CTXDESC_CD_DWORDS);
>  
> -	idx = ssid >> CTXDESC_SPLIT;
> -	l1_desc = &cd_table->l1_desc[idx];
> -	if (!l1_desc->l2ptr) {
> -		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> -			return NULL;
> +	l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES];

These operations used to be a shift and a bit mask, which made sense as that
mirrors what the hardware does. Is there any reason you changed them to
division and modulo? I checked the disassembly and gcc does the right thing,
since the constants are powers of 2, but I am just curious.

> +	if (!l1_desc->l2ptr)
> +		return NULL;
> +	return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES];
> +}
>  
> -		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
> -		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
> -		/* An invalid L1CD can be cached */
> -		arm_smmu_sync_cd(master, ssid, false);
> +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
> +						 u32 ssid)
> +{
> +	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> +	struct arm_smmu_device *smmu = master->smmu;
> +
> +	if (!cd_table->cdtab) {
> +		if (arm_smmu_alloc_cd_tables(master))
> +			return NULL;
>  	}
> -	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
> -	return &l1_desc->l2ptr[idx];
> +
> +	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
> +		unsigned int idx = ssid >> CTXDESC_SPLIT;

OK, now it’s a shift. I think we should be consistent in how we
calculate the index.

> +		struct arm_smmu_l1_ctx_desc *l1_desc;
> +
> +		l1_desc = &cd_table->l1_desc[idx];
> +		if (!l1_desc->l2ptr) {
> +			__le64 *l1ptr;
> +
> +			if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
> +				return NULL;
> +
> +			l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
> +			arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
> +			/* An invalid L1CD can be cached */
> +			arm_smmu_sync_cd(master, ssid, false);
> +		}
> +	}
> +	return arm_smmu_get_cd_ptr(master, ssid);
>  }
>  
>  struct arm_smmu_cd_writer {
> @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
>  		return -E2BIG;
>  
> -	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> +	cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid);

The only path that allocates the main table is “arm_smmu_attach_dev”. I guess
it would be more robust to leave that as is and have two versions of get_cd:
one that allocates the leaf and one that does not allocate. What do you think?

Thanks,
Mostafa



>  	if (!cd_table_entry)
>  		return -ENOMEM;
>  
> @@ -2687,13 +2710,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
>  		struct arm_smmu_cd target_cd;
>  		struct arm_smmu_cd *cdptr;
>  
> -		if (!master->cd_table.cdtab) {
> -			ret = arm_smmu_alloc_cd_tables(master);
> -			if (ret)
> -				goto out_list_del;
> -		}
> -
> -		cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID);
> +		cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
>  		if (!cdptr) {
>  			ret = -ENOMEM;
>  			goto out_list_del;
> -- 
> 2.43.2
> 



More information about the linux-arm-kernel mailing list