[PATCH v5 05/27] iommu/arm-smmu-v3: Make CD programming use arm_smmu_write_entry()

Mostafa Saleh smostafa at google.com
Sat Mar 23 06:02:15 PDT 2024


Hi Jason,

On Mon, Mar 04, 2024 at 07:43:53PM -0400, Jason Gunthorpe wrote:
> CD table entries and STE's have the same essential programming sequence,
> just with different types and sizes.
> 
> Have arm_smmu_write_ctx_desc() generate a target CD and call
> arm_smmu_write_entry() to do the programming. Due to the way the
> target CD is generated by modifying the existing CD this alone is not
> enough for the CD callers to be freed of the ordering requirements.
> 
> The following patches will make the rest of the CD flow mirror the STE
> flow with precise CD contents generated in all cases.
> 
> Currently the logic can't ensure that the CD always conforms to the used
> requirements until all the CD generation is moved to the new method. Add a
> temporary no_used_check to disable the assertions.
> 

I am still going through the patches, but is it possible to
reorder/squash to avoid that, so it is easier to review?

> Signed-off-by: Michael Shavit <mshavit at google.com>
> Tested-by: Nicolin Chen <nicolinc at nvidia.com>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 101 ++++++++++++++------
>  1 file changed, 74 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index b7f947e36f596f..237fd6d92c880b 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -57,11 +57,14 @@ struct arm_smmu_entry_writer {
>  struct arm_smmu_entry_writer_ops {
>  	unsigned int num_entry_qwords;
>  	__le64 v_bit;
> +	bool no_used_check;
>  	void (*get_used)(const __le64 *entry, __le64 *used);
>  	void (*sync)(struct arm_smmu_entry_writer *writer);
>  };
>  
> -#define NUM_ENTRY_QWORDS (sizeof(struct arm_smmu_ste) / sizeof(u64))
> +#define NUM_ENTRY_QWORDS                                                \
> +	(max(sizeof(struct arm_smmu_ste), sizeof(struct arm_smmu_cd)) / \
> +	 sizeof(u64))
>  
>  static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
>  	[EVTQ_MSI_INDEX] = {
> @@ -1056,7 +1059,8 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
>  		 * allowed to set a bit to 1 if the used function doesn't say it
>  		 * is used.
>  		 */
> -		WARN_ON_ONCE(target[i] & ~target_used[i]);
> +		if (!writer->ops->no_used_check)
> +			WARN_ON_ONCE(target[i] & ~target_used[i]);
>  
>  		/* Bits can change because they are not currently being used */
>  		unused_update[i] = (entry[i] & cur_used[i]) |
> @@ -1065,7 +1069,8 @@ static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
>  		 * Each bit indicates that a used bit in a qword needs to be
>  		 * changed after unused_update is applied.
>  		 */
> -		if ((unused_update[i] & target_used[i]) != target[i])
> +		if ((unused_update[i] & target_used[i]) !=
> +		    (target[i] & target_used[i]))
>  			used_qword_diff |= 1 << i;
>  	}
>  	return used_qword_diff;
> @@ -1161,8 +1166,11 @@ static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer,
>  		 * in the entry. The target was already sanity checked by
>  		 * compute_qword_diff().
>  		 */
> -		WARN_ON_ONCE(
> -			entry_set(writer, entry, target, 0, num_entry_qwords));
> +		if (writer->ops->no_used_check)
> +			entry_set(writer, entry, target, 0, num_entry_qwords);
> +		else
> +			WARN_ON_ONCE(entry_set(writer, entry, target, 0,
> +					       num_entry_qwords));
>  	}
>  }
>  
> @@ -1242,6 +1250,59 @@ static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
>  	return &l1_desc->l2ptr[idx];
>  }
>  
> +struct arm_smmu_cd_writer {
> +	struct arm_smmu_entry_writer writer;
> +	unsigned int ssid;
> +};
> +
> +static void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
> +{
> +	used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
> +	if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
> +		return;
> +	memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));

This is a slightly different approach than what the driver does for STEs,
where it explicitly sets the used bits. Is there a reason for that?

> +
> +	/* EPD0 means T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED */
> +	if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
> +		used_bits[0] &= ~cpu_to_le64(
> +			CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
> +			CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
> +			CTXDESC_CD_0_TCR_SH0);
> +		used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
> +	}
> +}

Maybe we should also add a comment about EPD1?

> +
> +static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
> +{
> +	struct arm_smmu_cd_writer *cd_writer =
> +		container_of(writer, struct arm_smmu_cd_writer, writer);
> +
> +	arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
> +}
> +
> +static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
> +	.sync = arm_smmu_cd_writer_sync_entry,
> +	.get_used = arm_smmu_get_cd_used,
> +	.v_bit = cpu_to_le64(CTXDESC_CD_0_V),
> +	.no_used_check = true,
> +	.num_entry_qwords = sizeof(struct arm_smmu_cd) / sizeof(u64),
> +};
> +
> +static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
> +				    struct arm_smmu_cd *cdptr,
> +				    const struct arm_smmu_cd *target)
> +{
> +	struct arm_smmu_cd_writer cd_writer = {
> +		.writer = {
> +			.ops = &arm_smmu_cd_writer_ops,
> +			.master = master,
> +		},
> +		.ssid = ssid,
> +	};
> +
> +	arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
> +}
> +
>  int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  			    struct arm_smmu_ctx_desc *cd)
>  {
> @@ -1258,17 +1319,20 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  	 */
>  	u64 val;
>  	bool cd_live;
> -	struct arm_smmu_cd *cdptr;
> +	struct arm_smmu_cd target;
> +	struct arm_smmu_cd *cdptr = &target;
> +	struct arm_smmu_cd *cd_table_entry;
>  	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
>  	struct arm_smmu_device *smmu = master->smmu;
>  
>  	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
>  		return -E2BIG;
>  
> -	cdptr = arm_smmu_get_cd_ptr(master, ssid);
> -	if (!cdptr)
> +	cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> +	if (!cd_table_entry)
>  		return -ENOMEM;
>  
> +	target = *cd_table_entry;
>  	val = le64_to_cpu(cdptr->data[0]);
>  	cd_live = !!(val & CTXDESC_CD_0_V);
>  
> @@ -1290,13 +1354,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  		cdptr->data[2] = 0;
>  		cdptr->data[3] = cpu_to_le64(cd->mair);
>  
> -		/*
> -		 * STE may be live, and the SMMU might read dwords of this CD in any
> -		 * order. Ensure that it observes valid values before reading
> -		 * V=1.
> -		 */
> -		arm_smmu_sync_cd(master, ssid, true);
> -
>  		val = cd->tcr |
>  #ifdef __BIG_ENDIAN
>  			CTXDESC_CD_0_ENDI |
> @@ -1310,18 +1367,8 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
>  		if (cd_table->stall_enabled)
>  			val |= CTXDESC_CD_0_S;
>  	}
> -
> -	/*
> -	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
> -	 * "Configuration structures and configuration invalidation completion"
> -	 *
> -	 *   The size of single-copy atomic reads made by the SMMU is
> -	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
> -	 *   field within an aligned 64-bit span of a structure can be altered
> -	 *   without first making the structure invalid.
> -	 */
> -	WRITE_ONCE(cdptr->data[0], cpu_to_le64(val));
> -	arm_smmu_sync_cd(master, ssid, true);
> +	cdptr->data[0] = cpu_to_le64(val);
> +	arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target);
>  	return 0;
>  }
>  
> -- 
> 2.43.2 

Thanks,
Mostafa



More information about the linux-arm-kernel mailing list