[PATCH v7 2/9] iommu/arm-smmu-v3: Make CD programming use arm_smmu_write_entry()
Mostafa Saleh
smostafa at google.com
Fri Apr 19 14:07:19 PDT 2024
Hi Jason,
On Tue, Apr 16, 2024 at 04:28:13PM -0300, Jason Gunthorpe wrote:
> CD table entries and STE's have the same essential programming sequence,
> just with different types.
>
> Have arm_smmu_write_ctx_desc() generate a target CD and call
> arm_smmu_write_entry() to do the programming. Due to the way the target CD
> is generated by modifying the existing CD this alone is not enough for the
> CD callers to be freed of the ordering requirements.
>
> The following patches will make the rest of the CD flow mirror the STE
> flow with precise CD contents generated in all cases.
>
> Signed-off-by: Michael Shavit <mshavit at google.com>
> Tested-by: Nicolin Chen <nicolinc at nvidia.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi at huawei.com>
> Reviewed-by: Moritz Fischer <moritzf at google.com>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
> drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 94 ++++++++++++++++-----
> 1 file changed, 74 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index bf105e914d38b1..3983de90c2fa01 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -56,6 +56,7 @@ struct arm_smmu_entry_writer_ops {
>
> #define NUM_ENTRY_QWORDS 8
> static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
> +static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
>
> static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
> [EVTQ_MSI_INDEX] = {
> @@ -1231,6 +1232,67 @@ static struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
> return &l1_desc->l2ptr[idx];
> }
>
> +struct arm_smmu_cd_writer {
> + struct arm_smmu_entry_writer writer;
> + unsigned int ssid;
> +};
> +
> +static void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
> +{
> + used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
> + if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
> + return;
> + memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
> +
> + /* EPD0 means T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED */
> + if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
> + used_bits[0] &= ~cpu_to_le64(
> + CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
> + CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
> + CTXDESC_CD_0_TCR_SH0);
> + used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
> + }
> +}
> +
> +static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
> +{
> + struct arm_smmu_cd_writer *cd_writer =
> + container_of(writer, struct arm_smmu_cd_writer, writer);
> +
> + arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
> +}
> +
> +static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
> + .sync = arm_smmu_cd_writer_sync_entry,
> + .get_used = arm_smmu_get_cd_used,
> + .v_bit = cpu_to_le64(CTXDESC_CD_0_V),
> +};
> +
> +static void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
> + struct arm_smmu_cd *cdptr,
> + const struct arm_smmu_cd *target)
> +{
> + struct arm_smmu_cd_writer cd_writer = {
> + .writer = {
> + .ops = &arm_smmu_cd_writer_ops,
> + .master = master,
> + },
> + .ssid = ssid,
> + };
> +
> + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
> +}
> +
> +static void arm_smmu_clean_cd_entry(struct arm_smmu_cd *target)
> +{
> + struct arm_smmu_cd used = {};
> + int i;
> +
> + arm_smmu_get_cd_used(target->data, used.data);
> + for (i = 0; i != ARRAY_SIZE(target->data); i++)
> + target->data[i] &= used.data[i];
> +}
> +
> int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> struct arm_smmu_ctx_desc *cd)
> {
> @@ -1247,17 +1309,20 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> */
> u64 val;
> bool cd_live;
> - struct arm_smmu_cd *cdptr;
> + struct arm_smmu_cd target;
> + struct arm_smmu_cd *cdptr = &target;
> + struct arm_smmu_cd *cd_table_entry;
> struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
> struct arm_smmu_device *smmu = master->smmu;
>
> if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
> return -E2BIG;
>
> - cdptr = arm_smmu_get_cd_ptr(master, ssid);
> - if (!cdptr)
> + cd_table_entry = arm_smmu_get_cd_ptr(master, ssid);
> + if (!cd_table_entry)
> return -ENOMEM;
>
> + target = *cd_table_entry;
As this changes the logic so that all CD manipulation is no longer done on
the actual CD, I believe a comment would be helpful here.
> val = le64_to_cpu(cdptr->data[0]);
> cd_live = !!(val & CTXDESC_CD_0_V);
>
> @@ -1279,13 +1344,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> cdptr->data[2] = 0;
> cdptr->data[3] = cpu_to_le64(cd->mair);
>
> - /*
> - * STE may be live, and the SMMU might read dwords of this CD in any
> - * order. Ensure that it observes valid values before reading
> - * V=1.
> - */
> - arm_smmu_sync_cd(master, ssid, true);
> -
> val = cd->tcr |
> #ifdef __BIG_ENDIAN
> CTXDESC_CD_0_ENDI |
> @@ -1299,18 +1357,14 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
> if (cd_table->stall_enabled)
> val |= CTXDESC_CD_0_S;
> }
> -
> + cdptr->data[0] = cpu_to_le64(val);
> /*
> - * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
> - * "Configuration structures and configuration invalidation completion"
> - *
> - * The size of single-copy atomic reads made by the SMMU is
> - * IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
> - * field within an aligned 64-bit span of a structure can be altered
> - * without first making the structure invalid.
> + * Since the above is updating the CD entry based on the current value
> + * without zeroing unused bits it needs fixing before being passed to
> + * the programming logic.
> */
> - WRITE_ONCE(cdptr->data[0], cpu_to_le64(val));
> - arm_smmu_sync_cd(master, ssid, true);
> + arm_smmu_clean_cd_entry(&target);
I am not sure I understand the logic here. Is that only needed for entry[0]?
As far as I can see, the other entries are set and not reused.
If so, I think it’d be better to make that clear. Also, as used_bits are always 0xff
for all cases, I believe the EPD0 logic should be integrated into populating the CD so
that it is correct by construction, as this looks like a hack to me.
Thanks,
Mostafa
> + arm_smmu_write_cd_entry(master, ssid, cd_table_entry, &target);
> return 0;
> }
>
> --
> 2.43.2
>
More information about the linux-arm-kernel
mailing list