[PATCH v4 24/24] iommu/arm-smmu-v3: Block ATS upon an ATC invalidation timeout
Nicolin Chen
nicolinc at nvidia.com
Mon May 18 20:39:07 PDT 2026
Currently, when GERROR_CMDQ_ERR occurs, arm_smmu_cmdq_skip_err() does not
do anything for CMDQ_ERR_CERROR_ATC_INV_IDX.
When a device does not respond to an ATC invalidation request, this often
results in constant CMDQ errors:
unexpected global error reported (0x00000001), this could be serious
CMDQ error (cons 0x0302bb84): ATC invalidate timeout
unexpected global error reported (0x00000001), this could be serious
CMDQ error (cons 0x0302bb88): ATC invalidate timeout
unexpected global error reported (0x00000001), this could be serious
CMDQ error (cons 0x0302bb8c): ATC invalidate timeout
...
An ATC invalidation timeout indicates that the device failed to respond to
a protocol-critical coherency request, which means that the device's
internal ATS state is desynchronized from the SMMU.
Furthermore, ignoring the timeout leaves the system in an unsafe state, as
the device cache may retain stale ATC entries for memory pages that the OS
has already reclaimed and reassigned. This might lead to data corruption.
Isolate the device that is confirmed to be unresponsive by a surgical STE
update to unset its EATS bit so as to reject any further ATS transaction,
which could corrupt the memory.
Also, set the master->ats_broken flag that is revertible after the device
completes a reset. This flag avoids further ATS requests and invalidations
from happening.
Finally, report this broken device to the IOMMU core to isolate the device
at the core level too.
Since the three steps above are invoked in an invalidation path (which can
be an atomic context), hold the ats_broken_lock spinlock instead of a mutex.
For batched ATC_INV commands, SMMU hardware only reports a timeout at the
CMD_SYNC, which could follow the batch issued for multiple devices. So, it
isn't straightforward to identify which command in a batch resulted in the
timeout. Fortunately, the invs array has a sorted list of ATC entries. So,
the issued batch must be sorted as well. This makes it possible to retry
the ATC_INV command for each unique Stream ID in the batch to identify the
unresponsive master.
Signed-off-by: Nicolin Chen <nicolinc at nvidia.com>
---
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 18 +++
drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 118 +++++++++++++++++++-
2 files changed, 133 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index e3eb4c4a62d3a..43d4a35500500 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -831,6 +831,24 @@ arm_smmu_invs_iter_next(struct arm_smmu_invs *invs, size_t next, size_t *idx)
for (cur = arm_smmu_invs_iter_next(invs, 0, &(idx)); cur; \
cur = arm_smmu_invs_iter_next(invs, idx + 1, &(idx)))
+static inline struct arm_smmu_master *
+arm_smmu_invs_find_ats_master(struct arm_smmu_invs *invs,
+			      struct arm_smmu_device *smmu, u32 sid)
+{
+	struct arm_smmu_inv *inv;
+	size_t pos;
+
+	if (!invs->has_ats)
+		return NULL;
+	arm_smmu_invs_for_each_entry(invs, pos, inv) {
+		if (inv->smmu != smmu || !arm_smmu_inv_is_ats(inv))
+			continue;
+		if (inv->id == sid)
+			return inv->master;
+	}
+	return NULL;	/* no ATS entry matches @sid on @smmu */
+}
+
static inline struct arm_smmu_invs *arm_smmu_invs_alloc(size_t num_invs)
{
struct arm_smmu_invs *new_invs;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index ee864046f0baa..0323fd3f33b7f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -107,8 +107,13 @@ static const char * const event_class_str[] = {
[3] = "Reserved",
};
+static struct arm_smmu_ste *
+arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid);
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static bool arm_smmu_ats_supported(struct arm_smmu_master *master);
+static void arm_smmu_cmdq_batch_retry(struct arm_smmu_device *smmu,
+ struct arm_smmu_invs *invs,
+ struct arm_smmu_cmdq_batch *cmds);
static void parse_driver_options(struct arm_smmu_device *smmu)
{
@@ -905,8 +910,13 @@ static int arm_smmu_cmdq_batch_issue(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
bool sync)
{
- return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
- cmds->num, sync);
+ int ret = arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
+ cmds->num, sync);
+
+ /* Identify the timed-out master via cmds->invs */
+ if (ret == -EIO && cmds->invs)
+ arm_smmu_cmdq_batch_retry(smmu, cmds->invs, cmds);
+ return ret;
}
static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
@@ -924,7 +934,11 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
- arm_smmu_cmdq_batch_issue(smmu, cmds, false);
+ /*
+ * Force sync for ATS-bearing batches so the timeout is caught
+ * here, not at a later unrelated batch's CMD_SYNC.
+ */
+ arm_smmu_cmdq_batch_issue(smmu, cmds, cmds->has_ats);
arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd, cmds->invs);
}
@@ -945,6 +959,104 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
return arm_smmu_cmdq_batch_issue(smmu, cmds, true);
}
+static void arm_smmu_master_disable_ats(struct arm_smmu_master *master)
+{
+	struct arm_smmu_cmd cfgi = arm_smmu_make_cmd_op(CMDQ_OP_CFGI_STE);
+	struct arm_smmu_device *smmu = master->smmu;
+	struct arm_smmu_cmdq_batch batch;
+	struct arm_smmu_inv *inv;
+	size_t pos;
+
+	lockdep_assert_held(&master->ats_broken_lock);
+
+	/* Clear STE.EATS for each SID in ats_invs and queue its CFGI_STE */
+	arm_smmu_cmdq_batch_init_cmd(smmu, &batch, &cfgi, NULL);
+	arm_smmu_invs_for_each_entry(master->ats_invs, pos, inv) {
+		struct arm_smmu_ste *ste;
+
+		/* EATS is safe to update. See arm_smmu_get_ste_update_safe() */
+		ste = arm_smmu_get_step_for_sid(smmu, inv->id);
+		WRITE_ONCE(ste->data[1],
+			   ste->data[1] & ~cpu_to_le64(STRTAB_STE_1_EATS));
+
+		arm_smmu_cmdq_batch_add_cmd(smmu, &batch,
+			arm_smmu_make_cmd_cfgi_ste(inv->id, true));
+	}
+
+	if (arm_smmu_cmdq_batch_submit(smmu, &batch))
+		dev_err_ratelimited(smmu->dev,
+				    "failed to disable ATS for master\n");
+
+	/* Lockless readers pair with this WRITE_ONCE() */
+	WRITE_ONCE(master->ats_broken, true);
+
+	/*
+	 * Lastly, report to the core to schedule a full blocking procedure.
+	 * When a concurrent pci_dev_reset_iommu_done() runs after this report
+	 * (e.g. an AER recovery in flight), the broken_worker may transiently
+	 * block a recovering device. pci_dev_reset_iommu_done() will lift it
+	 * immediately, so the net end-state is correct.
+	 */
+	iommu_report_device_broken(master->dev);
+}
+
+static void arm_smmu_cmdq_batch_retry(struct arm_smmu_device *smmu,
+				      struct arm_smmu_invs *invs,
+				      struct arm_smmu_cmdq_batch *cmds)
+{
+	struct arm_smmu_cmd last = {};
+	int n;
+
+	/* Only a timed out ATC_INV command needs a retry */
+	if (!invs->has_ats)
+		return;
+
+	for (n = 0; n < cmds->num; n++) {
+		struct arm_smmu_cmd *cmd = &cmds->cmds[n];
+		struct arm_smmu_master *master;
+		unsigned long flags;
+		u32 sid;
+		int ret;
+
+		/* Only ATC invalidations are worth retrying */
+		if (FIELD_GET(CMDQ_0_OP, cmd->data[0]) != CMDQ_OP_ATC_INV)
+			continue;
+
+		/* One ATC_INV per Stream ID (device) is enough for a retry */
+		sid = FIELD_GET(CMDQ_ATC_0_SID, cmd->data[0]);
+		if (last.data[0] &&
+		    FIELD_GET(CMDQ_ATC_0_SID, last.data[0]) == sid)
+			continue;
+
+		master = arm_smmu_invs_find_ats_master(invs, smmu, sid);
+		if (WARN_ON(!master))
+			continue;
+
+		/* Keep a copy: it is re-issued below and dedups this SID */
+		last = *cmd;
+
+		/*
+		 * The ats_broken_lock covers both the per-master re-issue and
+		 * the possible disable_ats, so that a concurrent
+		 * reset_device_done() cannot clear ats_broken between the
+		 * timeout observation and the quarantine action.
+		 */
+		spin_lock_irqsave(&master->ats_broken_lock, flags);
+		/*
+		 * If ats_broken is set, a previous retry on a sibling SID may
+		 * have already disabled ATS across all the STEs owned by this
+		 * master's SIDs, in which case the re-issue is skipped.
+		 */
+		if (!master->ats_broken) {
+			ret = arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq,
+							  &last, 1, true);
+			if (ret == -EIO)
+				arm_smmu_master_disable_ats(master);
+		}
+		spin_unlock_irqrestore(&master->ats_broken_lock, flags);
+	}
+}
+
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
struct iommu_page_response *resp)
{
--
2.43.0
More information about the linux-arm-kernel
mailing list