[PATCH v4 15/24] iommu/arm-smmu-v3: Co-clear pending CMDQ_ERR when CMD_SYNC times out

Nicolin Chen nicolinc at nvidia.com
Mon May 18 20:38:58 PDT 2026


Once arm_smmu_cmdq_poll_until_sync() returns, arm_smmu_cmdq_issue_cmdlist()
tests its CMD_SYNC slot in atc_sync_timeouts to decide whether there was an
ATC_INV timeout.

However, when that poll times out, the GERROR ISR might have been delayed
past the poll deadline, so the atc_sync_timeouts test could miss an ATC_INV
timeout, misclassify it as a generic CMD_SYNC timeout, and bypass the
per-device quarantine.

Add two cmdq_err_handler impl functions:
 - arm_smmu_cmdq_err_handler() reads SMMU GERROR/GERRORN.
 - tegra241_vcmdq_handle_cmdq_err() reads VCMDQ GERROR/GERRORN.

When the polling on a CMD_SYNC times out, have the issuer also clear
("co-clear") any pending CMDQ_ERR by calling the cmdq's cmdq_err_handler.
Each cmdq impl serializes the synchronous drain against its own IRQ handler
with cmdq->cmdq_err_lock.

Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Nicolin Chen <nicolinc at nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 36 ++++++++++++++++++-
 .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c    | 23 +++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index fc0757359b783..7f81fd2e92480 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -813,6 +813,15 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 		sync_prod = llq.prod;
 		ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
 
+		/*
+		 * When the poll above timed out, the GERROR ISR might have been
+		 * delayed past the poll deadline, so the atc_sync_timeouts test
+		 * below could miss our ATC_INV timeout. Thus, drain any pending
+		 * CMDQ_ERR synchronously first via the per-cmdq callback.
+		 */
+		if (ret && cmdq->cmdq_err_handler)
+			cmdq->cmdq_err_handler(smmu, cmdq);
+
 		/*
 		 * Test atc_sync_timeouts first and see if there is ATC timeout
 		 * resulted from this cmdlist. Return -EIO to separate from the
@@ -2251,6 +2260,31 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
 
 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
 
+/*
+ * Drain a pending CMDQ_ERR on the primary cmdq: if GERROR and GERRORN differ
+ * in CMDQ_ERR, run the skip-err path and toggle that bit in GERRORN. Called
+ * by arm_smmu_cmdq_issue_cmdlist() via cmdq_err_handler after a CMD_SYNC poll
+ * timeout; serialized against arm_smmu_gerror_handler() by cmdq_err_lock.
+ */
+static void arm_smmu_cmdq_err_handler(struct arm_smmu_device *smmu,
+				      struct arm_smmu_cmdq *cmdq)
+{
+	u32 gerror, gerrorn;
+
+	guard(raw_spinlock_irqsave)(&cmdq->cmdq_err_lock);
+
+	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
+	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
+
+	if (!((gerror ^ gerrorn) & GERROR_CMDQ_ERR))
+		return;
+
+	__arm_smmu_cmdq_skip_err(smmu, cmdq);
+
+	/* Toggle only the CMDQ_ERR bit; other bits are left for the ISR. */
+	writel(gerrorn ^ GERROR_CMDQ_ERR, smmu->base + ARM_SMMU_GERRORN);
+}
+
 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 {
 	u32 gerror, gerrorn, active;
@@ -4399,7 +4433,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 	if (ret)
 		return ret;
 
-	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq, NULL);
+	ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq, arm_smmu_cmdq_err_handler);
 	if (ret)
 		return ret;
 
diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
index fb2f8f68fa344..e04107f0490c9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
+++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
@@ -337,6 +337,27 @@ static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
 	}
 }
 
+static void tegra241_vcmdq_handle_cmdq_err(struct arm_smmu_device *smmu,
+					   struct arm_smmu_cmdq *cmdq)
+{
+	struct tegra241_vcmdq *vcmdq =
+		container_of(cmdq, struct tegra241_vcmdq, cmdq);
+	u32 gerror, gerrorn;
+
+	guard(raw_spinlock_irqsave)(&cmdq->cmdq_err_lock);
+
+	gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
+	gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+
+	if (!((gerror ^ gerrorn) & GERROR_CMDQ_ERR))
+		return;
+
+	__arm_smmu_cmdq_skip_err(smmu, cmdq);
+
+	/* Toggle only CMDQ_ERR in this VCMDQ's GERRORN; keep other bits. */
+	writel(gerrorn ^ GERROR_CMDQ_ERR, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
+}
+
 static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
 {
 	struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid;
@@ -652,7 +673,7 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
 	q->q_base = q->base_dma & VCMDQ_ADDR;
 	q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
 
-	return arm_smmu_cmdq_init(smmu, cmdq, NULL);
+	return arm_smmu_cmdq_init(smmu, cmdq, tegra241_vcmdq_handle_cmdq_err);
 }
 
 /* VINTF Logical VCMDQ Resource Helpers */
-- 
2.43.0




More information about the linux-arm-kernel mailing list