[PATCH 2/2] iommu/arm-smmu-v3: Issue CFGI/TLBI twice on Tegra264

Ashish Mhetre amhetre at nvidia.com
Thu May 28 03:16:17 PDT 2026


Apply the workaround for Tegra264 erratum by issuing every CFGI/TLBI
command twice on affected SMMU instances, with CMD_SYNC after each.
The erratum requires this exact sequencing:

    TLBI/CFGI ... CMD_SYNC TLBI/CFGI ... CMD_SYNC

To get this sequence with minimal surgery, hook the workaround into
arm_smmu_cmdq_issue_cmdlist(). Rename the original function to
__arm_smmu_cmdq_issue_cmdlist() and add a thin wrapper that, on
affected SMMUs and when @sync is true, re-issues the same cmdlist a
second time.

A new arm_smmu_cmd_needs_tlbi_twice() helper classifies which opcodes
need the doubling: CFGI_* and TLBI_*.

For batches that exceed CMDQ_BATCH_ENTRIES commands,
arm_smmu_cmdq_batch_add_cmd_p() normally flushes the full buffer with
sync=false, deferring the SYNC to the eventual batch_submit(). On
affected SMMUs this would leave the first chunk's commands issued
only once, since the WAR hook in arm_smmu_cmdq_issue_cmdlist() only
fires on synced submissions. Force a SYNC on the capacity rollover
when the buffer carries CFGI/TLBI commands so every flushed chunk is
correctly doubled.

Signed-off-by: Ashish Mhetre <amhetre at nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 66 +++++++++++++++++++--
 1 file changed, 61 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 88296c0a5337..38d45f175a2c 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -698,10 +698,10 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq,
  *   insert their own list of commands then all of the commands from one
  *   CPU will appear before any of the commands from the other CPU.
  */
-int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
-				struct arm_smmu_cmdq *cmdq,
-				struct arm_smmu_cmd *cmds, int n,
-				bool sync)
+static int __arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+					 struct arm_smmu_cmdq *cmdq,
+					 struct arm_smmu_cmd *cmds, int n,
+					 bool sync)
 {
 	struct arm_smmu_cmd cmd_sync;
 	u32 prod;
@@ -820,6 +820,52 @@ int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	return ret;
 }
 
+/*
+ * Returns true if @opcode is a CFGI_* or TLBI_* command, i.e. one of the
+ * invalidations covered by Tegra264 erratum (see ARM_SMMU_OPT_TLBI_TWICE).
+ */
+static bool arm_smmu_cmd_needs_tlbi_twice(u8 opcode)
+{
+	switch (opcode) {
+	case CMDQ_OP_CFGI_STE:
+	case CMDQ_OP_CFGI_ALL:
+	case CMDQ_OP_CFGI_CD:
+	case CMDQ_OP_CFGI_CD_ALL:
+	case CMDQ_OP_TLBI_NH_ALL:
+	case CMDQ_OP_TLBI_NH_ASID:
+	case CMDQ_OP_TLBI_NH_VA:
+	case CMDQ_OP_TLBI_NH_VAA:
+	case CMDQ_OP_TLBI_EL2_ALL:
+	case CMDQ_OP_TLBI_EL2_ASID:
+	case CMDQ_OP_TLBI_EL2_VA:
+	case CMDQ_OP_TLBI_S12_VMALL:
+	case CMDQ_OP_TLBI_S2_IPA:
+	case CMDQ_OP_TLBI_NSNH_ALL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+				struct arm_smmu_cmdq *cmdq,
+				struct arm_smmu_cmd *cmds, int n,
+				bool sync)
+{
+	int ret = __arm_smmu_cmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
+	/*
+	 * The driver's batch invariants keep a single submission's
+	 * opcode class uniform, so checking the first command is enough.
+	 */
+	if (!ret && sync && (smmu->options & ARM_SMMU_OPT_TLBI_TWICE) &&
+	    arm_smmu_cmd_needs_tlbi_twice(FIELD_GET(CMDQ_0_OP,
+						    cmds[0].data[0])))
+		ret = __arm_smmu_cmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+
+	return ret;
+}
+
 static int arm_smmu_cmdq_issue_cmd_p(struct arm_smmu_device *smmu,
 				     struct arm_smmu_cmd *cmd, bool sync)
 {
@@ -863,8 +909,18 @@ static void arm_smmu_cmdq_batch_add_cmd_p(struct arm_smmu_device *smmu,
 	}
 
 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
+		/*
+		 * Force a SYNC only when the batch carries commands that
+		 * have to be doubled (see ARM_SMMU_OPT_TLBI_TWICE).
+		 * The batch holds a uniform opcode class, so checking
+		 * the first command is sufficient.
+		 */
+		bool need_sync = (smmu->options & ARM_SMMU_OPT_TLBI_TWICE) &&
+				 arm_smmu_cmd_needs_tlbi_twice(FIELD_GET(CMDQ_0_OP,
+									 cmds->cmds[0].data[0]));
+
 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
-					    cmds->num, false);
+					    cmds->num, need_sync);
 		arm_smmu_cmdq_batch_init_cmd(smmu, cmds, cmd);
 	}
 
-- 
2.50.1




More information about the linux-arm-kernel mailing list