[PATCH 7/7] iommu/riscv: Add NAPOT range invalidation support

Jason Gunthorpe jgg at nvidia.com
Fri Apr 10 08:57:08 PDT 2026


Use the RISC-V IOMMU Address Range Invalidation extension
(capabilities.S, spec section 9.3) to invalidate an IOVA range with
a single IOTINVAL.VMA command using NAPOT-encoded addressing.

Each iommu_iotlb_gather maps to one NAPOT invalidation command. The
smallest naturally aligned, power-of-two sized range covering the gather
is used; this may over-invalidate, which is always safe.

S and NL appear to be orthogonal in the spec. If NL is not supported,
global invalidation will probably still be used almost every time, since
wiping a large range without also changing a table is uncommon.

Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
---
 drivers/iommu/riscv/iommu-bits.h | 17 +++++++++++++
 drivers/iommu/riscv/iommu.c      | 43 +++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index f01b49ac815586..32b3ad3ac9ae59 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -64,6 +64,7 @@
 #define RISCV_IOMMU_CAPABILITIES_PD17		BIT_ULL(39)
 #define RISCV_IOMMU_CAPABILITIES_PD20		BIT_ULL(40)
 #define RISCV_IOMMU_CAPABILITIES_NL		BIT_ULL(42)
+#define RISCV_IOMMU_CAPABILITIES_S		BIT_ULL(43)
 
 /**
  * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -475,6 +476,7 @@ struct riscv_iommu_command {
 #define RISCV_IOMMU_CMD_IOTINVAL_GV		BIT_ULL(33)
 #define RISCV_IOMMU_CMD_IOTINVAL_GSCID		GENMASK_ULL(59, 44)
 #define RISCV_IOMMU_CMD_IOTINVAL_NL		BIT_ULL(34)
+#define RISCV_IOMMU_CMD_IOTINVAL_S		BIT_ULL(9)
 /* dword1[61:10] is the 4K-aligned page address */
 #define RISCV_IOMMU_CMD_IOTINVAL_ADDR		GENMASK_ULL(61, 10)
 
@@ -731,6 +733,21 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
 	cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_NL;
 }
 
+/*
+ * Set NAPOT-encoded address for range invalidation (S=1).
+ * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages).
+ * addr must be naturally aligned to 2^sz_lg2.
+ */
+static inline void riscv_iommu_cmd_inval_set_napot(
+	struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
+{
+	u64 pfn = addr >> 12;
+
+	pfn |= BIT_U64(sz_lg2 - 13) - 1;
+	cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, pfn);
+	cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV | RISCV_IOMMU_CMD_IOTINVAL_S;
+}
+
 static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
 						   int pscid)
 {
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index ea14630430451a..41e26e267a7fd6 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
 		u8 stride_lg2;
 		unsigned int num;
 	} single;
+	struct {
+		u8 sz_lg2;
+		u64 addr;
+	} range;
 };
 
 static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
@@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
 	/* No level information available */
 	if (!combined) {
 		tlbi->single.use_global = true;
+		tlbi->range.sz_lg2 = 0;
 		return;
 	}
 
+	/*
+	 * Calculate the smallest NAPOT range containing [start, last].
+	 * NAPOT encoding requires a power-of-two sized, naturally aligned
+	 * range. Over-invalidation is always safe.
+	 */
+	tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
+	if (unlikely(tlbi->range.sz_lg2 >= 64)) {
+		tlbi->single.use_global = true;
+		tlbi->range.sz_lg2 = 0;
+		return;
+	}
+	tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
+
 	/*
 	 * Calculate stride from the lowest changed level. RISC-V uses 4KiB
 	 * granule with 9 bits per level.
@@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
 	bool use_nl = tlbi->non_leaf &&
 		      (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
 	struct riscv_iommu_command cmd;
-	unsigned long iova;
 	unsigned int i;
 
 	riscv_iommu_cmd_inval_vma(&cmd);
@@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
 	 * If non-leaf entries were changed and the IOMMU doesn't
 	 * support NL, we must fall back to global invalidation (AV=0).
 	 */
-	if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
+	if (tlbi->non_leaf && !use_nl)
 		goto global;
 
-	iova = tlbi->start;
-	for (i = 0; i < tlbi->single.num; i++) {
-		riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+	if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
+	    tlbi->range.sz_lg2 >= 13) {
+		riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
+						tlbi->range.sz_lg2);
 		if (use_nl)
 			riscv_iommu_cmd_inval_set_nl(&cmd);
 		riscv_iommu_cmd_send(iommu, &cmd);
-		iova += 1ULL << tlbi->single.stride_lg2;
+	} else {
+		unsigned long iova;
+
+		if (tlbi->single.use_global)
+			goto global;
+
+		iova = tlbi->start;
+		for (i = 0; i < tlbi->single.num; i++) {
+			riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+			if (use_nl)
+				riscv_iommu_cmd_inval_set_nl(&cmd);
+			riscv_iommu_cmd_send(iommu, &cmd);
+			iova += 1ULL << tlbi->single.stride_lg2;
+		}
 	}
 	return;
 global:
-- 
2.43.0




More information about the linux-riscv mailing list