[PATCH v2 8/8] iommu/riscv: Add NAPOT range invalidation support
Jason Gunthorpe
jgg at nvidia.com
Fri May 8 07:53:07 PDT 2026
Use the RISC-V IOMMU Address Range Invalidation extension
(capabilities.S, spec section 9.3) to invalidate an IOVA range with
a single IOTINVAL.VMA command using NAPOT-encoded addressing.
One iommu_iotlb_gather maps to one NAPOT invalidation command. The
smallest power-of-two sized, naturally aligned range covering the gather
is used, since over-invalidation is always safe.
S and NL appear to be orthogonal in the spec. If NL is not supported,
global invalidation will probably still be used most of the time, since
wiping a large range without a table (non-leaf) change is not common.
Reviewed-by: Tomasz Jeznach <tjeznach at rivosinc.com>
Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
---
drivers/iommu/riscv/iommu-bits.h | 18 +++++++++++++
drivers/iommu/riscv/iommu.c | 43 +++++++++++++++++++++++++++-----
2 files changed, 55 insertions(+), 6 deletions(-)
diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
index 8c60780363da72..f2ef9bd3cde960 100644
--- a/drivers/iommu/riscv/iommu-bits.h
+++ b/drivers/iommu/riscv/iommu-bits.h
@@ -64,6 +64,7 @@
#define RISCV_IOMMU_CAPABILITIES_PD17 BIT_ULL(39)
#define RISCV_IOMMU_CAPABILITIES_PD20 BIT_ULL(40)
#define RISCV_IOMMU_CAPABILITIES_NL BIT_ULL(42)
+#define RISCV_IOMMU_CAPABILITIES_S BIT_ULL(43)
/**
* enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
@@ -475,6 +476,7 @@ struct riscv_iommu_command {
#define RISCV_IOMMU_CMD0_IOTINVAL_GV BIT_ULL(33)
#define RISCV_IOMMU_CMD0_IOTINVAL_GSCID GENMASK_ULL(59, 44)
#define RISCV_IOMMU_CMD0_IOTINVAL_NL BIT_ULL(34)
+#define RISCV_IOMMU_CMD1_IOTINVAL_S BIT_ULL(9)
/* dword1[61:10] is the 4K-aligned page address */
#define RISCV_IOMMU_CMD1_IOTINVAL_ADDR GENMASK_ULL(61, 10)
@@ -731,6 +733,22 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_NL;
}
+/*
+ * Set NAPOT-encoded address for range invalidation (S=1).
+ * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages).
+ * addr must be naturally aligned to 2^sz_lg2.
+ */
+static inline void riscv_iommu_cmd_inval_set_napot(
+ struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
+{
+ u64 pfn = addr >> 12;
+
+ pfn |= BIT_U64(sz_lg2 - 13) - 1;
+ cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD1_IOTINVAL_ADDR, pfn) |
+ RISCV_IOMMU_CMD1_IOTINVAL_S;
+ cmd->dword0 |= RISCV_IOMMU_CMD0_IOTINVAL_AV;
+}
+
static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
int pscid)
{
diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
index ea14630430451a..41e26e267a7fd6 100644
--- a/drivers/iommu/riscv/iommu.c
+++ b/drivers/iommu/riscv/iommu.c
@@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
u8 stride_lg2;
unsigned int num;
} single;
+ struct {
+ u8 sz_lg2;
+ u64 addr;
+ } range;
};
static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
@@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
/* No level information available */
if (!combined) {
tlbi->single.use_global = true;
+ tlbi->range.sz_lg2 = 0;
return;
}
+ /*
+ * Calculate the smallest NAPOT range containing [start, last].
+ * NAPOT encoding requires a power-of-two sized, naturally aligned
+ * range. Over-invalidation is always safe.
+ */
+ tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
+ if (unlikely(tlbi->range.sz_lg2 >= 64)) {
+ tlbi->single.use_global = true;
+ tlbi->range.sz_lg2 = 0;
+ return;
+ }
+ tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
+
/*
* Calculate stride from the lowest changed level. RISC-V uses 4KiB
* granule with 9 bits per level.
@@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
bool use_nl = tlbi->non_leaf &&
(iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
struct riscv_iommu_command cmd;
- unsigned long iova;
unsigned int i;
riscv_iommu_cmd_inval_vma(&cmd);
@@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
* If non-leaf entries were changed and the IOMMU doesn't
* support NL, we must fall back to global invalidation (AV=0).
*/
- if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
+ if (tlbi->non_leaf && !use_nl)
goto global;
- iova = tlbi->start;
- for (i = 0; i < tlbi->single.num; i++) {
- riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+ if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
+ tlbi->range.sz_lg2 >= 13) {
+ riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
+ tlbi->range.sz_lg2);
if (use_nl)
riscv_iommu_cmd_inval_set_nl(&cmd);
riscv_iommu_cmd_send(iommu, &cmd);
- iova += 1ULL << tlbi->single.stride_lg2;
+ } else {
+ unsigned long iova;
+
+ if (tlbi->single.use_global)
+ goto global;
+
+ iova = tlbi->start;
+ for (i = 0; i < tlbi->single.num; i++) {
+ riscv_iommu_cmd_inval_set_addr(&cmd, iova);
+ if (use_nl)
+ riscv_iommu_cmd_inval_set_nl(&cmd);
+ riscv_iommu_cmd_send(iommu, &cmd);
+ iova += 1ULL << tlbi->single.stride_lg2;
+ }
}
return;
global:
--
2.43.0
More information about the linux-riscv
mailing list