[PATCH 7/7] iommu/riscv: Add NAPOT range invalidation support
Tomasz Jeznach
tjeznach at rivosinc.com
Tue May 5 09:36:02 PDT 2026
On Fri, Apr 10, 2026 at 8:57 AM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> Use the RISC-V IOMMU Address Range Invalidation extension
> (capabilities.S, spec section 9.3) to invalidate an IOVA range with
> a single IOTINVAL.VMA command using NAPOT-encoded addressing.
>
> One iommu_iotlb_gather maps to one NAPOT invalidation command. The
> smallest power-of-two aligned range covering the gather is used since
> over-invalidation is always safe.
>
> S and NL seem to be orthogonal in the spec. If NL is not supported,
> global invalidation is probably always going to happen anyway, because
> wiping a large range without a table change is not common.
>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
> drivers/iommu/riscv/iommu-bits.h | 17 +++++++++++++
> drivers/iommu/riscv/iommu.c | 43 +++++++++++++++++++++++++++-----
> 2 files changed, 54 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> index f01b49ac815586..32b3ad3ac9ae59 100644
> --- a/drivers/iommu/riscv/iommu-bits.h
> +++ b/drivers/iommu/riscv/iommu-bits.h
> @@ -64,6 +64,7 @@
> #define RISCV_IOMMU_CAPABILITIES_PD17 BIT_ULL(39)
> #define RISCV_IOMMU_CAPABILITIES_PD20 BIT_ULL(40)
> #define RISCV_IOMMU_CAPABILITIES_NL BIT_ULL(42)
> +#define RISCV_IOMMU_CAPABILITIES_S BIT_ULL(43)
>
> /**
> * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
> @@ -475,6 +476,7 @@ struct riscv_iommu_command {
> #define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33)
> #define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44)
> #define RISCV_IOMMU_CMD_IOTINVAL_NL BIT_ULL(34)
> +#define RISCV_IOMMU_CMD_IOTINVAL_S BIT_ULL(9)
> /* dword1[61:10] is the 4K-aligned page address */
> #define RISCV_IOMMU_CMD_IOTINVAL_ADDR GENMASK_ULL(61, 10)
>
> @@ -731,6 +733,21 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
> cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_NL;
> }
>
> +/*
> + * Set NAPOT-encoded address for range invalidation (S=1).
> + * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages).
> + * addr must be naturally aligned to 2^sz_lg2.
> + */
> +static inline void riscv_iommu_cmd_inval_set_napot(
> + struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
> +{
> + u64 pfn = addr >> 12;
> +
> + pfn |= BIT_U64(sz_lg2 - 13) - 1;
> + cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, pfn);
> + cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV | RISCV_IOMMU_CMD_IOTINVAL_S;
> +}
> +
> static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
> int pscid)
> {
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index ea14630430451a..41e26e267a7fd6 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
> u8 stride_lg2;
> unsigned int num;
> } single;
> + struct {
> + u8 sz_lg2;
> + u64 addr;
> + } range;
> };
>
> static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
> @@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
> /* No level information available */
> if (!combined) {
> tlbi->single.use_global = true;
> + tlbi->range.sz_lg2 = 0;
> return;
> }
>
> + /*
> + * Calculate the smallest NAPOT range containing [start, last].
> + * NAPOT encoding requires a power-of-two sized, naturally aligned
> + * range. Over-invalidation is always safe.
> + */
> + tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
> + if (unlikely(tlbi->range.sz_lg2 >= 64)) {
> + tlbi->single.use_global = true;
> + tlbi->range.sz_lg2 = 0;
> + return;
> + }
> + tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
> +
> /*
> * Calculate stride from the lowest changed level. RISC-V uses 4KiB
> * granule with 9 bits per level.
> @@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
> bool use_nl = tlbi->non_leaf &&
> (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
> struct riscv_iommu_command cmd;
> - unsigned long iova;
> unsigned int i;
>
> riscv_iommu_cmd_inval_vma(&cmd);
> @@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
> * If non-leaf entries were changed and the IOMMU doesn't
> * support NL, we must fall back to global invalidation (AV=0).
> */
> - if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
> + if (tlbi->non_leaf && !use_nl)
> goto global;
>
> - iova = tlbi->start;
> - for (i = 0; i < tlbi->single.num; i++) {
> - riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> + if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
> + tlbi->range.sz_lg2 >= 13) {
> + riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
> + tlbi->range.sz_lg2);
> if (use_nl)
> riscv_iommu_cmd_inval_set_nl(&cmd);
> riscv_iommu_cmd_send(iommu, &cmd);
> - iova += 1ULL << tlbi->single.stride_lg2;
> + } else {
> + unsigned long iova;
> +
> + if (tlbi->single.use_global)
> + goto global;
> +
> + iova = tlbi->start;
> + for (i = 0; i < tlbi->single.num; i++) {
> + riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> + if (use_nl)
> + riscv_iommu_cmd_inval_set_nl(&cmd);
> + riscv_iommu_cmd_send(iommu, &cmd);
> + iova += 1ULL << tlbi->single.stride_lg2;
> + }
> }
> return;
> global:
> --
> 2.43.0
>
Reviewed-by: Tomasz Jeznach <tjeznach at rivosinc.com>
Thank you,
- Tomasz
More information about the linux-riscv mailing list