[PATCH 7/7] iommu/riscv: Add NAPOT range invalidation support

Tomasz Jeznach tjeznach at rivosinc.com
Tue May 5 09:36:02 PDT 2026


On Fri, Apr 10, 2026 at 8:57 AM Jason Gunthorpe <jgg at nvidia.com> wrote:
>
> Use the RISC-V IOMMU Address Range Invalidation extension
> (capabilities.S, spec section 9.3) to invalidate an IOVA range with
> a single IOTINVAL.VMA command using NAPOT-encoded addressing.
>
> One iommu_iotlb_gather maps to one NAPOT invalidation command. The
> smallest power-of-two aligned range covering the gather is used since
> over-invalidation is always safe.
>
> S and NL appear to be orthogonal in the spec. If NL is not supported,
> global invalidation will probably be used most of the time anyway,
> since wiping a large range without also changing a table is uncommon.
>
> Signed-off-by: Jason Gunthorpe <jgg at nvidia.com>
> ---
>  drivers/iommu/riscv/iommu-bits.h | 17 +++++++++++++
>  drivers/iommu/riscv/iommu.c      | 43 +++++++++++++++++++++++++++-----
>  2 files changed, 54 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/iommu/riscv/iommu-bits.h b/drivers/iommu/riscv/iommu-bits.h
> index f01b49ac815586..32b3ad3ac9ae59 100644
> --- a/drivers/iommu/riscv/iommu-bits.h
> +++ b/drivers/iommu/riscv/iommu-bits.h
> @@ -64,6 +64,7 @@
>  #define RISCV_IOMMU_CAPABILITIES_PD17          BIT_ULL(39)
>  #define RISCV_IOMMU_CAPABILITIES_PD20          BIT_ULL(40)
>  #define RISCV_IOMMU_CAPABILITIES_NL            BIT_ULL(42)
> +#define RISCV_IOMMU_CAPABILITIES_S             BIT_ULL(43)
>
>  /**
>   * enum riscv_iommu_igs_settings - Interrupt Generation Support Settings
> @@ -475,6 +476,7 @@ struct riscv_iommu_command {
>  #define RISCV_IOMMU_CMD_IOTINVAL_GV            BIT_ULL(33)
>  #define RISCV_IOMMU_CMD_IOTINVAL_GSCID         GENMASK_ULL(59, 44)
>  #define RISCV_IOMMU_CMD_IOTINVAL_NL            BIT_ULL(34)
> +#define RISCV_IOMMU_CMD_IOTINVAL_S             BIT_ULL(9)
>  /* dword1[61:10] is the 4K-aligned page address */
>  #define RISCV_IOMMU_CMD_IOTINVAL_ADDR          GENMASK_ULL(61, 10)
>
> @@ -731,6 +733,21 @@ static inline void riscv_iommu_cmd_inval_set_nl(struct riscv_iommu_command *cmd)
>         cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_NL;
>  }
>
> +/*
> + * Set NAPOT-encoded address for range invalidation (S=1).
> + * sz_lg2: log2 of total range in bytes, must be >= 13 (8KiB, 2 pages).
> + * addr must be naturally aligned to 2^sz_lg2.
> + */
> +static inline void riscv_iommu_cmd_inval_set_napot(
> +       struct riscv_iommu_command *cmd, u64 addr, unsigned int sz_lg2)
> +{
> +       u64 pfn = addr >> 12;
> +
> +       pfn |= BIT_U64(sz_lg2 - 13) - 1;
> +       cmd->dword1 = FIELD_PREP(RISCV_IOMMU_CMD_IOTINVAL_ADDR, pfn);
> +       cmd->dword0 |= RISCV_IOMMU_CMD_IOTINVAL_AV | RISCV_IOMMU_CMD_IOTINVAL_S;
> +}
> +
>  static inline void riscv_iommu_cmd_inval_set_pscid(struct riscv_iommu_command *cmd,
>                                                    int pscid)
>  {
> diff --git a/drivers/iommu/riscv/iommu.c b/drivers/iommu/riscv/iommu.c
> index ea14630430451a..41e26e267a7fd6 100644
> --- a/drivers/iommu/riscv/iommu.c
> +++ b/drivers/iommu/riscv/iommu.c
> @@ -929,6 +929,10 @@ struct riscv_iommu_tlbi {
>                 u8 stride_lg2;
>                 unsigned int num;
>         } single;
> +       struct {
> +               u8 sz_lg2;
> +               u64 addr;
> +       } range;
>  };
>
>  static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
> @@ -945,9 +949,23 @@ static void riscv_iommu_tlbi_calc(struct riscv_iommu_tlbi *tlbi,
>         /* No level information available */
>         if (!combined) {
>                 tlbi->single.use_global = true;
> +               tlbi->range.sz_lg2 = 0;
>                 return;
>         }
>
> +       /*
> +        * Calculate the smallest NAPOT range containing [start, last].
> +        * NAPOT encoding requires a power-of-two sized, naturally aligned
> +        * range. Over-invalidation is always safe.
> +        */
> +       tlbi->range.sz_lg2 = fls64(tlbi->start ^ tlbi->last);
> +       if (unlikely(tlbi->range.sz_lg2 >= 64)) {
> +               tlbi->single.use_global = true;
> +               tlbi->range.sz_lg2 = 0;
> +               return;
> +       }
> +       tlbi->range.addr = tlbi->start & ~(BIT_U64(tlbi->range.sz_lg2) - 1);
> +
>         /*
>          * Calculate stride from the lowest changed level. RISC-V uses 4KiB
>          * granule with 9 bits per level.
> @@ -969,7 +987,6 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
>         bool use_nl = tlbi->non_leaf &&
>                       (iommu->caps & RISCV_IOMMU_CAPABILITIES_NL);
>         struct riscv_iommu_command cmd;
> -       unsigned long iova;
>         unsigned int i;
>
>         riscv_iommu_cmd_inval_vma(&cmd);
> @@ -979,16 +996,30 @@ static void riscv_iommu_iotlb_inval_iommu(struct riscv_iommu_device *iommu,
>          * If non-leaf entries were changed and the IOMMU doesn't
>          * support NL, we must fall back to global invalidation (AV=0).
>          */
> -       if (tlbi->single.use_global || (tlbi->non_leaf && !use_nl))
> +       if (tlbi->non_leaf && !use_nl)
>                 goto global;
>
> -       iova = tlbi->start;
> -       for (i = 0; i < tlbi->single.num; i++) {
> -               riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> +       if (iommu->caps & RISCV_IOMMU_CAPABILITIES_S &&
> +           tlbi->range.sz_lg2 >= 13) {
> +               riscv_iommu_cmd_inval_set_napot(&cmd, tlbi->range.addr,
> +                                               tlbi->range.sz_lg2);
>                 if (use_nl)
>                         riscv_iommu_cmd_inval_set_nl(&cmd);
>                 riscv_iommu_cmd_send(iommu, &cmd);
> -               iova += 1ULL << tlbi->single.stride_lg2;
> +       } else {
> +               unsigned long iova;
> +
> +               if (tlbi->single.use_global)
> +                       goto global;
> +
> +               iova = tlbi->start;
> +               for (i = 0; i < tlbi->single.num; i++) {
> +                       riscv_iommu_cmd_inval_set_addr(&cmd, iova);
> +                       if (use_nl)
> +                               riscv_iommu_cmd_inval_set_nl(&cmd);
> +                       riscv_iommu_cmd_send(iommu, &cmd);
> +                       iova += 1ULL << tlbi->single.stride_lg2;
> +               }
>         }
>         return;
>  global:
> --
> 2.43.0
>

Reviewed-by: Tomasz Jeznach <tjeznach at rivosinc.com>

Thank you,
- Tomasz



More information about the linux-riscv mailing list