[PATCH] iommu/arm-smmu-v3: Add SMMUv3.2 range invalidation support

Rob Herring robh at kernel.org
Wed Jan 15 06:02:22 PST 2020


On Wed, Jan 15, 2020 at 3:21 AM Auger Eric <eric.auger at redhat.com> wrote:
>
> Hi Rob,
>
> On 1/13/20 3:39 PM, Rob Herring wrote:
> > Arm SMMUv3.2 adds support for TLB range invalidate operations.
> > Support for range invalidate is determined by the RIL bit in the IDR3
> > register.
> >
> > The range invalidate is in units of the leaf page size and operates on
> > 1-32 chunks of a power of 2 multiple pages. First we determine from the
> > size what power of 2 multiple we can use and then adjust the granule to
> > 32x that size.
> >
> > Cc: Eric Auger <eric.auger at redhat.com>
> > Cc: Jean-Philippe Brucker <jean-philippe at linaro.org>
> > Cc: Will Deacon <will at kernel.org>
> > Cc: Robin Murphy <robin.murphy at arm.com>
> > Cc: Joerg Roedel <joro at 8bytes.org>
> > Signed-off-by: Rob Herring <robh at kernel.org>
> > ---
> >  drivers/iommu/arm-smmu-v3.c | 53 +++++++++++++++++++++++++++++++++++++
> >  1 file changed, 53 insertions(+)
> >
> > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> > index e91b4a098215..8b6b3e2aa383 100644
> > --- a/drivers/iommu/arm-smmu-v3.c
> > +++ b/drivers/iommu/arm-smmu-v3.c
> > @@ -70,6 +70,9 @@
> >  #define IDR1_SSIDSIZE                        GENMASK(10, 6)
> >  #define IDR1_SIDSIZE                 GENMASK(5, 0)
> >
> > +#define ARM_SMMU_IDR3                        0xc
> > +#define IDR3_RIL                     (1 << 10)
> > +
> >  #define ARM_SMMU_IDR5                        0x14
> >  #define IDR5_STALL_MAX                       GENMASK(31, 16)
> >  #define IDR5_GRAN64K                 (1 << 6)
> > @@ -327,9 +330,14 @@
> >  #define CMDQ_CFGI_1_LEAF             (1UL << 0)
> >  #define CMDQ_CFGI_1_RANGE            GENMASK_ULL(4, 0)
> >
> > +#define CMDQ_TLBI_0_NUM                      GENMASK_ULL(16, 12)
> > +#define CMDQ_TLBI_RANGE_NUM_MAX              32
> > +#define CMDQ_TLBI_0_SCALE            GENMASK_ULL(24, 20)
> >  #define CMDQ_TLBI_0_VMID             GENMASK_ULL(47, 32)
> >  #define CMDQ_TLBI_0_ASID             GENMASK_ULL(63, 48)
> >  #define CMDQ_TLBI_1_LEAF             (1UL << 0)
> > +#define CMDQ_TLBI_1_TTL                      GENMASK_ULL(9, 8)
> > +#define CMDQ_TLBI_1_TG                       GENMASK_ULL(11, 10)
> >  #define CMDQ_TLBI_1_VA_MASK          GENMASK_ULL(63, 12)
> >  #define CMDQ_TLBI_1_IPA_MASK         GENMASK_ULL(51, 12)
> >
> > @@ -455,9 +463,13 @@ struct arm_smmu_cmdq_ent {
> >               #define CMDQ_OP_TLBI_S2_IPA     0x2a
> >               #define CMDQ_OP_TLBI_NSNH_ALL   0x30
> >               struct {
> > +                     u8                      num;
> > +                     u8                      scale;
> >                       u16                     asid;
> >                       u16                     vmid;
> >                       bool                    leaf;
> > +                     u8                      ttl;
> > +                     u8                      tg;
> >                       u64                     addr;
> >               } tlbi;
> >
> > @@ -595,6 +607,7 @@ struct arm_smmu_device {
> >  #define ARM_SMMU_FEAT_HYP            (1 << 12)
> >  #define ARM_SMMU_FEAT_STALL_FORCE    (1 << 13)
> >  #define ARM_SMMU_FEAT_VAX            (1 << 14)
> > +#define ARM_SMMU_FEAT_RANGE_INV              (1 << 15)
> >       u32                             features;
> >
> >  #define ARM_SMMU_OPT_SKIP_PREFETCH   (1 << 0)
> > @@ -856,13 +869,21 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
> >               cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
> >               break;
> >       case CMDQ_OP_TLBI_NH_VA:
> > +             cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> > +             cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> >               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
> >               cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> > +             cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> > +             cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> >               cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
> >               break;
> >       case CMDQ_OP_TLBI_S2_IPA:
> > +             cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> > +             cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
> >               cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
> >               cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> > +             cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> > +             cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
> >               cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
> >               break;
> >       case CMDQ_OP_TLBI_NH_ASID:
> > @@ -2022,12 +2043,39 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
> >               cmd.tlbi.vmid   = smmu_domain->s2_cfg.vmid;
> >       }
> >
> > +     if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
> > +             unsigned long tg, scale;
> > +
> > +             /* Get the leaf page size */
> > +             tg = __ffs(smmu_domain->domain.pgsize_bitmap);
> it is unclear to me why you can't set tg with the granule parameter.

The granule parameter could be a 2MB section if THP is enabled, right? TG
needs to be the leaf page size, so I derive it from pgsize_bitmap instead.

> > +
> > +             /* Determine the power of 2 multiple number of pages */
> > +             scale = __ffs(size / (1UL << tg));
> > +             cmd.tlbi.scale = scale;
> > +
> > +             cmd.tlbi.num = CMDQ_TLBI_RANGE_NUM_MAX - 1;
> Also could you explain why you use CMDQ_TLBI_RANGE_NUM_MAX.

How about adding this comment above it:
/* The invalidation loop defaults to the maximum range */

And perhaps I'll move that assignment next to where granule is set.

> > +
> > +             /* Convert page size of 12,14,16 (log2) to 1,2,3 */
> > +             cmd.tlbi.tg = ((tg - ilog2(SZ_4K)) / 2) + 1;
> > +
> > +             /* Determine what level the granule is at */
> > +             cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> > +
> > +             /* Adjust granule to the maximum range */
> > +             granule = CMDQ_TLBI_RANGE_NUM_MAX * (1 << scale) * (1UL << tg);
> spec says
> Range = ((NUM+1)*2 ^ SCALE )*Translation_Granule_Size

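(NUM+1) can be 1-32. I defined CMDQ_TLBI_RANGE_NUM_MAX as the logical
maximum number of chunks (32) rather than the maximum value of the NUM
field (31), which is why cmd.tlbi.num is set to
CMDQ_TLBI_RANGE_NUM_MAX - 1 while the granule calculation uses
CMDQ_TLBI_RANGE_NUM_MAX directly.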

Rob



More information about the linux-arm-kernel mailing list