[PATCH] iommu/arm-smmu-v3: Add SMMUv3.2 range invalidation support

Auger Eric eric.auger at redhat.com
Wed Jan 15 01:21:25 PST 2020


Hi Rob,

On 1/13/20 3:39 PM, Rob Herring wrote:
> Arm SMMUv3.2 adds support for TLB range invalidate operations.
> Support for range invalidate is determined by the RIL bit in the IDR3
> register.
> 
> The range invalidate is in units of the leaf page size and operates on
> 1-32 chunks of a power of 2 multiple pages. First we determine from the
> size what power of 2 multiple we can use and then adjust the granule to
> 32x that size.
> 
> Cc: Eric Auger <eric.auger at redhat.com>
> Cc: Jean-Philippe Brucker <jean-philippe at linaro.org>
> Cc: Will Deacon <will at kernel.org>
> Cc: Robin Murphy <robin.murphy at arm.com>
> Cc: Joerg Roedel <joro at 8bytes.org>
> Signed-off-by: Rob Herring <robh at kernel.org>
> ---
>  drivers/iommu/arm-smmu-v3.c | 53 +++++++++++++++++++++++++++++++++++++
>  1 file changed, 53 insertions(+)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index e91b4a098215..8b6b3e2aa383 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -70,6 +70,9 @@
>  #define IDR1_SSIDSIZE			GENMASK(10, 6)
>  #define IDR1_SIDSIZE			GENMASK(5, 0)
>  
> +#define ARM_SMMU_IDR3			0xc
> +#define IDR3_RIL			(1 << 10)
> +
>  #define ARM_SMMU_IDR5			0x14
>  #define IDR5_STALL_MAX			GENMASK(31, 16)
>  #define IDR5_GRAN64K			(1 << 6)
> @@ -327,9 +330,14 @@
>  #define CMDQ_CFGI_1_LEAF		(1UL << 0)
>  #define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)
>  
> +#define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
> +#define CMDQ_TLBI_RANGE_NUM_MAX		32
> +#define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
>  #define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
>  #define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
>  #define CMDQ_TLBI_1_LEAF		(1UL << 0)
> +#define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
> +#define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
>  #define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
>  #define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)
>  
> @@ -455,9 +463,13 @@ struct arm_smmu_cmdq_ent {
>  		#define CMDQ_OP_TLBI_S2_IPA	0x2a
>  		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
>  		struct {
> +			u8			num;
> +			u8			scale;
>  			u16			asid;
>  			u16			vmid;
>  			bool			leaf;
> +			u8			ttl;
> +			u8			tg;
>  			u64			addr;
>  		} tlbi;
>  
> @@ -595,6 +607,7 @@ struct arm_smmu_device {
>  #define ARM_SMMU_FEAT_HYP		(1 << 12)
>  #define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
>  #define ARM_SMMU_FEAT_VAX		(1 << 14)
> +#define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
>  	u32				features;
>  
>  #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
> @@ -856,13 +869,21 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
>  		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
>  		break;
>  	case CMDQ_OP_TLBI_NH_VA:
> +		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> +		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
>  		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
>  		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> +		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> +		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
>  		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
>  		break;
>  	case CMDQ_OP_TLBI_S2_IPA:
> +		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
> +		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
>  		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
>  		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
> +		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
> +		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
>  		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
>  		break;
>  	case CMDQ_OP_TLBI_NH_ASID:
> @@ -2022,12 +2043,39 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
>  		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
>  	}
>  
> +	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
> +		unsigned long tg, scale;
> +
> +		/* Get the leaf page size */
> +		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
it is unclear to me why you can't set tg with the granule parameter.
> +
> +		/* Determine the power of 2 multiple number of pages */
> +		scale = __ffs(size / (1UL << tg));
> +		cmd.tlbi.scale = scale;
> +
> +		cmd.tlbi.num = CMDQ_TLBI_RANGE_NUM_MAX - 1;
Also could you explain why you use CMDQ_TLBI_RANGE_NUM_MAX.


> +
> +		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
> +		cmd.tlbi.tg = ((tg - ilog2(SZ_4K)) / 2) + 1;
> +
> +		/* Determine what level the granule is at */
> +		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
> +
> +		/* Adjust granule to the maximum range */
> +		granule = CMDQ_TLBI_RANGE_NUM_MAX * (1 << scale) * (1UL << tg);
spec says
Range = ((NUM+1)*2 ^ SCALE )*Translation_Granule_Size

Thanks

Eric

> +	}
> +
>  	while (iova < end) {
>  		if (i == CMDQ_BATCH_ENTRIES) {
>  			arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
>  			i = 0;
>  		}
>  
> +		/* Only possible for range invalidate */
> +		if (granule > (end - iova))
> +			cmd.tlbi.num = ((end - iova) /
> +					(granule / CMDQ_TLBI_RANGE_NUM_MAX)) - 1;
> +
>  		cmd.tlbi.addr = iova;
>  		arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
>  		iova += granule;
> @@ -3449,6 +3497,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>  	if (smmu->sid_bits <= STRTAB_SPLIT)
>  		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
>  
> +	/* IDR3 */
> +	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
> +	if (FIELD_GET(IDR3_RIL, reg))
> +		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
> +
>  	/* IDR5 */
>  	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
>  
> 




More information about the linux-arm-kernel mailing list