[RFC PATCH 33/45] iommu/arm-smmu-v3: Use single pages for level-2 stream tables

Jean-Philippe Brucker jean-philippe at linaro.org
Wed Feb 1 04:53:17 PST 2023


Rather than using a fixed split point for the stream tables, derive it
from the page size so that each level-2 stream table occupies exactly
one page. This makes it easier for the KVM driver to pass single pages
to the hypervisor when lazily allocating stream tables.

Signed-off-by: Jean-Philippe Brucker <jean-philippe at linaro.org>
---
 arch/arm64/include/asm/arm-smmu-v3-regs.h     |  1 -
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  1 +
 .../arm/arm-smmu-v3/arm-smmu-v3-common.c      | 21 ++++++++++++-------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 10 ++++-----
 4 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/include/asm/arm-smmu-v3-regs.h b/arch/arm64/include/asm/arm-smmu-v3-regs.h
index 646a734f2554..357e52f4038f 100644
--- a/arch/arm64/include/asm/arm-smmu-v3-regs.h
+++ b/arch/arm64/include/asm/arm-smmu-v3-regs.h
@@ -168,7 +168,6 @@
  *       256 lazy entries per table (each table covers a PCI bus)
  */
 #define STRTAB_L1_SZ_SHIFT		20
-#define STRTAB_SPLIT			8
 
 #define STRTAB_L1_DESC_DWORDS		1
 #define STRTAB_L1_DESC_SPAN		GENMASK_ULL(4, 0)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 87034da361ca..3a4649f43839 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -163,6 +163,7 @@ struct arm_smmu_strtab_cfg {
 
 	u64				strtab_base;
 	u32				strtab_base_cfg;
+	u8				split;
 };
 
 /* An SMMUv3 instance */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-common.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-common.c
index 7faf28c5a8b4..c44075015979 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-common.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-common.c
@@ -254,11 +254,14 @@ int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
 
+	/* Use one page per level-2 table */
+	smmu->strtab_cfg.split = PAGE_SHIFT - (ilog2(STRTAB_STE_DWORDS) + 3);
+
 	/*
 	 * If the SMMU supports fewer bits than would fill a single L2 stream
 	 * table, use a linear table instead.
 	 */
-	if (smmu->sid_bits <= STRTAB_SPLIT)
+	if (smmu->sid_bits <= smmu->strtab_cfg.split)
 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
 
 	/* IDR3 */
@@ -470,15 +473,17 @@ int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
 	size_t size;
 	void *strtab;
 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
-	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
+	struct arm_smmu_strtab_l1_desc *desc =
+		&cfg->l1_desc[sid >> smmu->strtab_cfg.split];
 
 	if (desc->l2ptr)
 		return 0;
 
-	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
-	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
+	size = 1 << (smmu->strtab_cfg.split + ilog2(STRTAB_STE_DWORDS) + 3);
+	strtab = &cfg->strtab[(sid >> smmu->strtab_cfg.split) *
+		 STRTAB_L1_DESC_DWORDS];
 
-	desc->span = STRTAB_SPLIT + 1;
+	desc->span = smmu->strtab_cfg.split + 1;
 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
 					  GFP_KERNEL);
 	if (!desc->l2ptr) {
@@ -520,10 +525,10 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 
 	/* Calculate the L1 size, capped to the SIDSIZE. */
 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
-	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
+	size = min(size, smmu->sid_bits - smmu->strtab_cfg.split);
 	cfg->num_l1_ents = 1 << size;
 
-	size += STRTAB_SPLIT;
+	size += smmu->strtab_cfg.split;
 	if (size < smmu->sid_bits)
 		dev_warn(smmu->dev,
 			 "2-level strtab only covers %u/%u bits of SID\n",
@@ -543,7 +548,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
 	/* Configure strtab_base_cfg for 2 levels */
 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
-	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
+	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, smmu->strtab_cfg.split);
 	cfg->strtab_base_cfg = reg;
 
 	return arm_smmu_init_l1_strtab(smmu);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index a972c00700cc..19f170088268 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2156,9 +2156,9 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 		int idx;
 
 		/* Two-level walk */
-		idx = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
+		idx = (sid >> smmu->strtab_cfg.split) * STRTAB_L1_DESC_DWORDS;
 		l1_desc = &cfg->l1_desc[idx];
-		idx = (sid & ((1 << STRTAB_SPLIT) - 1)) * STRTAB_STE_DWORDS;
+		idx = (sid & ((1 << smmu->strtab_cfg.split) - 1)) * STRTAB_STE_DWORDS;
 		step = &l1_desc->l2ptr[idx];
 	} else {
 		/* Simple linear lookup */
@@ -2439,7 +2439,7 @@ static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
 
 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
-		limit *= 1UL << STRTAB_SPLIT;
+		limit *= 1UL << smmu->strtab_cfg.split;
 
 	return sid < limit;
 }
@@ -2460,8 +2460,8 @@ static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
 		if (ret)
 			return ret;
 
-		desc = &smmu->strtab_cfg.l1_desc[sid >> STRTAB_SPLIT];
-		arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT,
+		desc = &smmu->strtab_cfg.l1_desc[sid >> smmu->strtab_cfg.split];
+		arm_smmu_init_bypass_stes(desc->l2ptr, 1 << smmu->strtab_cfg.split,
 					  false);
 	}
 
-- 
2.39.0




More information about the linux-arm-kernel mailing list