[PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV polling is faster

Barry Song song.bao.hua at hisilicon.com
Fri Jul 31 04:33:43 EDT 2020


Different implementations may show different performance by using SEV
polling or MSI polling.
On the implementation of hi1620, tests show disabling MSI polling can
bring performance improvement.
Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
in 32768bytes and set iommu to strict, TX throughput can improve from
25Gbps to 27Gbps by this patch.
This patch adds a generic function to support implementation options
based on IIDR and disables MSI polling if IIDR matches the specific
implementation tested.

Cc: Prime Zeng <prime.zeng at hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua at hisilicon.com>
---
 -v2: rather than disabling msipolling globally, only disable it for
 specific implementation based on IIDR

 drivers/iommu/arm-smmu-v3.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index f578677a5c41..ed5a6774eb45 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -88,6 +88,12 @@
 #define IDR5_VAX			GENMASK(11, 10)
 #define IDR5_VAX_52_BIT			1
 
+#define ARM_SMMU_IIDR			0x18
+#define IIDR_VARIANT			GENMASK(19, 16)
+#define IIDR_REVISION			GENMASK(15, 12)
+#define IIDR_IMPLEMENTER		GENMASK(11, 0)
+#define IMPLEMENTER_HISILICON		0x736
+
 #define ARM_SMMU_CR0			0x20
 #define CR0_ATSCHK			(1 << 4)
 #define CR0_CMDQEN			(1 << 3)
@@ -652,6 +658,7 @@ struct arm_smmu_device {
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
 #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
+#define ARM_SMMU_OPT_DISABLE_MSIPOLL    (1 << 2)
 	u32				options;
 
 	struct arm_smmu_cmdq		cmdq;
@@ -992,7 +999,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
 	 * payload, so the write will zero the entire command on that platform.
 	 */
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
+	    smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
 				   q->ent_dwords * 8;
@@ -1332,7 +1340,8 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
 					 struct arm_smmu_ll_queue *llq)
 {
-	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
+	    smmu->features & ARM_SMMU_FEAT_MSI &&
 	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
 
@@ -3693,6 +3702,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 	return 0;
 }
 
+static void acpi_smmu_get_implementation_options(struct arm_smmu_device *smmu)
+{
+	/*
+	 * IIDR provides information about the implementation and implementer of
+	 * the SMMU
+	 */
+	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
+	u32 implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
+	u32 variant = FIELD_GET(IIDR_VARIANT, iidr);
+	u32 revision = FIELD_GET(IIDR_REVISION, iidr);
+
+	if (implementer == IMPLEMENTER_HISILICON && variant == 3 && revision == 0)
+		smmu->options |= ARM_SMMU_OPT_DISABLE_MSIPOLL;
+}
+
 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 {
 	u32 reg;
@@ -3892,6 +3916,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 
 	smmu->ias = max(smmu->ias, smmu->oas);
 
+	/* set implementation-related options according to IIDR */
+	acpi_smmu_get_implementation_options(smmu);
+
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
 	return 0;
-- 
2.27.0





More information about the linux-arm-kernel mailing list