[PATCH v2] iommu/arm-smmu-v3: disable MSI polling if SEV polling is faster

John Garry john.garry at huawei.com
Fri Jul 31 06:21:14 EDT 2020


On 31/07/2020 09:33, Barry Song wrote:
> Different implementations may show different performance by using SEV
> polling or MSI polling.
> On the implementation of hi1620, tests show disabling MSI polling can
> bring performance improvement.
> Using 16 threads to run netperf on hns3 100G NIC with UDP packet size
> in 32768bytes and set iommu to strict, TX throughput can improve from
> 25Gbps to 27Gbps by this patch.
> This patch adds a generic function to support implementation options
> based on IIDR and disables MSI polling if IIDR matches the specific
> implementation tested.
Not sure if we should do checks like this on an implementation basis. 
I'm sure maintainers will decide.

> 
> Cc: Prime Zeng <prime.zeng at hisilicon.com>
> Signed-off-by: Barry Song <song.bao.hua at hisilicon.com>
> ---
>   -v2: rather than disabling msipolling globally, only disable it for
>   specific implementation based on IIDR
> 
>   drivers/iommu/arm-smmu-v3.c | 31 +++++++++++++++++++++++++++++--

this file has moved, check linux-next

>   1 file changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
> index f578677a5c41..ed5a6774eb45 100644
> --- a/drivers/iommu/arm-smmu-v3.c
> +++ b/drivers/iommu/arm-smmu-v3.c
> @@ -88,6 +88,12 @@
>   #define IDR5_VAX			GENMASK(11, 10)
>   #define IDR5_VAX_52_BIT			1
>   
> +#define ARM_SMMU_IIDR			0x18
> +#define IIDR_VARIANT			GENMASK(19, 16)
> +#define IIDR_REVISION			GENMASK(15, 12)
> +#define IIDR_IMPLEMENTER		GENMASK(11, 0)
> +#define IMPLEMENTER_HISILICON		0x736
> +
>   #define ARM_SMMU_CR0			0x20
>   #define CR0_ATSCHK			(1 << 4)
>   #define CR0_CMDQEN			(1 << 3)
> @@ -652,6 +658,7 @@ struct arm_smmu_device {
>   
>   #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
>   #define ARM_SMMU_OPT_PAGE0_REGS_ONLY	(1 << 1)
> +#define ARM_SMMU_OPT_DISABLE_MSIPOLL    (1 << 2)
>   	u32				options;
>   
>   	struct arm_smmu_cmdq		cmdq;
> @@ -992,7 +999,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
>   	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
>   	 * payload, so the write will zero the entire command on that platform.
>   	 */
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> +	    smmu->features & ARM_SMMU_FEAT_MSI &&

I don't know why you check MSIPOLL disabled and then MSI poll supported. 
Surely for native non-MSI poll (like hi1616), the ARM_SMMU_FEAT_MSI 
check first makes sense. This is fastpath, albeit fast to maybe wait..

>   	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
>   		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
>   				   q->ent_dwords * 8;
> @@ -1332,7 +1340,8 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
>   static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
>   					 struct arm_smmu_ll_queue *llq)
>   {
> -	if (smmu->features & ARM_SMMU_FEAT_MSI &&
> +	if (!(smmu->options & ARM_SMMU_OPT_DISABLE_MSIPOLL) &&
> +	    smmu->features & ARM_SMMU_FEAT_MSI &&
>   	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
>   		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
>   
> @@ -3693,6 +3702,21 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
>   	return 0;
>   }
>   
> +static void acpi_smmu_get_implementation_options(struct arm_smmu_device *smmu)
> +{
> +	/*
> +	 * IIDR provides information about the implementation and implementer of
> +	 * the SMMU
> +	 */
> +	u32 iidr = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
> +	u32 implementer = FIELD_GET(IIDR_IMPLEMENTER, iidr);
> +	u32 variant = FIELD_GET(IIDR_VARIANT, iidr);
> +	u32 revision = FIELD_GET(IIDR_REVISION, iidr);

why not check the product ID also, i.e. the complete register contents?

> +
> +	if (implementer == IMPLEMENTER_HISILICON && variant == 3 && revision == 0)
> +		smmu->options |= ARM_SMMU_OPT_DISABLE_MSIPOLL;
> +}
> +
>   static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>   {
>   	u32 reg;
> @@ -3892,6 +3916,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
>   
>   	smmu->ias = max(smmu->ias, smmu->oas);
>   
> +	/* set implementation-related options according to IIDR */
> +	acpi_smmu_get_implementation_options(smmu);
> +
>   	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
>   		 smmu->ias, smmu->oas, smmu->features);
>   	return 0;
> 




More information about the linux-arm-kernel mailing list