[PATCH net-next 2/3] net: hns3: add support for TX push mode

Will Deacon will at kernel.org
Tue Jun 22 05:16:11 PDT 2021


On Tue, Jun 22, 2021 at 07:11:10PM +0800, Guangbin Huang wrote:
> From: Huazhong Tan <tanhuazhong at huawei.com>
> 
> For the device that supports the TX push capability, the BD can
> be directly copied to the device memory. However, due to hardware
> restrictions, the push mode can be used only when there are no
> more than two BDs, otherwise, the doorbell mode based on device
> memory is used.
> 
> Signed-off-by: Huazhong Tan <tanhuazhong at huawei.com>
> Signed-off-by: Yufeng Mo <moyufeng at huawei.com>
> ---
>  drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  1 +
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 83 ++++++++++++++++++++--
>  drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  6 ++
>  drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  2 +
>  .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c |  2 +
>  .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 11 ++-
>  .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  8 +++
>  .../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c   |  2 +
>  .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 11 ++-
>  .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  |  8 +++
>  10 files changed, 126 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> index 0b202f4def83..3979d5d2e842 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
> @@ -163,6 +163,7 @@ struct hnae3_handle;
>  
>  struct hnae3_queue {
>  	void __iomem *io_base;
> +	void __iomem *mem_base;
>  	struct hnae3_ae_algo *ae_algo;
>  	struct hnae3_handle *handle;
>  	int tqp_index;		/* index in a handle */
> diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> index cdb5f14fb6bc..8649bd8e1b57 100644
> --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
> @@ -2002,9 +2002,77 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
>  	return bd_num;
>  }
>  
> +static void hns3_tx_push_bd(struct hns3_enet_ring *ring, int num)
> +{
> +#define HNS3_BYTES_PER_64BIT		8
> +
> +	struct hns3_desc desc[HNS3_MAX_PUSH_BD_NUM] = {};
> +	int offset = 0;
> +
> +	/* make sure everything is visible to device before
> +	 * excuting tx push or updating doorbell
> +	 */
> +	dma_wmb();
> +
> +	do {
> +		int idx = (ring->next_to_use - num + ring->desc_num) %
> +			  ring->desc_num;
> +
> +		u64_stats_update_begin(&ring->syncp);
> +		ring->stats.tx_push++;
> +		u64_stats_update_end(&ring->syncp);
> +		memcpy(&desc[offset], &ring->desc[idx],
> +		       sizeof(struct hns3_desc));
> +		offset++;
> +	} while (--num);
> +
> +	__iowrite64_copy(ring->tqp->mem_base, desc,
> +			 (sizeof(struct hns3_desc) * HNS3_MAX_PUSH_BD_NUM) /
> +			 HNS3_BYTES_PER_64BIT);
> +
> +#if defined(CONFIG_ARM64)
> +	dgh();
> +#endif

It looks a bit weird putting this at the end of the function, given that
it's supposed to do something to a pair of accesses. Please can you explain
what it's doing, and also provide some numbers to show that it's worthwhile
(given that it's a performance hint not a correctness thing afaict).

> +}
> +
> +static void hns3_tx_mem_doorbell(struct hns3_enet_ring *ring)
> +{
> +#define HNS3_MEM_DOORBELL_OFFSET	64
> +
> +	__le64 bd_num = cpu_to_le64((u64)ring->pending_buf);
> +
> +	/* make sure everything is visible to device before
> +	 * excuting tx push or updating doorbell
> +	 */
> +	dma_wmb();
> +
> +	__iowrite64_copy(ring->tqp->mem_base + HNS3_MEM_DOORBELL_OFFSET,
> +			 &bd_num, 1);
> +	u64_stats_update_begin(&ring->syncp);
> +	ring->stats.tx_mem_doorbell += ring->pending_buf;
> +	u64_stats_update_end(&ring->syncp);
> +
> +#if defined(CONFIG_ARM64)
> +	dgh();
> +#endif

Same here.

Thanks,

Will



More information about the linux-arm-kernel mailing list