[PATCH net-next] net: axienet: Use NAPI for TX completion path

Radhey Shyam Pandey radheys at xilinx.com
Mon May 2 12:30:51 PDT 2022


> -----Original Message-----
> From: Robert Hancock <robert.hancock at calian.com>
> Sent: Saturday, April 30, 2022 3:59 AM
> To: netdev at vger.kernel.org
> Cc: Radhey Shyam Pandey <radheys at xilinx.com>; davem at davemloft.net;
> edumazet at google.com; kuba at kernel.org; pabeni at redhat.com; Michal Simek
> <michals at xilinx.com>; linux-arm-kernel at lists.infradead.org; Robert Hancock
> <robert.hancock at calian.com>
> Subject: [PATCH net-next] net: axienet: Use NAPI for TX completion path
> 
> This driver was using the TX IRQ handler to perform all TX completion
> tasks. Under heavy TX network load, this can cause significant irqs-off
> latencies (found to be in the hundreds of microseconds using ftrace).
> This can cause other issues, such as overrunning serial UART FIFOs when
> using high baud rates with limited UART FIFO sizes.
> 
> Switch to using the NAPI poll handler to perform the TX completion work
> to get this out of hard IRQ context and avoid the IRQ latency impact.

Thanks for the patch. I assume for simulating heavy network load we
are using netperf/iperf. Do we have some details on the benchmark
before and after adding TX NAPI? I want to see the impact on
throughput.

> 
> Signed-off-by: Robert Hancock <robert.hancock at calian.com>
> ---
>  drivers/net/ethernet/xilinx/xilinx_axienet.h  |  2 +
>  .../net/ethernet/xilinx/xilinx_axienet_main.c | 56 ++++++++++++-------
>  2 files changed, 37 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> index d5c1e5c4a508..6e58d034fe90 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> @@ -397,6 +397,7 @@ struct axidma_bd {
>   * @regs:	Base address for the axienet_local device address space
>   * @dma_regs:	Base address for the axidma device address space
>   * @rx_dma_cr:  Nominal content of RX DMA control register
> + * @tx_dma_cr:  Nominal content of TX DMA control register
>   * @dma_err_task: Work structure to process Axi DMA errors
>   * @tx_irq:	Axidma TX IRQ number
>   * @rx_irq:	Axidma RX IRQ number
> @@ -454,6 +455,7 @@ struct axienet_local {
>  	void __iomem *dma_regs;
> 
>  	u32 rx_dma_cr;
> +	u32 tx_dma_cr;
> 
>  	struct work_struct dma_err_task;
> 
> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index d6fc3f7acdf0..a52e616275e4 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -254,8 +254,6 @@ static u32 axienet_usec_to_timer(struct axienet_local
> *lp, u32 coalesce_usec)
>   */
>  static void axienet_dma_start(struct axienet_local *lp)
>  {
> -	u32 tx_cr;
> -
>  	/* Start updating the Rx channel control register */
>  	lp->rx_dma_cr = (lp->coalesce_count_rx <<
> XAXIDMA_COALESCE_SHIFT) |
>  			XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_ERROR_MASK;
> @@ -269,16 +267,16 @@ static void axienet_dma_start(struct axienet_local
> *lp)
>  	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp->rx_dma_cr);
> 
>  	/* Start updating the Tx channel control register */
> -	tx_cr = (lp->coalesce_count_tx << XAXIDMA_COALESCE_SHIFT) |
> -		XAXIDMA_IRQ_IOC_MASK | XAXIDMA_IRQ_ERROR_MASK;
> +	lp->tx_dma_cr = (lp->coalesce_count_tx <<
> XAXIDMA_COALESCE_SHIFT) |
> +			XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_ERROR_MASK;
>  	/* Only set interrupt delay timer if not generating an interrupt on
>  	 * the first TX packet. Otherwise leave at 0 to disable delay interrupt.
>  	 */
>  	if (lp->coalesce_count_tx > 1)
> -		tx_cr |= (axienet_usec_to_timer(lp, lp->coalesce_usec_tx)
> -				<< XAXIDMA_DELAY_SHIFT) |
> -			 XAXIDMA_IRQ_DELAY_MASK;
> -	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
> +		lp->tx_dma_cr |= (axienet_usec_to_timer(lp, lp-
> >coalesce_usec_tx)
> +					<< XAXIDMA_DELAY_SHIFT) |
> +				 XAXIDMA_IRQ_DELAY_MASK;
> +	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
> 
>  	/* Populate the tail pointer and bring the Rx Axi DMA engine out of
>  	 * halted state. This will make the Rx side ready for reception.
> @@ -294,8 +292,8 @@ static void axienet_dma_start(struct axienet_local *lp)
>  	 * tail pointer register that the Tx channel will start transmitting.
>  	 */
>  	axienet_dma_out_addr(lp, XAXIDMA_TX_CDESC_OFFSET, lp-
> >tx_bd_p);
> -	tx_cr |= XAXIDMA_CR_RUNSTOP_MASK;
> -	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, tx_cr);
> +	lp->tx_dma_cr |= XAXIDMA_CR_RUNSTOP_MASK;
> +	axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp->tx_dma_cr);
>  }
> 
>  /**
> @@ -671,13 +669,14 @@ static int axienet_device_reset(struct net_device
> *ndev)
>   * @nr_bds:	Number of descriptors to clean up, can be -1 if unknown.
>   * @sizep:	Pointer to a u32 filled with the total sum of all bytes
>   * 		in all cleaned-up descriptors. Ignored if NULL.
> + * @budget:	NAPI budget (use 0 when not called from NAPI poll)
>   *
>   * Would either be called after a successful transmit operation, or after
>   * there was an error when setting up the chain.
>   * Returns the number of descriptors handled.
>   */
>  static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd,
> -				 int nr_bds, u32 *sizep)
> +				 int nr_bds, u32 *sizep, int budget)
>  {
>  	struct axienet_local *lp = netdev_priv(ndev);
>  	struct axidma_bd *cur_p;
> @@ -707,7 +706,7 @@ static int axienet_free_tx_chain(struct net_device
> *ndev, u32 first_bd,
>  				 DMA_TO_DEVICE);
> 
>  		if (cur_p->skb && (status &
> XAXIDMA_BD_STS_COMPLETE_MASK))
> -			dev_consume_skb_irq(cur_p->skb);
> +			napi_consume_skb(cur_p->skb, budget);
> 
>  		cur_p->app0 = 0;
>  		cur_p->app1 = 0;
> @@ -756,20 +755,24 @@ static inline int axienet_check_tx_bd_space(struct
> axienet_local *lp,
>   * axienet_start_xmit_done - Invoked once a transmit is completed by the
>   * Axi DMA Tx channel.
>   * @ndev:	Pointer to the net_device structure
> + * @budget:	NAPI budget
>   *
> - * This function is invoked from the Axi DMA Tx isr to notify the completion
> + * This function is invoked from the NAPI processing to notify the completion
>   * of transmit operation. It clears fields in the corresponding Tx BDs and
>   * unmaps the corresponding buffer so that CPU can regain ownership of the
>   * buffer. It finally invokes "netif_wake_queue" to restart transmission if
>   * required.
>   */
> -static void axienet_start_xmit_done(struct net_device *ndev)
> +static void axienet_start_xmit_done(struct net_device *ndev, int budget)
>  {
>  	struct axienet_local *lp = netdev_priv(ndev);
>  	u32 packets = 0;
>  	u32 size = 0;
> 
> -	packets = axienet_free_tx_chain(ndev, lp->tx_bd_ci, -1, &size);
> +	packets = axienet_free_tx_chain(ndev, lp->tx_bd_ci, -1, &size, budget);
> +
> +	if (!packets)
> +		return;
> 
>  	lp->tx_bd_ci += packets;
>  	if (lp->tx_bd_ci >= lp->tx_bd_num)
> @@ -865,7 +868,7 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>  				netdev_err(ndev, "TX DMA mapping error\n");
>  			ndev->stats.tx_dropped++;
>  			axienet_free_tx_chain(ndev, orig_tail_ptr, ii + 1,
> -					      NULL);
> +					      NULL, 0);
>  			lp->tx_bd_tail = orig_tail_ptr;
> 
>  			return NETDEV_TX_OK;
> @@ -899,9 +902,9 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
>  }
> 
>  /**
> - * axienet_poll - Triggered by RX ISR to complete the received BD processing.
> + * axienet_poll - Triggered by RX/TX ISR to complete the BD processing.
>   * @napi:	Pointer to NAPI structure.
> - * @budget:	Max number of packets to process.
> + * @budget:	Max number of RX packets to process.
>   *
>   * Return: Number of RX packets processed.
>   */
> @@ -916,6 +919,8 @@ static int axienet_poll(struct napi_struct *napi, int
> budget)
>  	struct sk_buff *skb, *new_skb;
>  	struct axienet_local *lp = container_of(napi, struct axienet_local,
> napi);
> 
> +	axienet_start_xmit_done(lp->ndev, budget);
> +
>  	cur_p = &lp->rx_bd_v[lp->rx_bd_ci];
> 
>  	while (packets < budget && (cur_p->status &
> XAXIDMA_BD_STS_COMPLETE_MASK)) {
> @@ -1001,11 +1006,12 @@ static int axienet_poll(struct napi_struct *napi, int
> budget)
>  		axienet_dma_out_addr(lp, XAXIDMA_RX_TDESC_OFFSET,
> tail_p);
> 
>  	if (packets < budget && napi_complete_done(napi, packets)) {
> -		/* Re-enable RX completion interrupts. This should
> -		 * cause an immediate interrupt if any RX packets are
> +		/* Re-enable RX/TX completion interrupts. This should
> +		 * cause an immediate interrupt if any RX/TX packets are
>  		 * already pending.
>  		 */
>  		axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, lp-
> >rx_dma_cr);
> +		axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, lp-
> >tx_dma_cr);
>  	}
>  	return packets;
>  }
> @@ -1040,7 +1046,15 @@ static irqreturn_t axienet_tx_irq(int irq, void
> *_ndev)
>  			   (lp->tx_bd_v[lp->tx_bd_ci]).phys);
>  		schedule_work(&lp->dma_err_task);
>  	} else {
> -		axienet_start_xmit_done(lp->ndev);
> +		/* Disable further TX completion interrupts and schedule
> +		 * NAPI to handle the completions.
> +		 */
> +		u32 cr = lp->tx_dma_cr;
> +
> +		cr &= ~(XAXIDMA_IRQ_IOC_MASK |
> XAXIDMA_IRQ_DELAY_MASK);
> +		axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr);
> +
> +		napi_schedule(&lp->napi);
>  	}
> 
>  	return IRQ_HANDLED;
> --
> 2.31.1




More information about the linux-arm-kernel mailing list