[PATCH net 2/2] net: xilinx: axienet: Fix BQL accounting for multi-BD TX packets

Gupta, Suraj Suraj.Gupta2 at amd.com
Tue Mar 24 22:30:40 PDT 2026


[Public]

> -----Original Message-----
> From: Sean Anderson <sean.anderson at linux.dev>
> Sent: Tuesday, March 24, 2026 9:39 PM
> To: Gupta, Suraj <Suraj.Gupta2 at amd.com>; andrew+netdev at lunn.ch;
> davem at davemloft.net; edumazet at google.com; kuba at kernel.org;
> pabeni at redhat.com; Simek, Michal <michal.simek at amd.com>; Pandey,
> Radhey Shyam <radhey.shyam.pandey at amd.com>; horms at kernel.org
> Cc: netdev at vger.kernel.org; linux-arm-kernel at lists.infradead.org; linux-
> kernel at vger.kernel.org; Katakam, Harini <harini.katakam at amd.com>
> Subject: Re: [PATCH net 2/2] net: xilinx: axienet: Fix BQL accounting for multi-BD
> TX packets
>
> Caution: This message originated from an External Source. Use proper caution
> when opening attachments, clicking links, or responding.
>
>
> On 3/24/26 10:53, Suraj Gupta wrote:
> > When a TX packet spans multiple buffer descriptors (scatter-gather),
> > the per-BD byte count is accumulated into a local variable that resets
> > on each NAPI poll. If the BDs for a single packet complete across
> > different polls, the earlier bytes are lost and never credited to BQL.
> > This causes BQL to think bytes are permanently in-flight, eventually
> > stalling the TX queue.
> >
> > Fix this by replacing the local accumulator with a persistent counter
> > (tx_compl_bytes) that survives across polls and is reset only after
> > updating BQL and stats.
>
> Do we need this? Can't we just do something like
>

Nope, the 'size' variable passed to axienet_free_tx_chain() is local to axienet_tx_poll() and goes out of scope between different polls. This means it can't track completion bytes across multiple NAPI polls.

Regards,
Suraj

> diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> index 415e9bc252527..1ea8a6592bce1 100644
> --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> @@ -768,6 +768,7 @@ static int axienet_free_tx_chain(struct axienet_local
> *lp, u32 *sizep, int budge
>                 if (cur_p->skb) {
>                         struct axienet_cb *cb = (void *)cur_p->skb->cb;
>
> +                       *sizep += skb->len;
>                         dma_unmap_sgtable(lp->dev, &cb->sgt, DMA_TO_DEVICE, 0);
>                         sg_free_table_chained(&cb->sgt, XAE_INLINE_SG_CNT);
>                         napi_consume_skb(cur_p->skb, budget); @@ -783,8 +784,6 @@
> static int axienet_free_tx_chain(struct axienet_local *lp, u32 *sizep, int budge
>                 wmb();
>                 cur_p->cntrl = 0;
>                 cur_p->status = 0;
> -
> -               *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
>         }
>
>         smp_store_release(&lp->tx_bd_ci, (ci + i) & (lp->tx_bd_num - 1));
>
> > Fixes: c900e49d58eb ("net: xilinx: axienet: Implement BQL")
> > Signed-off-by: Suraj Gupta <suraj.gupta2 at amd.com>
> > ---
> >  drivers/net/ethernet/xilinx/xilinx_axienet.h  |  3 +++
> > .../net/ethernet/xilinx/xilinx_axienet_main.c | 20 +++++++++----------
> >  2 files changed, 13 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> > b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> > index 602389843342..a4444c939451 100644
> > --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
> > +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
> > @@ -509,6 +509,8 @@ struct skbuf_dma_descriptor {
> >   *           complete. Only updated at runtime by TX NAPI poll.
> >   * @tx_bd_tail:      Stores the index of the next Tx buffer descriptor in the ring
> >   *              to be populated.
> > + * @tx_compl_bytes: Accumulates TX completion length until a full packet is
> > + *              reported to the stack.
> >   * @tx_packets: TX packet count for statistics
> >   * @tx_bytes:        TX byte count for statistics
> >   * @tx_stat_sync: Synchronization object for TX stats @@ -592,6
> > +594,7 @@ struct axienet_local {
> >       u32 tx_bd_num;
> >       u32 tx_bd_ci;
> >       u32 tx_bd_tail;
> > +     u32 tx_compl_bytes;
> >       u64_stats_t tx_packets;
> >       u64_stats_t tx_bytes;
> >       struct u64_stats_sync tx_stat_sync; diff --git
> > a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > index b06e4c37ff61..95bf61986cb7 100644
> > --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
> > @@ -692,6 +692,8 @@ static void axienet_dma_stop(struct axienet_local
> *lp)
> >       axienet_lock_mii(lp);
> >       __axienet_device_reset(lp);
> >       axienet_unlock_mii(lp);
> > +
> > +     lp->tx_compl_bytes = 0;
> >  }
> >
> >  /**
> > @@ -770,8 +772,6 @@ static int axienet_device_reset(struct net_device
> *ndev)
> >   * @first_bd:        Index of first descriptor to clean up
> >   * @nr_bds:  Max number of descriptors to clean up
> >   * @force:   Whether to clean descriptors even if not complete
> > - * @sizep:   Pointer to a u32 filled with the total sum of all bytes
> > - *           in all cleaned-up descriptors. Ignored if NULL.
> >   * @budget:  NAPI budget (use 0 when not called from NAPI poll)
> >   *
> >   * Would either be called after a successful transmit operation, or
> > after @@ -780,7 +780,7 @@ static int axienet_device_reset(struct net_device
> *ndev)
> >   * Return: The number of packets handled.
> >   */
> >  static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd,
> > -                              int nr_bds, bool force, u32 *sizep, int budget)
> > +                              int nr_bds, bool force, int budget)
> >  {
> >       struct axidma_bd *cur_p;
> >       unsigned int status;
> > @@ -819,8 +819,8 @@ static int axienet_free_tx_chain(struct axienet_local
> *lp, u32 first_bd,
> >               cur_p->cntrl = 0;
> >               cur_p->status = 0;
> >
> > -             if (sizep)
> > -                     *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
> > +             if (!force)
> > +                     lp->tx_compl_bytes += status &
> > + XAXIDMA_BD_STS_ACTUAL_LEN_MASK;
> >       }
> >
> >       if (!force) {
> > @@ -999,18 +999,18 @@ static int axienet_tx_poll(struct napi_struct
> > *napi, int budget)  {
> >       struct axienet_local *lp = container_of(napi, struct axienet_local, napi_tx);
> >       struct net_device *ndev = lp->ndev;
> > -     u32 size = 0;
> >       int packets;
> >
> >       packets = axienet_free_tx_chain(lp, lp->tx_bd_ci, lp->tx_bd_num, false,
> > -                                     &size, budget);
> > +                                     budget);
> >
> >       if (packets) {
> > -             netdev_completed_queue(ndev, packets, size);
> > +             netdev_completed_queue(ndev, packets,
> > + lp->tx_compl_bytes);
> >               u64_stats_update_begin(&lp->tx_stat_sync);
> >               u64_stats_add(&lp->tx_packets, packets);
> > -             u64_stats_add(&lp->tx_bytes, size);
> > +             u64_stats_add(&lp->tx_bytes, lp->tx_compl_bytes);
> >               u64_stats_update_end(&lp->tx_stat_sync);
> > +             lp->tx_compl_bytes = 0;
> >
> >               /* Matches barrier in axienet_start_xmit */
> >               smp_mb();
> > @@ -1115,7 +1115,7 @@ axienet_start_xmit(struct sk_buff *skb, struct
> net_device *ndev)
> >                               netdev_err(ndev, "TX DMA mapping error\n");
> >                       ndev->stats.tx_dropped++;
> >                       axienet_free_tx_chain(lp, orig_tail_ptr, ii + 1,
> > -                                           true, NULL, 0);
> > +                                           true, 0);
> >                       dev_kfree_skb_any(skb);
> >                       return NETDEV_TX_OK;
> >               }


More information about the linux-arm-kernel mailing list