[RFC PATCH net-next 3/3] net: macb: add TX stall watchdog as defence-in-depth safety net

Lukasz Raczylo lukasz at raczylo.com
Fri Apr 24 15:38:33 PDT 2026


Patches 1/3 and 2/3 address two candidate races that could lead
to a TCOMP completion being missed on PCIe-attached macb
instances.  This patch adds a defence-in-depth safety net, in
case a further race remains that we have not identified.

The watchdog is a per-queue delayed_work that runs once per
second.  It snapshots queue->tx_tail; if the ring is non-empty
(queue->tx_head != queue->tx_tail) and tx_tail has not advanced
since the previous tick, it calls macb_tx_restart().

No new recovery logic is introduced.  macb_tx_restart() already
exists in this file, is correctly locked (tx_ptr_lock, bp->lock),
and verifies that the hardware's TBQP is behind the driver's
head index before re-asserting TSTART.  On a healthy ring it is
a no-op at the hardware level; the watchdog only supplies the
missing trigger.

On a healthy queue the per-tick cost is one spin_lock_irqsave()
/ spin_unlock_irqrestore() and one branch.  The delayed_work is
only scheduled between macb_open() and macb_close(), and is
cancelled synchronously on close.

Context for submission: on our 24-node Raspberry Pi 5 fleet,
before this series, an out-of-band user-space watchdog
(monitoring tx_packets from /sys/class/net/.../statistics and
toggling the link down/up when it froze) was required to keep
nodes usable.  We include this kernel-side watchdog as a cleaner
in-kernel equivalent for any residual stall that patches 1 and
2 do not cover.  We are willing to drop this patch if the view
is that 1 and 2 should stand alone.

Link: https://github.com/cilium/cilium/issues/43198
Link: https://bugs.launchpad.net/ubuntu/+source/linux-raspi/+bug/2133877
Signed-off-by: Lukasz Raczylo <lukasz at raczylo.com>
---
 drivers/net/ethernet/cadence/macb.h      |  5 ++
 drivers/net/ethernet/cadence/macb_main.c | 59 ++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2de56017e..9115f2b47 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1278,6 +1278,11 @@ struct macb_queue {
 	dma_addr_t		tx_ring_dma;
 	struct work_struct	tx_error_task;
 	bool			txubr_pending;
+
+	/* TX stall watchdog -- see macb_tx_stall_watchdog() in macb_main.c */
+	struct delayed_work	tx_stall_watchdog_work;
+	unsigned int		tx_stall_last_tail;
+
 	struct napi_struct	napi_tx;
 
 	dma_addr_t		rx_ring_dma;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ea231b1c5..ea2306ef7 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2002,6 +2002,59 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
+#define MACB_TX_STALL_INTERVAL_MS	1000
+
+/*
+ * TX stall watchdog.
+ *
+ * Defence-in-depth against lost TCOMP interrupts.  macb already has a
+ * recovery chain (tx_pending -> txubr_pending -> macb_tx_restart())
+ * that fires on TCOMP; if TCOMP itself is lost the TX ring stalls
+ * silently until something else kicks TSTART.  This watchdog runs
+ * once per second per queue, snapshots tx_tail, and calls
+ * macb_tx_restart() if the ring is non-empty and tx_tail has not
+ * advanced since the previous tick.
+ *
+ * macb_tx_restart() already checks the hardware's TBQP against the
+ * driver's head index before re-asserting TSTART, so on a healthy
+ * ring this is a no-op at the hardware level.  The watchdog only
+ * adds the missing trigger.
+ */
+static void macb_tx_stall_watchdog(struct work_struct *work)
+{
+	struct macb_queue *queue = container_of(to_delayed_work(work),
+						struct macb_queue,
+						tx_stall_watchdog_work);
+	struct macb *bp = queue->bp;
+	unsigned int cur_tail, cur_head;
+	bool stalled = false;
+	unsigned long flags;
+
+	if (!netif_running(bp->dev))
+		return;
+
+	spin_lock_irqsave(&queue->tx_ptr_lock, flags);
+	cur_tail = queue->tx_tail;
+	cur_head = queue->tx_head;
+	if (cur_head != cur_tail &&
+	    cur_tail == queue->tx_stall_last_tail)
+		stalled = true;
+	else
+		queue->tx_stall_last_tail = cur_tail;
+	spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
+
+	if (stalled) {
+		netdev_warn_once(bp->dev,
+				 "TX stall detected on queue %u (tail=%u head=%u); re-kicking TSTART\n",
+				 (unsigned int)(queue - bp->queues),
+				 cur_tail, cur_head);
+		macb_tx_restart(queue);
+	}
+
+	schedule_delayed_work(&queue->tx_stall_watchdog_work,
+			      msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
+}
+
 static void macb_hresp_error_task(struct work_struct *work)
 {
 	struct macb *bp = from_work(bp, work, hresp_err_bh_work);
@@ -3190,6 +3243,9 @@ static int macb_open(struct net_device *dev)
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_enable(&queue->napi_rx);
 		napi_enable(&queue->napi_tx);
+		queue->tx_stall_last_tail = queue->tx_tail;
+		schedule_delayed_work(&queue->tx_stall_watchdog_work,
+				      msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
 	}
 
 	macb_init_hw(bp);
@@ -3240,6 +3296,7 @@ static int macb_close(struct net_device *dev)
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		napi_disable(&queue->napi_rx);
 		napi_disable(&queue->napi_tx);
+		cancel_delayed_work_sync(&queue->tx_stall_watchdog_work);
 		netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
 	}
 
@@ -4802,6 +4859,8 @@ static int macb_init_dflt(struct platform_device *pdev)
 		}
 
 		INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
+		INIT_DELAYED_WORK(&queue->tx_stall_watchdog_work,
+				  macb_tx_stall_watchdog);
 		q++;
 	}
 
-- 
2.53.0




More information about the linux-arm-kernel mailing list