[RFC PATCH net-next 3/3] net: macb: add TX stall watchdog as defence-in-depth safety net
Lukasz Raczylo
lukasz at raczylo.com
Fri Apr 24 15:38:33 PDT 2026
Patches 1/3 and 2/3 address two candidate races that could lead
to a TCOMP completion being missed on PCIe-attached macb
instances. This patch adds a defence-in-depth safety net, in
case a further race remains that we have not identified.
The watchdog is a per-queue delayed_work that runs once per
second. It snapshots queue->tx_tail; if the ring is non-empty
(queue->tx_head != queue->tx_tail) and tx_tail has not advanced
since the previous tick, it calls macb_tx_restart().
No new recovery logic is introduced. macb_tx_restart() already
exists in this file, is correctly locked (tx_ptr_lock, bp->lock),
and verifies that the hardware's TBQP is behind the driver's
head index before re-asserting TSTART. On a healthy ring it is
a no-op at the hardware level; the watchdog only supplies the
missing trigger.
On a healthy queue the per-tick cost is a netif_running() check, one
spin_lock_irqsave() / spin_unlock_irqrestore() pair around a single
branch, and re-arming the delayed_work. The delayed_work is only
scheduled between macb_open() and macb_close(), and is cancelled
synchronously on close.
Context for submission: on our 24-node Raspberry Pi 5 fleet,
before this series, an out-of-band user-space watchdog
(monitoring tx_packets from /sys/class/net/.../statistics and
toggling the link down/up when it froze) was required to keep
nodes usable. We include this kernel-side watchdog as a cleaner
in-kernel equivalent for any residual stall that patches 1 and
2 do not cover. We are willing to drop this patch if the view
is that 1 and 2 should stand alone.
Link: https://github.com/cilium/cilium/issues/43198
Link: https://bugs.launchpad.net/ubuntu/+source/linux-raspi/+bug/2133877
Signed-off-by: Lukasz Raczylo <lukasz at raczylo.com>
---
drivers/net/ethernet/cadence/macb.h | 5 ++
drivers/net/ethernet/cadence/macb_main.c | 59 ++++++++++++++++++++++++
2 files changed, 64 insertions(+)
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 2de56017e..9115f2b47 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -1278,6 +1278,11 @@ struct macb_queue {
dma_addr_t tx_ring_dma;
struct work_struct tx_error_task;
bool txubr_pending;
+
+ /* TX stall watchdog -- see macb_tx_stall_watchdog() in macb_main.c */
+ struct delayed_work tx_stall_watchdog_work;
+ unsigned int tx_stall_last_tail;
+
struct napi_struct napi_tx;
dma_addr_t rx_ring_dma;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index ea231b1c5..ea2306ef7 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -2002,6 +2002,59 @@ static int macb_tx_poll(struct napi_struct *napi, int budget)
return work_done;
}
+#define MACB_TX_STALL_INTERVAL_MS 1000 /* watchdog tick period, in ms */
+
+/*
+ * macb_tx_stall_watchdog() - per-queue TX stall watchdog (delayed work)
+ * @work: work_struct embedded in queue->tx_stall_watchdog_work
+ *
+ * Defence-in-depth against lost TCOMP interrupts. macb already has a
+ * recovery chain (tx_pending -> txubr_pending -> macb_tx_restart())
+ * that fires on TCOMP; if TCOMP itself is lost the TX ring stalls
+ * silently until something else kicks TSTART. Each tick snapshots
+ * tx_tail under tx_ptr_lock and calls macb_tx_restart() if the ring is
+ * non-empty and tx_tail has not advanced since the previous tick.
+ * macb_tx_restart() already checks the hardware's TBQP against the
+ * driver's head index before re-asserting TSTART, so on a healthy
+ * ring this is a no-op at the hardware level. The work re-arms itself
+ * every MACB_TX_STALL_INTERVAL_MS and exits without re-arming once
+ * netif_running() is false.
+ */
+static void macb_tx_stall_watchdog(struct work_struct *work)
+{
+ struct macb_queue *queue = container_of(to_delayed_work(work),
+ struct macb_queue,
+ tx_stall_watchdog_work);
+ struct macb *bp = queue->bp;
+ unsigned int cur_tail, cur_head;
+ bool stalled = false;
+ unsigned long flags;
+
+ if (!netif_running(bp->dev)) /* interface down: end the tick chain */
+ return;
+
+ spin_lock_irqsave(&queue->tx_ptr_lock, flags); /* read head/tail as a pair */
+ cur_tail = queue->tx_tail;
+ cur_head = queue->tx_head;
+ if (cur_head != cur_tail && /* ring is non-empty ... */
+ cur_tail == queue->tx_stall_last_tail) /* ... and tail has not moved */
+ stalled = true;
+ else
+ queue->tx_stall_last_tail = cur_tail; /* baseline for the next tick */
+ spin_unlock_irqrestore(&queue->tx_ptr_lock, flags);
+
+ if (stalled) { /* handled after the unlock above, outside tx_ptr_lock */
+ netdev_warn_once(bp->dev,
+ "TX stall detected on queue %u (tail=%u head=%u); re-kicking TSTART\n",
+ (unsigned int)(queue - bp->queues), /* queue index within bp->queues */
+ cur_tail, cur_head);
+ macb_tx_restart(queue); /* NOTE(review): re-kicks TSTART only; confirm tx_tail advances afterwards */
+ }
+
+ schedule_delayed_work(&queue->tx_stall_watchdog_work, /* re-arm for the next tick */
+ msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
+}
+
static void macb_hresp_error_task(struct work_struct *work)
{
struct macb *bp = from_work(bp, work, hresp_err_bh_work);
@@ -3190,6 +3243,9 @@ static int macb_open(struct net_device *dev)
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_enable(&queue->napi_rx);
napi_enable(&queue->napi_tx);
+ queue->tx_stall_last_tail = queue->tx_tail;
+ schedule_delayed_work(&queue->tx_stall_watchdog_work,
+ msecs_to_jiffies(MACB_TX_STALL_INTERVAL_MS));
}
macb_init_hw(bp);
@@ -3240,6 +3296,7 @@ static int macb_close(struct net_device *dev)
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
napi_disable(&queue->napi_rx);
napi_disable(&queue->napi_tx);
+ cancel_delayed_work_sync(&queue->tx_stall_watchdog_work);
netdev_tx_reset_queue(netdev_get_tx_queue(dev, q));
}
@@ -4802,6 +4859,8 @@ static int macb_init_dflt(struct platform_device *pdev)
}
INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
+ INIT_DELAYED_WORK(&queue->tx_stall_watchdog_work,
+ macb_tx_stall_watchdog);
q++;
}
--
2.53.0
More information about the linux-arm-kernel
mailing list