[PATCH] wifi: mt76: mt7921: Add PCIe AER handler support to prevent system crash

Mingyen Hsieh mingyen.hsieh at mediatek.com
Wed Oct 1 19:46:10 PDT 2025


From: Michael Lo <michael.lo at mediatek.com>

Activating AER (Advanced Error Reporting) ensures that the PCIe error
reporting feature is properly initialized, enabling more effective error
management and helping to prevent system crashes caused by PCIe errors.

An example of such a crash:

[ 2932.266976] Unable to handle kernel paging request at virtual address ffffffc01099eac0
[ 2932.267800] pc : mt76_dma_add_buf+0x124/0x188 [mt76]
[ 2932.267831] lr : mt76_dma_rx_fill+0x11c/0x1d8 [mt76]
[ 2932.267845] sp : ffffffc016d9bbf0
[ 2932.267859] x29: ffffffc016d9bc10 x28: 0000000000000000
[ 2932.267885] x27: 0000000000000000 x26: ffffffb7855e50b8
[ 2932.267911] x25: ffffffb80d04f000 x24: 0000000000000000
[ 2932.267936] x23: 0000000000000ec0 x22: ffffffb796803648
[ 2932.267962] x21: ffffffb796801f80 x20: ffffffb7968035f8
[ 2932.267987] x19: 0000000000000ec0 x18: 0000000000000000
[ 2932.268012] x17: 000000004ec00000 x16: 000000000ec00000
[ 2932.268037] x15: ffffffc01099eac0 x14: 000000004ec00000
[ 2932.268063] x13: 00000000ffc5a000 x12: ffffffc016d9bc32
[ 2932.268088] x11: 00000000ffffffff x10: 0000000000000002
[ 2932.268113] x9 : 0000000000000000 x8 : 000000000000b4ac
[ 2932.268138] x7 : 0000000000000a20 x6 : ffffffb6c1806400
[ 2932.268163] x5 : 0000000000000000 x4 : ffffffb80d04f000
[ 2932.268188] x3 : 0000000000000000 x2 : 0000000000000001
[ 2932.268213] x1 : 000000000ec04000 x0 : ffffffb7968035f8
[ 2932.268238] Call trace:
[ 2932.268275]  mt76_dma_add_buf+0x124/0x188 [mt76 (HASH:1029 4)]
[ 2932.268309]  mt76_dma_rx_reset+0xe8/0xfc [mt76 (HASH:1029 4)]
[ 2932.268342]  mt7921_wpdma_reset+0x188/0x1b0 [mt7921e (HASH:ee48 5)]
[ 2932.268371]  mt7921e_mac_reset+0x128/0x418 [mt7921e (HASH:ee48 5)]
[ 2932.268403]  mt7921_mac_reset_work+0xac/0x1a8 [mt7921_common (HASH:f721 6)]
[ 2932.268427]  process_one_work+0x188/0x514
[ 2932.268445]  worker_thread+0x12c/0x300
[ 2932.268465]  kthread+0x140/0x1fc
[ 2932.268483]  ret_from_fork+0x10/0x30

Due to hardware limitations - such as the lack of a connected hardware
reset pin or the absence of host re-probe functionality - affected Wi-Fi
devices may not fully recover to a normal operational state after certain
errors, even with AER enabled.

Signed-off-by: Michael Lo <michael.lo at mediatek.com>
Signed-off-by: Ming Yen Hsieh <mingyen.hsieh at mediatek.com>
---
 drivers/net/wireless/mediatek/mt76/agg-rx.c   |  9 +++
 drivers/net/wireless/mediatek/mt76/dma.c      |  6 ++
 drivers/net/wireless/mediatek/mt76/mac80211.c |  3 +
 drivers/net/wireless/mediatek/mt76/mcu.c      |  3 +
 .../net/wireless/mediatek/mt76/mt76_connac.h  |  3 +
 .../wireless/mediatek/mt76/mt76_connac_mac.c  |  3 +
 .../net/wireless/mediatek/mt76/mt7921/mac.c   |  3 +
 .../net/wireless/mediatek/mt76/mt7921/main.c  |  3 +
 .../net/wireless/mediatek/mt76/mt7921/pci.c   | 64 +++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt792x_core.c  |  8 +++
 .../net/wireless/mediatek/mt76/mt792x_mac.c   | 12 ++++
 11 files changed, 117 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index 936ab1ca9246..89d45f5954a2 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -96,6 +96,9 @@ mt76_rx_aggr_reorder_work(struct work_struct *work)
 	struct sk_buff_head frames;
 	int nframes;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return;
+
 	__skb_queue_head_init(&frames);
 
 	local_bh_disable();
@@ -179,6 +182,9 @@ void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
 	if (!tid)
 		return;
 
+	if (atomic_read(&tid->dev->bus_hung) == 1)
+		return;
+
 	status->flag |= RX_FLAG_DUP_VALIDATED;
 	spin_lock_bh(&tid->lock);
 
@@ -246,6 +252,9 @@ int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tidno,
 {
 	struct mt76_rx_tid *tid;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return -EIO;
+
 	mt76_rx_aggr_stop(dev, wcid, tidno);
 
 	tid = kzalloc(struct_size(tid, reorder_buf, size), GFP_KERNEL);
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 1fa7de1d2c45..2d508ddbc7b7 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -339,6 +339,9 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
 	int i, idx = -1;
 	u32 ctrl, next;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return idx;
+
 	if (txwi) {
 		q->entry[q->head].txwi = DMA_DUMMY_DATA;
 		q->entry[q->head].skip_buf0 = true;
@@ -765,6 +768,9 @@ mt76_dma_rx_fill_buf(struct mt76_dev *dev, struct mt76_queue *q,
 	int len = SKB_WITH_OVERHEAD(q->buf_size);
 	int frames = 0;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return 0;
+
 	if (!q->ndesc)
 		return 0;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 09cc5e40ccf9..a70245672638 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -1549,6 +1549,9 @@ void mt76_rx_poll_complete(struct mt76_dev *dev, enum mt76_rxq_id q,
 	struct sk_buff_head frames;
 	struct sk_buff *skb;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return;
+
 	__skb_queue_head_init(&frames);
 
 	while ((skb = __skb_dequeue(&dev->rx_skb[q])) != NULL) {
diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c
index 65d4c2adb538..2107c0c07f3e 100644
--- a/drivers/net/wireless/mediatek/mt76/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mcu.c
@@ -58,6 +58,9 @@ int mt76_mcu_send_and_get_msg(struct mt76_dev *dev, int cmd, const void *data,
 {
 	struct sk_buff *skb;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return -EIO;
+
 	if (dev->mcu_ops->mcu_send_msg)
 		return dev->mcu_ops->mcu_send_msg(dev, cmd, data, len, wait_resp);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
index 756719ce0e48..46b0f65320c1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h
@@ -333,6 +333,9 @@ static inline u8 mt76_connac_spe_idx(u8 antenna_mask)
 
 static inline void mt76_connac_irq_enable(struct mt76_dev *dev, u32 mask)
 {
+	if (atomic_read(&dev->bus_hung) == 1)
+		return;
+
 	mt76_set_irq_mask(dev, 0, 0, mask);
 	tasklet_schedule(&dev->irq_tasklet);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
index 0db00efe88b0..7a6db5e0e250 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c
@@ -64,6 +64,9 @@ void mt76_connac_power_save_sched(struct mt76_phy *phy,
 {
 	struct mt76_dev *dev = phy->dev;
 
+	if (atomic_read(&dev->bus_hung) == 1)
+		return;
+
 	if (mt76_is_usb(dev))
 		return;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
index bce26389ab18..610aaf7eccff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c
@@ -658,6 +658,9 @@ void mt7921_mac_reset_work(struct work_struct *work)
 	struct mt76_connac_pm *pm = &dev->pm;
 	int i, ret;
 
+	if (atomic_read(&dev->mt76.bus_hung) == 1)
+		return;
+
 	dev_dbg(dev->mt76.dev, "chip reset\n");
 	set_bit(MT76_RESET, &dev->mphy.state);
 	dev->hw_full_reset = true;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
index ef216153cdf0..ba85f3e5d0f8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
@@ -1002,6 +1002,9 @@ void mt7921_scan_work(struct work_struct *work)
 	phy = (struct mt792x_phy *)container_of(work, struct mt792x_phy,
 						scan_work.work);
 
+	if (atomic_read(&phy->dev->mt76.bus_hung) == 1)
+		return;
+
 	while (true) {
 		struct mt76_connac2_mcu_rxd *rxd;
 		struct sk_buff *skb;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
index 71fba57db9be..019d7961d9d4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci.c
@@ -363,6 +363,8 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
 		    (mt7921_l1_rr(dev, MT_HW_REV) & 0xff);
 	dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
 
+	atomic_set(&mdev->bus_hung, 0);
+
 	ret = mt792x_wfsys_reset(dev);
 	if (ret)
 		goto err_free_dev;
@@ -562,6 +564,67 @@ static void mt7921_pci_shutdown(struct pci_dev *pdev)
 
 static DEFINE_SIMPLE_DEV_PM_OPS(mt7921_pm_ops, mt7921_pci_suspend, mt7921_pci_resume);
 
+/* AER .error_detected callback: flag the bus as hung and quiesce the driver. */
+static pci_ers_result_t mt7921_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev);
+	struct mt792x_dev *dev = container_of(mdev, struct mt792x_dev, mt76);
+	struct ieee80211_hw *hw = mdev->hw;
+	struct mt792x_phy *phy = mt792x_hw_phy(hw);
+
+	if (state == pci_channel_io_normal)
+		return PCI_ERS_RESULT_CAN_RECOVER;
+
+	/* An earlier error already quiesced the device; just request a reset. */
+	if (atomic_read(&mdev->bus_hung) == 1)
+		return PCI_ERS_RESULT_NEED_RESET;
+
+	atomic_set(&mdev->bus_hung, 1);
+
+	set_bit(MT76_REMOVED, &mdev->phy.state);
+
+	/* drvdata is a struct mt76_dev, not a net_device: stop TX via mac80211. */
+	ieee80211_stop_queues(hw);
+
+	cancel_delayed_work_sync(&phy->mt76->mac_work);
+	cancel_delayed_work_sync(&dev->pm.ps_work);
+	cancel_work_sync(&dev->pm.wake_work);
+	mt76_connac_free_pending_tx_skbs(&dev->pm, NULL);
+
+	/* NOTE(review): bus_hung is set, so this MCU call likely gets -EIO; confirm. */
+	mt792x_mutex_acquire(dev);
+	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+	mt76_connac_mcu_set_mac_enable(&dev->mt76, 0, false, false);
+	mt792x_mutex_release(dev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	tasklet_kill(&mdev->irq_tasklet);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/* AER .slot_reset callback: always reports that the device must be disconnected. */
+static pci_ers_result_t mt7921_pci_error_slot_reset(struct pci_dev *pdev)
+{
+	/* No re-initialisation is attempted here after the slot reset. */
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/* AER .resume callback: intentionally empty, nothing is restarted here. */
+static void mt7921_pci_error_resume(struct pci_dev *pdev)
+{
+}
+
+static const struct pci_error_handlers mt7921_pci_err_handler = {
+	.error_detected	= mt7921_pci_error_detected,
+	.slot_reset	= mt7921_pci_error_slot_reset,
+	.resume		= mt7921_pci_error_resume,
+};
+
 static struct pci_driver mt7921_pci_driver = {
 	.name		= KBUILD_MODNAME,
 	.id_table	= mt7921_pci_device_table,
@@ -569,6 +632,7 @@ static struct pci_driver mt7921_pci_driver = {
 	.remove		= mt7921_pci_remove,
 	.shutdown	= mt7921_pci_shutdown,
 	.driver.pm	= pm_sleep_ptr(&mt7921_pm_ops),
+	.err_handler = &mt7921_pci_err_handler,
 };
 
 module_pci_driver(mt7921_pci_driver);
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_core.c b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
index 65cff5302a5a..4f4aa26b359d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_core.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_core.c
@@ -811,6 +811,10 @@ int mt792x_mcu_drv_pmctrl(struct mt792x_dev *dev)
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	struct mt76_connac_pm *pm = &dev->pm;
 	int err = 0;
+	struct mt76_dev *mdev = mphy->dev;
+
+	if (atomic_read(&mdev->bus_hung) == 1)
+		return -EIO;
 
 	mutex_lock(&pm->mutex);
 
@@ -833,6 +837,10 @@ int mt792x_mcu_fw_pmctrl(struct mt792x_dev *dev)
 	struct mt76_phy *mphy = &dev->mt76.phy;
 	struct mt76_connac_pm *pm = &dev->pm;
 	int err = 0;
+	struct mt76_dev *mdev = mphy->dev;
+
+	if (atomic_read(&mdev->bus_hung) == 1)
+		return -EIO;
 
 	mutex_lock(&pm->mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
index f86e0ac91100..c813547a3562 100644
--- a/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt792x_mac.c
@@ -13,6 +13,10 @@ void mt792x_mac_work(struct work_struct *work)
 
 	mphy = (struct mt76_phy *)container_of(work, struct mt76_phy,
 					       mac_work.work);
+
+	if (atomic_read(&mphy->dev->bus_hung) == 1)
+		return;
+
 	phy = mphy->priv;
 
 	mt792x_mutex_acquire(phy->dev);
@@ -322,6 +326,10 @@ void mt792x_pm_wake_work(struct work_struct *work)
 
 	dev = (struct mt792x_dev *)container_of(work, struct mt792x_dev,
 						pm.wake_work);
+
+	if (atomic_read(&dev->mt76.bus_hung) == 1)
+		return;
+
 	mphy = dev->phy.mt76;
 
 	if (!mt792x_mcu_drv_pmctrl(dev)) {
@@ -357,6 +365,10 @@ void mt792x_pm_power_save_work(struct work_struct *work)
 
 	dev = (struct mt792x_dev *)container_of(work, struct mt792x_dev,
 						pm.ps_work.work);
+
+	if (atomic_read(&dev->mt76.bus_hung) == 1)
+		return;
+
 	mphy = dev->phy.mt76;
 
 	delta = dev->pm.idle_timeout;
-- 
2.34.1




More information about the Linux-mediatek mailing list