[PATCH RESEND RFC 3/3] net: ath11k: add lockup simulation via debugfs
Jeff Johnson
jeff.johnson at oss.qualcomm.com
Tue May 12 16:19:04 PDT 2026
On 3/30/2026 3:05 AM, Matthew Leach wrote:
> Add a debugfs command to simulate a firmware lockup.
>
> This does not hang the hardware. Instead, it forces the driver down an
> error path that reproduces the sequence observed during real lockups:
>
> ath11k_pci 0000:03:00.0: failed to transmit frame -12
> ath11k_pci 0000:03:00.0: failed to transmit frame -12
> ath11k_pci 0000:03:00.0: failed to transmit frame -12
> ...
> ath11k_pci 0000:03:00.0: wmi command 28680 timeout
> ath11k_pci 0000:03:00.0: failed to submit WMI_MGMT_TX_SEND_CMDID cmd
> ath11k_pci 0000:03:00.0: failed to send mgmt frame: -11
>
> This allows validation of the firmware lockup detection and recovery
> mechanism without requiring a real hardware failure.
>
> Signed-off-by: Matthew Leach <matthew.leach at collabora.com>
> ---
> drivers/net/wireless/ath/ath11k/core.h | 1 +
> drivers/net/wireless/ath/ath11k/debugfs.c | 7 ++++++-
> drivers/net/wireless/ath/ath11k/hal.c | 7 +++++--
> drivers/net/wireless/ath/ath11k/htc.c | 2 +-
> drivers/net/wireless/ath/ath11k/wmi.c | 6 +++++-
> 5 files changed, 18 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
> index 221dcd23b3dd..44b02ae1e85b 100644
> --- a/drivers/net/wireless/ath/ath11k/core.h
> +++ b/drivers/net/wireless/ath/ath11k/core.h
> @@ -1041,6 +1041,7 @@ struct ath11k_base {
> struct ath11k_dbring_cap *db_caps;
> u32 num_db_cap;
> u64 last_frame_tx_error_jiffies;
> + bool simulate_lockup;
>
> /* To synchronize 11d scan vdev id */
> struct mutex vdev_id_11d_lock;
> diff --git a/drivers/net/wireless/ath/ath11k/debugfs.c b/drivers/net/wireless/ath/ath11k/debugfs.c
> index 0c1138407838..ca0b72a3e0b0 100644
> --- a/drivers/net/wireless/ath/ath11k/debugfs.c
> +++ b/drivers/net/wireless/ath/ath11k/debugfs.c
> @@ -356,7 +356,8 @@ static ssize_t ath11k_read_simulate_fw_crash(struct file *file,
> const char buf[] =
> "To simulate firmware crash write one of the keywords to this file:\n"
> "`assert` - this will send WMI_FORCE_FW_HANG_CMDID to firmware to cause assert.\n"
> - "`hw-restart` - this will simply queue hw restart without fw/hw actually crashing.\n";
> + "`hw-restart` - this will simply queue hw restart without fw/hw actually crashing.\n"
> + "`lockup` - simulate a firmware lockup without the h/w actually hanging.\n";
>
> return simple_read_from_buffer(user_buf, count, ppos, buf, strlen(buf));
> }
> @@ -413,6 +414,10 @@ static ssize_t ath11k_write_simulate_fw_crash(struct file *file,
> ath11k_info(ab, "user requested hw restart\n");
> queue_work(ab->workqueue_aux, &ab->reset_work);
> ret = 0;
> + } else if (!strcmp(buf, "lockup")) {
> + ath11k_info(ab, "simulating lockup\n");
> + ab->simulate_lockup = true;
> + ret = 0;
> } else {
> ret = -EINVAL;
> goto exit;
> diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
> index e821e5a62c1c..e01fb17a4734 100644
> --- a/drivers/net/wireless/ath/ath11k/hal.c
> +++ b/drivers/net/wireless/ath/ath11k/hal.c
> @@ -691,7 +691,7 @@ int ath11k_hal_srng_dst_num_free(struct ath11k_base *ab, struct hal_srng *srng,
>
> tp = srng->u.dst_ring.tp;
>
> - if (sync_hw_ptr) {
> + if (sync_hw_ptr && !ab->simulate_lockup) {
> hp = *srng->u.dst_ring.hp_addr;
> srng->u.dst_ring.cached_hp = hp;
> } else {
> @@ -743,7 +743,7 @@ u32 *ath11k_hal_srng_src_get_next_entry(struct ath11k_base *ab,
> */
> next_hp = (srng->u.src_ring.hp + srng->entry_size) % srng->ring_size;
>
> - if (next_hp == srng->u.src_ring.cached_tp)
> + if (next_hp == srng->u.src_ring.cached_tp || ab->simulate_lockup)
> return NULL;
>
> desc = srng->ring_base_vaddr + srng->u.src_ring.hp;
> @@ -828,6 +828,9 @@ void ath11k_hal_srng_access_begin(struct ath11k_base *ab, struct hal_srng *srng)
>
> lockdep_assert_held(&srng->lock);
>
> + if (ab->simulate_lockup)
> + return;
> +
> if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
> srng->u.src_ring.cached_tp =
> *(volatile u32 *)srng->u.src_ring.tp_addr;
> diff --git a/drivers/net/wireless/ath/ath11k/htc.c b/drivers/net/wireless/ath/ath11k/htc.c
> index 4571d01cc33d..b05d04a1f5e8 100644
> --- a/drivers/net/wireless/ath/ath11k/htc.c
> +++ b/drivers/net/wireless/ath/ath11k/htc.c
> @@ -208,7 +208,7 @@ static int ath11k_htc_process_trailer(struct ath11k_htc *htc,
> break;
> }
>
> - if (ab->hw_params.credit_flow) {
> + if (ab->hw_params.credit_flow && !ab->simulate_lockup) {
> switch (record->hdr.id) {
> case ATH11K_HTC_RECORD_CREDITS:
> len = sizeof(struct ath11k_htc_credit_report);
> diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
> index 7d9f0bcbb3b0..27d6d4a2f803 100644
> --- a/drivers/net/wireless/ath/ath11k/wmi.c
> +++ b/drivers/net/wireless/ath/ath11k/wmi.c
> @@ -345,9 +345,13 @@ int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb,
>
> if (time_in_range64(ab->last_frame_tx_error_jiffies,
> range_start, jiffies_64) &&
> - queue_work(ab->workqueue_aux, &ab->reset_work))
> + queue_work(ab->workqueue_aux, &ab->reset_work)) {
> ath11k_err(wmi_ab->ab,
> "Firmware lockup detected. Resetting.");
> +
> + /* Assume that reset gets us out of lockup. */
> + ab->simulate_lockup = false;
> + }
> }
>
> if (ret == -ENOBUFS)
>
My first impression of this patch is that the datapath folks are not going to
like the ab->simulate_lockup checks in the hot path. But I'll let the
engineers speak for themselves.
/jeff
More information about the ath11k mailing list