[PATCH ath-next] wifi: ath10k: move recovery check logic into a new work

Kang Yang kang.yang at oss.qualcomm.com
Mon Sep 29 01:54:00 PDT 2025



Baochen Qiang 於 9/29/2025 4:45 PM 寫道:
> 
> 
> On 9/29/2025 10:37 AM, Kang Yang wrote:
>>
>>
>> Baochen Qiang 於 9/22/2025 6:00 PM 寫道:
>>>
>>>
>>> On 9/22/2025 2:31 PM, Kang Yang wrote:
>>>> Currently, ath10k has a recovery check logic. It will wait for the
>>>> last recovery to finish by wait_for_completion_timeout();
>>>>
>>>> But in SDIO scenarios, the recovery function may be invoked from
>>>> interrupt context, where long blocking waits are undesirable and can
>>>> lead to system instability.
>>>>
>>>> To address this, move the recovery check logic into a workqueue.
>>>> Fixes: c256a94d1b1b ("wifi: ath10k: shutdown driver when hardware is unreliable")
>>>> Signed-off-by: Kang Yang <kang.yang at oss.qualcomm.com>
>>>> ---
>>>>    drivers/net/wireless/ath/ath10k/core.c | 19 ++++++++-----------
>>>>    drivers/net/wireless/ath/ath10k/core.h |  2 +-
>>>>    drivers/net/wireless/ath/ath10k/mac.c  |  2 +-
>>>>    3 files changed, 10 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/
>>>> core.c
>>>> index 6f78f1752cd6..991fe8d92297 100644
>>>> --- a/drivers/net/wireless/ath/ath10k/core.c
>>>> +++ b/drivers/net/wireless/ath/ath10k/core.c
>>>> @@ -3,7 +3,6 @@
>>>>     * Copyright (c) 2005-2011 Atheros Communications Inc.
>>>>     * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
>>>>     * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
>>>> - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
>>>>     * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
>>>>     */
>>>>    @@ -2493,8 +2492,9 @@ static int ath10k_init_hw_params(struct ath10k *ar)
>>>>        return 0;
>>>>    }
>>>>    -static bool ath10k_core_needs_recovery(struct ath10k *ar)
>>>> +static void ath10k_core_recovery_check_work(struct work_struct *work)
>>>>    {
>>>> +    struct ath10k *ar = container_of(work, struct ath10k, recovery_check_work);
>>>>        long time_left;
>>>>          /* Sometimes the recovery will fail and then the next all recovery fail,
>>>> @@ -2504,7 +2504,7 @@ static bool ath10k_core_needs_recovery(struct ath10k *ar)
>>>>            ath10k_err(ar, "consecutive fail %d times, will shutdown driver!",
>>>>                   atomic_read(&ar->fail_cont_count));
>>>>            ar->state = ATH10K_STATE_WEDGED;
>>>> -        return false;
>>>> +        return;
>>>>        }
>>>>          ath10k_dbg(ar, ATH10K_DBG_BOOT, "total recovery count: %d", ++ar->recovery_count);
>>>> @@ -2518,27 +2518,23 @@ static bool ath10k_core_needs_recovery(struct ath10k *ar)
>>>>                                ATH10K_RECOVERY_TIMEOUT_HZ);
>>>>            if (time_left) {
>>>>                ath10k_warn(ar, "previous recovery succeeded, skip this!\n");
>>>> -            return false;
>>>> +            return;
>>>>            }
>>>>              /* Record the continuous recovery fail count when recovery failed. */
>>>>            atomic_inc(&ar->fail_cont_count);
>>>>              /* Avoid having multiple recoveries at the same time. */
>>>> -        return false;
>>>> +        return;
>>>>        }
>>>>          atomic_inc(&ar->pending_recovery);
>>>> -
>>>> -    return true;
>>>> +    queue_work(ar->workqueue, &ar->restart_work);
>>>>    }
>>>>      void ath10k_core_start_recovery(struct ath10k *ar)
>>>>    {
>>>> -    if (!ath10k_core_needs_recovery(ar))
>>>> -        return;
>>>> -
>>>> -    queue_work(ar->workqueue, &ar->restart_work);
>>>> +    queue_work(ar->workqueue, &ar->recovery_check_work);
>>>>    }
>>>>    EXPORT_SYMBOL(ath10k_core_start_recovery);
>>>>    @@ -3734,6 +3730,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct
>>>> device *dev,
>>>>          INIT_WORK(&ar->register_work, ath10k_core_register_work);
>>>>        INIT_WORK(&ar->restart_work, ath10k_core_restart);
>>>> +    INIT_WORK(&ar->recovery_check_work, ath10k_core_recovery_check_work);
>>>>        INIT_WORK(&ar->set_coverage_class_work,
>>>>              ath10k_core_set_coverage_class_work);
>>>>    diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/
>>>> ath10k/core.h
>>>> index 8c72ed386edb..859176fcb5a2 100644
>>>> --- a/drivers/net/wireless/ath/ath10k/core.h
>>>> +++ b/drivers/net/wireless/ath/ath10k/core.h
>>>> @@ -3,7 +3,6 @@
>>>>     * Copyright (c) 2005-2011 Atheros Communications Inc.
>>>>     * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
>>>>     * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
>>>> - * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved.
>>>>     * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
>>>>     */
>>>>    @@ -1208,6 +1207,7 @@ struct ath10k {
>>>>          struct work_struct register_work;
>>>>        struct work_struct restart_work;
>>>> +    struct work_struct recovery_check_work;
>>>
>>> Instead of adding a new work item, how about just moving the recovery check logic into the
>>> exsiting restart_work?
>>>
>>
>> If we use only one work, Linux’s workqueue mechanism will ignore new recovery triggers
>> while the previous one is still queued or executing. This means we can't track consecutive
>> failures or enter the WEDGED state in consecutive failures cases.
> 
> OK, makes sense.
> 
> But ar->workqueue is an ordered workqueue, meaning recovery_check_work won't run until
> previous recovery work done.
> 

It seems that i need to use different queues for them.

>>
>>
>>
>>
>>>>        struct work_struct bundle_tx_work;
>>>>        struct work_struct tx_complete_work;
>>>>    diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/
>>>> mac.c
>>>> index 154ac7a70982..da6f7957a0ae 100644
>>>> --- a/drivers/net/wireless/ath/ath10k/mac.c
>>>> +++ b/drivers/net/wireless/ath/ath10k/mac.c
>>>> @@ -3,7 +3,6 @@
>>>>     * Copyright (c) 2005-2011 Atheros Communications Inc.
>>>>     * Copyright (c) 2011-2017 Qualcomm Atheros, Inc.
>>>>     * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved.
>>>> - * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved.
>>>>     * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
>>>>     */
>>>>    @@ -5428,6 +5427,7 @@ static void ath10k_stop(struct ieee80211_hw *hw, bool suspend)
>>>>        cancel_work_sync(&ar->set_coverage_class_work);
>>>>        cancel_delayed_work_sync(&ar->scan.timeout);
>>>>        cancel_work_sync(&ar->restart_work);
>>>> +    cancel_work_sync(&ar->recovery_check_work);
>>>>    }
>>>>      static int ath10k_config_ps(struct ath10k *ar)
>>>
>>
> 




More information about the ath10k mailing list