[PATCH 13/13] ath10k: implement push-pull tx
Michal Kazior
michal.kazior at tieto.com
Thu Jan 21 23:47:07 PST 2016
On 21 January 2016 at 18:40, Peter Oh <poh at codeaurora.org> wrote:
>
> On 01/21/2016 05:46 AM, Michal Kazior wrote:
>>
>> The current/old tx path design was that host, at
>> its own leisure, pushed tx frames to the device.
>> For HTT there was ~1000-1400 msdu queue depth.
>>
>> After reaching that limit the driver would request
>> mac80211 to stop queues. There was little control
>> over what packets got in there as far as
>> DA/RA was concerned, so it was rather easy to
>> starve per-station traffic flows.
>>
>> With MU-MIMO this became a significant problem
>> because the queue depth was insufficient to buffer
>> frames from multiple clients (which could have
>> different signal quality and capabilities) in an
>> efficient fashion.
>>
>> Hence the new tx path in 10.4 was introduced: a
>> pull-push mode.
>>
>> Firmware and host can share tx queue state via
>> DMA. The state is logically a 2 dimensional array
>> addressed via peer_id+tid pair. Each entry is a
>> counter (either number of bytes or packets). Host
>> keeps it updated and firmware uses it for
>> scheduling Tx pull requests to host.
>>
>> This allows MU-MIMO to become a lot more effective
>> with 10+ clients.
>>
>> Signed-off-by: Michal Kazior <michal.kazior at tieto.com>
>> ---
>> drivers/net/wireless/ath/ath10k/core.h | 1 +
>> drivers/net/wireless/ath/ath10k/htt.h | 6 ++
>> drivers/net/wireless/ath/ath10k/htt_rx.c | 117
>> ++++++++++++++++++++++++++++---
>> drivers/net/wireless/ath/ath10k/htt_tx.c | 39 ++++++++---
>> drivers/net/wireless/ath/ath10k/mac.c | 48 +++++++++++--
>> drivers/net/wireless/ath/ath10k/mac.h | 5 ++
>> 6 files changed, 192 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath10k/core.h
>> b/drivers/net/wireless/ath/ath10k/core.h
>> index f51887c6be74..ab8cbccc0f4b 100644
>> --- a/drivers/net/wireless/ath/ath10k/core.h
>> +++ b/drivers/net/wireless/ath/ath10k/core.h
>> @@ -307,6 +307,7 @@ struct ath10k_peer {
>> struct ath10k_txq {
>> unsigned long num_fw_queued;
>> + unsigned long num_push_allowed;
>> };
>> struct ath10k_sta {
>> diff --git a/drivers/net/wireless/ath/ath10k/htt.h
>> b/drivers/net/wireless/ath/ath10k/htt.h
>> index b1e40f44e76b..02cf55d306e8 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt.h
>> +++ b/drivers/net/wireless/ath/ath10k/htt.h
>> @@ -1652,6 +1652,7 @@ struct ath10k_htt {
>> struct sk_buff_head tx_compl_q;
>> struct sk_buff_head rx_compl_q;
>> struct sk_buff_head rx_in_ord_compl_q;
>> + struct sk_buff_head tx_fetch_ind_q;
>> /* rx_status template */
>> struct ieee80211_rx_status rx_status;
>> @@ -1670,8 +1671,10 @@ struct ath10k_htt {
>> bool enabled;
>> struct htt_q_state *vaddr;
>> dma_addr_t paddr;
>> + u16 num_push_allowed;
>> u16 num_peers;
>> u16 num_tids;
>> + enum htt_tx_mode_switch_mode mode;
>> enum htt_q_depth_type type;
>> } tx_q_state;
>> };
>> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
>> struct ieee80211_txq *txq);
>> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
>> + struct ieee80211_txq *txq);
>> +void ath10k_htt_tx_txq_sync(struct ath10k *ar);
>> void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt,
>> bool is_mgmt);
>> int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt,
>> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> index 6e3d95c95568..827c8369b589 100644
>> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
>> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
>> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt)
>> skb_queue_purge(&htt->tx_compl_q);
>> skb_queue_purge(&htt->rx_compl_q);
>> skb_queue_purge(&htt->rx_in_ord_compl_q);
>> + skb_queue_purge(&htt->tx_fetch_ind_q);
>> ath10k_htt_rx_ring_free(htt);
>> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
>> skb_queue_head_init(&htt->tx_compl_q);
>> skb_queue_head_init(&htt->rx_compl_q);
>> skb_queue_head_init(&htt->rx_in_ord_compl_q);
>> + skb_queue_head_init(&htt->tx_fetch_ind_q);
>> tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
>> (unsigned long)htt);
>> @@ -2004,16 +2006,21 @@ static void
>> ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
>> static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct
>> sk_buff
>> *skb)
>> {
>> + struct ieee80211_hw *hw = ar->hw;
>> + struct ieee80211_txq *txq;
>> struct htt_resp *resp = (struct htt_resp *)skb->data;
>> struct htt_tx_fetch_record *record;
>> size_t len;
>> size_t max_num_bytes;
>> size_t max_num_msdus;
>> + size_t num_bytes;
>> + size_t num_msdus;
>> const __le32 *resp_ids;
>> u16 num_records;
>> u16 num_resp_ids;
>> u16 peer_id;
>> u8 tid;
>> + int ret;
>> int i;
>> ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
>> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
>> ath10k *ar, struct sk_buff *skb)
>> num_records, num_resp_ids,
>> le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
>> - /* TODO: runtime sanity checks */
>> + if (!ar->htt.tx_q_state.enabled) {
>> + ath10k_warn(ar, "received unexpected tx_fetch_ind event:
>> not enabled\n");
>> + return;
>> + }
>> +
>> + if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
>> + ath10k_warn(ar, "received unexpected tx_fetch_ind event:
>> in push mode\n");
>> + return;
>> + }
>> +
>> + rcu_read_lock();
>> for (i = 0; i < num_records; i++) {
>> record = &resp->tx_fetch_ind.records[i];
>> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct
>> ath10k *ar, struct sk_buff *skb)
>> continue;
>> }
>> - /* TODO: dequeue and submit tx to device */
>> + spin_lock_bh(&ar->data_lock);
>> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
>> + spin_unlock_bh(&ar->data_lock);
>> +
>> + /* It is okay to release the lock and use txq because RCU
>> read
>> + * lock is held.
>> + */
>> +
>> + if (unlikely(!txq)) {
>> + ath10k_warn(ar, "failed to lookup txq for peer_id
>> %hu tid %hhu\n",
>> + peer_id, tid);
>> + continue;
>> + }
>> +
>> + num_msdus = 0;
>> + num_bytes = 0;
>> +
>> + while (num_msdus < max_num_msdus &&
>> + num_bytes < max_num_bytes) {
>> + ret = ath10k_mac_tx_push_txq(hw, txq);
>> + if (ret < 0)
>> + break;
>> +
>> + num_msdus++;
>> + num_bytes += ret;
>> + }
>> +
>> + record->num_msdus = cpu_to_le16(num_msdus);
>> + record->num_bytes = cpu_to_le32(num_bytes);
>> +
>> + ath10k_htt_tx_txq_recalc(hw, txq);
>> }
>> + rcu_read_unlock();
>> +
>> resp_ids =
>> ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
>> ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids,
>> num_resp_ids);
>> - /* TODO: generate and send fetch response to device */
>> + ret = ath10k_htt_tx_fetch_resp(ar,
>> + resp->tx_fetch_ind.token,
>> + resp->tx_fetch_ind.fetch_seq_num,
>> + resp->tx_fetch_ind.records,
>> + num_records);
>> + if (unlikely(ret)) {
>> + ath10k_warn(ar, "failed to submit tx fetch resp for token
>> 0x%08x: %d\n",
>> + le32_to_cpu(resp->tx_fetch_ind.token), ret);
>> + /* FIXME: request fw restart */
>> + }
>> +
>> + ath10k_htt_tx_txq_sync(ar);
>> }
>> static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
>> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
>> ath10k *ar,
>> {
>> const struct htt_resp *resp = (void *)skb->data;
>> const struct htt_tx_mode_switch_record *record;
>> + struct ieee80211_txq *txq;
>> + struct ath10k_txq *artxq;
>> size_t len;
>> size_t num_records;
>> enum htt_tx_mode_switch_mode mode;
>> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct
>> ath10k *ar,
>> if (!enable)
>> return;
>> - /* TODO: apply configuration */
>> + ar->htt.tx_q_state.enabled = enable;
>> + ar->htt.tx_q_state.mode = mode;
>> + ar->htt.tx_q_state.num_push_allowed = threshold;
>> +
>> + rcu_read_lock();
>> for (i = 0; i < num_records; i++) {
>> record = &resp->tx_mode_switch_ind.records[i];
>> @@ -2168,10 +2234,33 @@ static void
>> ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
>> continue;
>> }
>> - /* TODO: apply configuration */
>> + spin_lock_bh(&ar->data_lock);
>> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
>> + spin_unlock_bh(&ar->data_lock);
>> +
>> + /* It is okay to release the lock and use txq because RCU
>> read
>> + * lock is held.
>> + */
>> +
>> + if (unlikely(!txq)) {
>> + ath10k_warn(ar, "failed to lookup txq for peer_id
>> %hu tid %hhu\n",
>> + peer_id, tid);
>> + continue;
>> + }
>> +
>> + spin_lock_bh(&ar->htt.tx_lock);
>> + artxq = (void *)txq->drv_priv;
>> + artxq->num_push_allowed =
>> le16_to_cpu(record->num_max_msdus);
>> + spin_unlock_bh(&ar->htt.tx_lock);
>> }
>> - /* TODO: apply configuration */
>> + rcu_read_unlock();
>> +
>> + spin_lock_bh(&ar->htt.tx_lock);
>> + ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
>> + spin_unlock_bh(&ar->htt.tx_lock);
>> +
>
> Isn't it proven that this breaks Mesh?
Yes, good point. I'm still working on this - I should've mentioned
that in the cover letter.
Nonetheless I wanted to get the review process going for this patchset.
I'll address the mesh problem in v2.
Michał
More information about the ath10k
mailing list