[PATCH 13/13] ath10k: implement push-pull tx
Peter Oh
poh at codeaurora.org
Thu Jan 21 09:40:58 PST 2016
On 01/21/2016 05:46 AM, Michal Kazior wrote:
> The current/old tx path design was that the host,
> at its own leisure, pushed tx frames to the device.
> For HTT there was a ~1000-1400 MSDU queue depth.
>
> After reaching that limit the driver would request
> mac80211 to stop queues. There was little control
> over which packets got in there as far as
> DA/RA was concerned, so it was rather easy to
> starve per-station traffic flows.
>
> With MU-MIMO this became a significant problem
> because the queue depth was insufficient to buffer
> frames from multiple clients (which could have
> different signal quality and capabilities) in an
> efficient fashion.
>
> Hence the new tx path in 10.4 was introduced: a
> pull-push mode.
>
> Firmware and host can share tx queue state via
> DMA. The state is logically a 2-dimensional array
> addressed via a peer_id+tid pair. Each entry is a
> counter (either number of bytes or packets). The
> host keeps it updated and the firmware uses it for
> scheduling Tx pull requests to the host.
>
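To make the pull scheduling easier to picture, here is a minimal standalone model of that shared state. The names and sizes are purely illustrative (this is not the actual struct htt_q_state layout from htt.h):

    #include <stdint.h>

    #define NUM_PEERS 512   /* illustrative only */
    #define NUM_TIDS    8   /* illustrative only */

    /* Host-updated, firmware-read block: one depth counter per
     * (peer_id, tid) pair, plus a sequence number the host bumps
     * after each batch of updates so the firmware can detect changes.
     */
    struct q_state_sketch {
        uint32_t seq;
        uint8_t count[NUM_PEERS * NUM_TIDS];   /* queued bytes or MSDUs */
    };

    static uint8_t *q_state_entry(struct q_state_sketch *qs,
                                  uint16_t peer_id, uint8_t tid)
    {
        return &qs->count[peer_id * NUM_TIDS + tid];
    }

The firmware walks these counters to decide which (peer, tid) queues to issue tx_fetch_ind pull requests for.
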
> This allows MU-MIMO to become a lot more effective
> with 10+ clients.
>
> Signed-off-by: Michal Kazior <michal.kazior at tieto.com>
> ---
> drivers/net/wireless/ath/ath10k/core.h | 1 +
> drivers/net/wireless/ath/ath10k/htt.h | 6 ++
> drivers/net/wireless/ath/ath10k/htt_rx.c | 117 ++++++++++++++++++++++++++++---
> drivers/net/wireless/ath/ath10k/htt_tx.c | 39 ++++++++---
> drivers/net/wireless/ath/ath10k/mac.c | 48 +++++++++++--
> drivers/net/wireless/ath/ath10k/mac.h | 5 ++
> 6 files changed, 192 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
> index f51887c6be74..ab8cbccc0f4b 100644
> --- a/drivers/net/wireless/ath/ath10k/core.h
> +++ b/drivers/net/wireless/ath/ath10k/core.h
> @@ -307,6 +307,7 @@ struct ath10k_peer {
>
> struct ath10k_txq {
> unsigned long num_fw_queued;
> + unsigned long num_push_allowed;
> };
>
> struct ath10k_sta {
> diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
> index b1e40f44e76b..02cf55d306e8 100644
> --- a/drivers/net/wireless/ath/ath10k/htt.h
> +++ b/drivers/net/wireless/ath/ath10k/htt.h
> @@ -1652,6 +1652,7 @@ struct ath10k_htt {
> struct sk_buff_head tx_compl_q;
> struct sk_buff_head rx_compl_q;
> struct sk_buff_head rx_in_ord_compl_q;
> + struct sk_buff_head tx_fetch_ind_q;
>
> /* rx_status template */
> struct ieee80211_rx_status rx_status;
> @@ -1670,8 +1671,10 @@ struct ath10k_htt {
> bool enabled;
> struct htt_q_state *vaddr;
> dma_addr_t paddr;
> + u16 num_push_allowed;
> u16 num_peers;
> u16 num_tids;
> + enum htt_tx_mode_switch_mode mode;
> enum htt_q_depth_type type;
> } tx_q_state;
> };
> @@ -1761,6 +1764,9 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
>
> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
> struct ieee80211_txq *txq);
> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
> + struct ieee80211_txq *txq);
> +void ath10k_htt_tx_txq_sync(struct ath10k *ar);
> void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt,
> bool is_mgmt);
> int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt,
> diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
> index 6e3d95c95568..827c8369b589 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_rx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
> @@ -229,6 +229,7 @@ void ath10k_htt_rx_free(struct ath10k_htt *htt)
> skb_queue_purge(&htt->tx_compl_q);
> skb_queue_purge(&htt->rx_compl_q);
> skb_queue_purge(&htt->rx_in_ord_compl_q);
> + skb_queue_purge(&htt->tx_fetch_ind_q);
>
> ath10k_htt_rx_ring_free(htt);
>
> @@ -569,6 +570,7 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt)
> skb_queue_head_init(&htt->tx_compl_q);
> skb_queue_head_init(&htt->rx_compl_q);
> skb_queue_head_init(&htt->rx_in_ord_compl_q);
> + skb_queue_head_init(&htt->tx_fetch_ind_q);
>
> tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task,
> (unsigned long)htt);
> @@ -2004,16 +2006,21 @@ static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar,
>
> static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
> {
> + struct ieee80211_hw *hw = ar->hw;
> + struct ieee80211_txq *txq;
> struct htt_resp *resp = (struct htt_resp *)skb->data;
> struct htt_tx_fetch_record *record;
> size_t len;
> size_t max_num_bytes;
> size_t max_num_msdus;
> + size_t num_bytes;
> + size_t num_msdus;
> const __le32 *resp_ids;
> u16 num_records;
> u16 num_resp_ids;
> u16 peer_id;
> u8 tid;
> + int ret;
> int i;
>
> ath10k_dbg(ar, ATH10K_DBG_HTT, "htt rx tx fetch ind\n");
> @@ -2039,7 +2046,17 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
> num_records, num_resp_ids,
> le16_to_cpu(resp->tx_fetch_ind.fetch_seq_num));
>
> - /* TODO: runtime sanity checks */
> + if (!ar->htt.tx_q_state.enabled) {
> + ath10k_warn(ar, "received unexpected tx_fetch_ind event:
> not enabled\n");
> + return;
> + }
> +
> + if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH) {
> + ath10k_warn(ar, "received unexpected tx_fetch_ind event:
> in push mode\n");
> + return;
> + }
> +
> + rcu_read_lock();
>
> for (i = 0; i < num_records; i++) {
> record = &resp->tx_fetch_ind.records[i];
> @@ -2060,13 +2077,56 @@ static void ath10k_htt_rx_tx_fetch_ind(struct ath10k *ar, struct sk_buff *skb)
> continue;
> }
>
> - /* TODO: dequeue and submit tx to device */
> + spin_lock_bh(&ar->data_lock);
> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
> + spin_unlock_bh(&ar->data_lock);
> +
> + /* It is okay to release the lock and use txq because RCU read
> + * lock is held.
> + */
> +
> + if (unlikely(!txq)) {
> + ath10k_warn(ar, "failed to lookup txq for peer_id
> %hu tid %hhu\n",
> + peer_id, tid);
> + continue;
> + }
> +
> + num_msdus = 0;
> + num_bytes = 0;
> +
> + while (num_msdus < max_num_msdus &&
> + num_bytes < max_num_bytes) {
> + ret = ath10k_mac_tx_push_txq(hw, txq);
> + if (ret < 0)
> + break;
> +
> + num_msdus++;
> + num_bytes += ret;
> + }
> +
> + record->num_msdus = cpu_to_le16(num_msdus);
> + record->num_bytes = cpu_to_le32(num_bytes);
> +
> + ath10k_htt_tx_txq_recalc(hw, txq);
> }
>
> + rcu_read_unlock();
> +
> resp_ids = ath10k_htt_get_tx_fetch_ind_resp_ids(&resp->tx_fetch_ind);
> ath10k_htt_rx_tx_fetch_resp_id_confirm(ar, resp_ids, num_resp_ids);
>
> - /* TODO: generate and send fetch response to device */
> + ret = ath10k_htt_tx_fetch_resp(ar,
> + resp->tx_fetch_ind.token,
> + resp->tx_fetch_ind.fetch_seq_num,
> + resp->tx_fetch_ind.records,
> + num_records);
> + if (unlikely(ret)) {
> + ath10k_warn(ar, "failed to submit tx fetch resp for token
> 0x%08x: %d\n",
> + le32_to_cpu(resp->tx_fetch_ind.token), ret);
> + /* FIXME: request fw restart */
> + }
> +
> + ath10k_htt_tx_txq_sync(ar);
> }
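If I read the pull handshake above correctly: each fetch record is serviced by pushing frames until either the MSDU or the byte budget given by the firmware is exhausted, the record is rewritten with what was actually delivered, and the records are then echoed back in the fetch response. A simplified standalone model of that accounting (hypothetical helper names, not driver code):

    #include <stddef.h>
    #include <stdint.h>

    struct fetch_budget {
        size_t max_msdus;
        size_t max_bytes;
    };

    /* push_one() stands in for ath10k_mac_tx_push_txq(): it returns the
     * length of the frame it pushed, or a negative value on error or
     * when the queue is empty. */
    static void serve_record(const struct fetch_budget *b,
                             int (*push_one)(void *ctx), void *ctx,
                             uint16_t *out_msdus, uint32_t *out_bytes)
    {
        size_t msdus = 0, bytes = 0;
        int ret;

        while (msdus < b->max_msdus && bytes < b->max_bytes) {
            ret = push_one(ctx);
            if (ret < 0)
                break;              /* queue empty or tx failure */
            msdus++;
            bytes += ret;
        }

        *out_msdus = (uint16_t)msdus;   /* reported back in tx_fetch_resp */
        *out_bytes = (uint32_t)bytes;
    }
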
>
> static void ath10k_htt_rx_tx_fetch_confirm(struct ath10k *ar,
> @@ -2102,6 +2162,8 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
> {
> const struct htt_resp *resp = (void *)skb->data;
> const struct htt_tx_mode_switch_record *record;
> + struct ieee80211_txq *txq;
> + struct ath10k_txq *artxq;
> size_t len;
> size_t num_records;
> enum htt_tx_mode_switch_mode mode;
> @@ -2153,7 +2215,11 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
> if (!enable)
> return;
>
> - /* TODO: apply configuration */
> + ar->htt.tx_q_state.enabled = enable;
> + ar->htt.tx_q_state.mode = mode;
> + ar->htt.tx_q_state.num_push_allowed = threshold;
> +
> + rcu_read_lock();
>
> for (i = 0; i < num_records; i++) {
> record = &resp->tx_mode_switch_ind.records[i];
> @@ -2168,10 +2234,33 @@ static void ath10k_htt_rx_tx_mode_switch_ind(struct ath10k *ar,
> continue;
> }
>
> - /* TODO: apply configuration */
> + spin_lock_bh(&ar->data_lock);
> + txq = ath10k_mac_txq_lookup(ar, peer_id, tid);
> + spin_unlock_bh(&ar->data_lock);
> +
> + /* It is okay to release the lock and use txq because RCU read
> + * lock is held.
> + */
> +
> + if (unlikely(!txq)) {
> + ath10k_warn(ar, "failed to lookup txq for peer_id
> %hu tid %hhu\n",
> + peer_id, tid);
> + continue;
> + }
> +
> + spin_lock_bh(&ar->htt.tx_lock);
> + artxq = (void *)txq->drv_priv;
> + artxq->num_push_allowed = le16_to_cpu(record->num_max_msdus);
> + spin_unlock_bh(&ar->htt.tx_lock);
> }
>
> - /* TODO: apply configuration */
> + rcu_read_unlock();
> +
> + spin_lock_bh(&ar->htt.tx_lock);
> + ath10k_mac_tx_lock(ar, ATH10K_TX_PAUSE_Q_FLUSH_PENDING);
> + spin_unlock_bh(&ar->htt.tx_lock);
> +
Isn't this proven to break mesh operation?
> + ath10k_mac_tx_push_pending(ar);
> }
>
> void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
> @@ -2317,8 +2406,9 @@ void ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
> case HTT_T2H_MSG_TYPE_AGGR_CONF:
> break;
> case HTT_T2H_MSG_TYPE_TX_FETCH_IND:
> - ath10k_htt_rx_tx_fetch_ind(ar, skb);
> - break;
> + skb_queue_tail(&htt->tx_fetch_ind_q, skb);
> + tasklet_schedule(&htt->txrx_compl_task);
> + return;
> case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM:
> ath10k_htt_rx_tx_fetch_confirm(ar, skb);
> break;
> @@ -2356,21 +2446,32 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr)
> struct ath10k_htt *htt = (struct ath10k_htt *)ptr;
> struct ath10k *ar = htt->ar;
> struct sk_buff_head tx_q;
> + struct sk_buff_head tx_ind_q;
> struct htt_resp *resp;
> struct sk_buff *skb;
> unsigned long flags;
>
> __skb_queue_head_init(&tx_q);
> + __skb_queue_head_init(&tx_ind_q);
>
> spin_lock_irqsave(&htt->tx_compl_q.lock, flags);
> skb_queue_splice_init(&htt->tx_compl_q, &tx_q);
> spin_unlock_irqrestore(&htt->tx_compl_q.lock, flags);
>
> + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags);
> + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q);
> + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags);
> +
> while ((skb = __skb_dequeue(&tx_q))) {
> ath10k_htt_rx_frm_tx_compl(htt->ar, skb);
> dev_kfree_skb_any(skb);
> }
>
> + while ((skb = __skb_dequeue(&tx_ind_q))) {
> + ath10k_htt_rx_tx_fetch_ind(ar, skb);
> + dev_kfree_skb_any(skb);
> + }
> +
> ath10k_mac_tx_push_pending(ar);
>
> spin_lock_bh(&htt->rx_ring.lock);
> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
> index 5ef0a1b3773c..00877ff6861e 100644
> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c
> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
> @@ -62,6 +62,9 @@ static void __ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
> if (!ar->htt.tx_q_state.enabled)
> return;
>
> + if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
> + return;
> +
> if (txq->sta)
> peer_id = arsta->peer_id;
> else
> @@ -97,6 +100,9 @@ static void __ath10k_htt_tx_txq_sync(struct ath10k *ar)
> if (!ar->htt.tx_q_state.enabled)
> return;
>
> + if (ar->htt.tx_q_state.mode != HTT_TX_MODE_SWITCH_PUSH_PULL)
> + return;
> +
> seq = le32_to_cpu(ar->htt.tx_q_state.vaddr->seq);
> seq++;
> ar->htt.tx_q_state.vaddr->seq = cpu_to_le32(seq);
> @@ -111,6 +117,23 @@ static void __ath10k_htt_tx_txq_sync(struct ath10k *ar)
> DMA_TO_DEVICE);
> }
>
> +void ath10k_htt_tx_txq_recalc(struct ieee80211_hw *hw,
> + struct ieee80211_txq *txq)
> +{
> + struct ath10k *ar = hw->priv;
> +
> + spin_lock_bh(&ar->htt.tx_lock);
> + __ath10k_htt_tx_txq_recalc(hw, txq);
> + spin_unlock_bh(&ar->htt.tx_lock);
> +}
> +
> +void ath10k_htt_tx_txq_sync(struct ath10k *ar)
> +{
> + spin_lock_bh(&ar->htt.tx_lock);
> + __ath10k_htt_tx_txq_sync(ar);
> + spin_unlock_bh(&ar->htt.tx_lock);
> +}
> +
> void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
> struct ieee80211_txq *txq)
> {
> @@ -634,10 +657,14 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
> {
> struct sk_buff *skb;
> struct htt_cmd *cmd;
> - u16 resp_id;
> + const u16 resp_id = 0;
> int len = 0;
> int ret;
>
> + /* Response IDs are echoed back only for host driver convenience
> + * purposes. They aren't used for anything in the driver yet so use 0.
> + */
> +
> len += sizeof(cmd->hdr);
> len += sizeof(cmd->tx_fetch_resp);
> len += sizeof(cmd->tx_fetch_resp.records[0]) * num_records;
> @@ -646,11 +673,6 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
> if (!skb)
> return -ENOMEM;
>
> - resp_id = 0; /* TODO: allocate resp_id */
> - ret = 0;
> - if (ret)
> - goto err_free_skb;
> -
> skb_put(skb, len);
> cmd = (struct htt_cmd *)skb->data;
> cmd->hdr.msg_type = HTT_H2T_MSG_TYPE_TX_FETCH_RESP;
> @@ -665,14 +687,11 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
> ret = ath10k_htc_send(&ar->htc, ar->htt.eid, skb);
> if (ret) {
> ath10k_warn(ar, "failed to submit htc command: %d\n",
> ret);
> - goto err_free_resp_id;
> + goto err_free_skb;
> }
>
> return 0;
>
> -err_free_resp_id:
> - (void)resp_id; /* TODO: free resp_id */
> -
> err_free_skb:
> dev_kfree_skb_any(skb);
>
> diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
> index 0dc16de1d9fb..ee94c83b5128 100644
> --- a/drivers/net/wireless/ath/ath10k/mac.c
> +++ b/drivers/net/wireless/ath/ath10k/mac.c
> @@ -3629,11 +3629,25 @@ void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work)
> static bool ath10k_mac_tx_can_push(struct ieee80211_hw *hw,
> struct ieee80211_txq *txq)
> {
> - return 1; /* TBD */
> + struct ath10k *ar = hw->priv;
> + struct ath10k_txq *artxq = (void *)txq->drv_priv;
> +
> + /* No need to get locks */
> +
> + if (ar->htt.tx_q_state.mode == HTT_TX_MODE_SWITCH_PUSH)
> + return true;
> +
> + if (ar->htt.num_pending_tx < ar->htt.tx_q_state.num_push_allowed)
> + return true;
> +
> + if (artxq->num_fw_queued < artxq->num_push_allowed)
> + return true;
> +
> + return false;
> }
>
> -static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> - struct ieee80211_txq *txq)
> +int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> + struct ieee80211_txq *txq)
> {
> const bool is_mgmt = false;
> const bool is_presp = false;
> @@ -3645,6 +3659,7 @@ static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> enum ath10k_hw_txrx_mode txmode;
> enum ath10k_mac_tx_path txpath;
> struct sk_buff *skb;
> + size_t skb_len;
> int ret;
>
> spin_lock_bh(&ar->htt.tx_lock);
> @@ -3665,6 +3680,7 @@ static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
>
> ath10k_mac_tx_h_fill_cb(ar, vif, txq, skb);
>
> + skb_len = skb->len;
> txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb);
> txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode);
>
> @@ -3683,7 +3699,7 @@ static int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> artxq->num_fw_queued++;
> spin_unlock_bh(&ar->htt.tx_lock);
>
> - return 0;
> + return skb_len;
> }
>
> static void ath10k_mac_tx_push(struct ieee80211_hw *hw,
> @@ -3693,7 +3709,7 @@ static void ath10k_mac_tx_push(struct ieee80211_hw *hw,
> int ret;
>
> ret = ath10k_mac_tx_push_txq(hw, txq);
> - if (unlikely(ret)) {
> + if (unlikely(ret < 0)) {
> if (txq->sta)
> ath10k_warn(ar, "failed to push tx to station %pM
> tid %hhu: %d\n",
> txq->sta->addr, txq->tid, ret);
> @@ -3738,7 +3754,7 @@ static void ath10k_mac_tx_push_pending_txq(struct ieee80211_hw *hw,
> break;
>
> ret = ath10k_mac_tx_push_txq(hw, txq);
> - if (ret)
> + if (ret < 0)
> break;
> }
>
> @@ -3820,6 +3836,26 @@ static void ath10k_mac_txq_unref(struct ath10k *ar,
> spin_unlock_bh(&ar->htt.tx_lock);
> }
>
> +struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
> + u16 peer_id,
> + u8 tid)
> +{
> + struct ath10k_peer *peer;
> +
> + lockdep_assert_held(&ar->data_lock);
> +
> + peer = ar->peer_map[peer_id];
> + if (!peer)
> + return NULL;
> +
> + if (peer->sta)
> + return peer->sta->txq[tid];
> + else if (peer->vif)
> + return peer->vif->txq;
> + else
> + return NULL;
> +}
> +
> /************/
> /* Scanning */
> /************/
> diff --git a/drivers/net/wireless/ath/ath10k/mac.h b/drivers/net/wireless/ath/ath10k/mac.h
> index 453f606a250e..2c3327beb445 100644
> --- a/drivers/net/wireless/ath/ath10k/mac.h
> +++ b/drivers/net/wireless/ath/ath10k/mac.h
> @@ -76,6 +76,11 @@ void ath10k_mac_vif_tx_lock(struct ath10k_vif *arvif, int reason);
> void ath10k_mac_vif_tx_unlock(struct ath10k_vif *arvif, int reason);
> bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar);
> void ath10k_mac_tx_push_pending(struct ath10k *ar);
> +int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw,
> + struct ieee80211_txq *txq);
> +struct ieee80211_txq *ath10k_mac_txq_lookup(struct ath10k *ar,
> + u16 peer_id,
> + u8 tid);
>
> static inline struct ath10k_vif *ath10k_vif_to_arvif(struct ieee80211_vif *vif)
> {