[PATCH 09/11] ath10k_sdio: virtual scatter gather for receive
Erik Stromdahl
erik.stromdahl at gmail.com
Wed Oct 4 12:56:13 PDT 2017
On 2017-09-30 19:37, silexcommon at gmail.com wrote:
> From: Alagu Sankar <alagusankar at silex-india.com>
>
> The existing implementation of initiating multiple sdio transfers for
> receive bundling is slowing down the receive speed. Combining the
> transfers using a scatter gather method would be ideal. This results in
> significant performance improvement.
>
> Since the sg implementation for sdio transfers are not reliable due to
> buffer start and size alignment, a virtual scatter gather implementation
> is used.
>
> Signed-off-by: Alagu Sankar <alagusankar at silex-india.com>
> ---
> drivers/net/wireless/ath/ath10k/htc.h | 1 +
> drivers/net/wireless/ath/ath10k/sdio.c | 122 ++++++++++++++++++++++++---------
> drivers/net/wireless/ath/ath10k/sdio.h | 5 +-
> 3 files changed, 93 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath10k/htc.h b/drivers/net/wireless/ath/ath10k/htc.h
> index 24663b0..5d87908 100644
> --- a/drivers/net/wireless/ath/ath10k/htc.h
> +++ b/drivers/net/wireless/ath/ath10k/htc.h
> @@ -58,6 +58,7 @@ enum ath10k_htc_tx_flags {
> };
>
> enum ath10k_htc_rx_flags {
> + ATH10K_HTC_FLAGS_RECV_1MORE_BLOCK = 0x01,
> ATH10K_HTC_FLAG_TRAILER_PRESENT = 0x02,
> ATH10K_HTC_FLAG_BUNDLE_MASK = 0xF0
> };
> diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
> index bb6fa67..45df9db 100644
> --- a/drivers/net/wireless/ath/ath10k/sdio.c
> +++ b/drivers/net/wireless/ath/ath10k/sdio.c
> @@ -35,6 +35,7 @@
> #include "sdio.h"
>
> #define ATH10K_SDIO_DMA_BUF_SIZE (32 * 1024)
> +#define ATH10K_SDIO_VSG_BUF_SIZE (32 * 1024)
>
> static int ath10k_sdio_read(struct ath10k *ar, u32 addr, void *buf,
> u32 len, bool incr);
> @@ -430,6 +431,7 @@ static int ath10k_sdio_mbox_rx_process_packet(struct ath10k *ar,
> int ret;
>
> payload_len = le16_to_cpu(htc_hdr->len);
> + skb->len = payload_len + sizeof(struct ath10k_htc_hdr);
>
> if (trailer_present) {
> trailer = skb->data + sizeof(*htc_hdr) +
> @@ -468,12 +470,13 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
> enum ath10k_htc_ep_id id;
> int ret, i, *n_lookahead_local;
> u32 *lookaheads_local;
> + int lookahd_idx = 0;
I think the variable should be named *lookahead_idx* instead of *lookahd_idx*,
since all the other variables spell out "lookahead" without abbreviating it.
>
> for (i = 0; i < ar_sdio->n_rx_pkts; i++) {
> lookaheads_local = lookaheads;
> n_lookahead_local = n_lookahead;
>
> - id = ((struct ath10k_htc_hdr *)&lookaheads[i])->eid;
> + id = ((struct ath10k_htc_hdr *)&lookaheads[lookahd_idx++])->eid;
>
> if (id >= ATH10K_HTC_EP_COUNT) {
> ath10k_warn(ar, "invalid endpoint in look-ahead: %d\n",
> @@ -496,6 +499,7 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
> /* Only read lookahead's from RX trailers
> * for the last packet in a bundle.
> */
> + lookahd_idx--;
> lookaheads_local = NULL;
> n_lookahead_local = NULL;
> }
> @@ -529,11 +533,11 @@ static int ath10k_sdio_mbox_rx_process_packets(struct ath10k *ar,
> return ret;
> }
>
> -static int ath10k_sdio_mbox_alloc_pkt_bundle(struct ath10k *ar,
> - struct ath10k_sdio_rx_data *rx_pkts,
> - struct ath10k_htc_hdr *htc_hdr,
> - size_t full_len, size_t act_len,
> - size_t *bndl_cnt)
> +static int ath10k_sdio_mbox_alloc_bundle(struct ath10k *ar,
> + struct ath10k_sdio_rx_data *rx_pkts,
> + struct ath10k_htc_hdr *htc_hdr,
> + size_t full_len, size_t act_len,
> + size_t *bndl_cnt)
> {
> int ret, i;
>
> @@ -574,6 +578,7 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
> size_t full_len, act_len;
> bool last_in_bundle;
> int ret, i;
> + int pkt_cnt = 0;
>
> if (n_lookaheads > ATH10K_SDIO_MAX_RX_MSGS) {
> ath10k_warn(ar,
> @@ -616,16 +621,22 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
> * optimally fetched as a full bundle.
> */
> size_t bndl_cnt;
> -
> - ret = ath10k_sdio_mbox_alloc_pkt_bundle(ar,
> - &ar_sdio->rx_pkts[i],
> - htc_hdr,
> - full_len,
> - act_len,
> - &bndl_cnt);
> -
> - n_lookaheads += bndl_cnt;
> - i += bndl_cnt;
> + struct ath10k_sdio_rx_data *rx_pkts =
> + &ar_sdio->rx_pkts[pkt_cnt];
> +
> + ret = ath10k_sdio_mbox_alloc_bundle(ar,
> + rx_pkts,
> + htc_hdr,
> + full_len,
> + act_len,
> + &bndl_cnt);
> +
> + if (ret) {
> + ath10k_warn(ar, "alloc_bundle error %d\n", ret);
> + goto err;
> + }
> +
> + pkt_cnt += bndl_cnt;
> /*Next buffer will be the last in the bundle */
> last_in_bundle = true;
> }
> @@ -634,14 +645,18 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
> * ATH10K_HTC_FLAG_BUNDLE_MASK flag set, all bundled
> * packet skb's have been allocated in the previous step.
> */
> - ret = ath10k_sdio_mbox_alloc_rx_pkt(&ar_sdio->rx_pkts[i],
> + if (htc_hdr->flags & ATH10K_HTC_FLAGS_RECV_1MORE_BLOCK)
> + full_len += ATH10K_HIF_MBOX_BLOCK_SIZE;
> +
> + ret = ath10k_sdio_mbox_alloc_rx_pkt(&ar_sdio->rx_pkts[pkt_cnt],
> act_len,
> full_len,
> last_in_bundle,
> last_in_bundle);
> + pkt_cnt++;
> }
>
> - ar_sdio->n_rx_pkts = i;
> + ar_sdio->n_rx_pkts = pkt_cnt;
>
> return 0;
>
> @@ -655,41 +670,71 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
> return ret;
> }
>
> -static int ath10k_sdio_mbox_rx_packet(struct ath10k *ar,
> - struct ath10k_sdio_rx_data *pkt)
> +static int ath10k_sdio_mbox_rx_fetch(struct ath10k *ar)
> {
> struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
> - struct sk_buff *skb = pkt->skb;
> + struct ath10k_sdio_rx_data *pkt = &ar_sdio->rx_pkts[0];
> + struct sk_buff *skb;
> int ret;
>
> + skb = pkt->skb;
> ret = ath10k_sdio_read(ar, ar_sdio->mbox_info.htc_addr,
> skb->data, pkt->alloc_len, false);
> - pkt->status = ret;
> - if (!ret)
> + if (ret) {
> + ar_sdio->n_rx_pkts = 0;
> + ath10k_sdio_mbox_free_rx_pkt(pkt);
> + } else {
> + pkt->status = ret;
> skb_put(skb, pkt->act_len);
> + }
>
> return ret;
> }
>
> -static int ath10k_sdio_mbox_rx_fetch(struct ath10k *ar)
> +static int ath10k_sdio_mbox_rx_fetch_bundle(struct ath10k *ar)
> {
> struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
> + struct ath10k_sdio_rx_data *pkt;
> int ret, i;
> + u32 pkt_offset, virt_pkt_len;
>
> + virt_pkt_len = 0;
> for (i = 0; i < ar_sdio->n_rx_pkts; i++) {
> - ret = ath10k_sdio_mbox_rx_packet(ar,
> - &ar_sdio->rx_pkts[i]);
> - if (ret)
> + virt_pkt_len += ar_sdio->rx_pkts[i].alloc_len;
> + }
> + if (virt_pkt_len < ATH10K_SDIO_DMA_BUF_SIZE) {
> + ret = ath10k_sdio_read(ar, ar_sdio->mbox_info.htc_addr,
> + ar_sdio->vsg_buffer, virt_pkt_len,
> + false);
> + if (ret) {
> + i = 0;
> goto err;
> + }
> + } else {
> + ath10k_err(ar, "size exceeding limit %d\n", virt_pkt_len);
> + }
> +
> + pkt_offset = 0;
> + for (i = 0; i < ar_sdio->n_rx_pkts; i++) {
> + struct sk_buff *skb = ar_sdio->rx_pkts[i].skb;
> +
> + pkt = &ar_sdio->rx_pkts[i];
> + memcpy(skb->data, ar_sdio->vsg_buffer + pkt_offset,
> + pkt->alloc_len);
> + pkt->status = 0;
> + skb_put(skb, pkt->act_len);
> + pkt_offset += pkt->alloc_len;
> }
>
> return 0;
>
> err:
> /* Free all packets that was not successfully fetched. */
Change comment to: /* Free all packets */
since all packets are freed, not only those that were not successfully fetched.
> - for (; i < ar_sdio->n_rx_pkts; i++)
> + for (i = 0; i < ar_sdio->n_rx_pkts; i++)
> ath10k_sdio_mbox_free_rx_pkt(&ar_sdio->rx_pkts[i]);
>
> + ar_sdio->n_rx_pkts = 0;
> +
> return ret;
> }
>
> @@ -732,7 +777,10 @@ static int ath10k_sdio_mbox_rxmsg_pending_handler(struct ath10k *ar,
> */
> *done = false;
>
> - ret = ath10k_sdio_mbox_rx_fetch(ar);
> + if (ar_sdio->n_rx_pkts > 1)
> + ret = ath10k_sdio_mbox_rx_fetch_bundle(ar);
> + else
> + ret = ath10k_sdio_mbox_rx_fetch(ar);
ret is not checked at all (I noticed this is the case in the current code as well).
I think it would be wise to break out of the loop on error:
if (ret)
	break;
>
> /* Process fetched packets. This will potentially update
> * n_lookaheads depending on if the packets contain lookahead
> @@ -1136,7 +1184,7 @@ static int ath10k_sdio_bmi_get_rx_lookahead(struct ath10k *ar)
> MBOX_HOST_INT_STATUS_ADDRESS,
> &rx_word);
> if (ret) {
> - ath10k_warn(ar, "unable to read RX_LOOKAHEAD_VALID: %d\n", ret);
> + ath10k_warn(ar, "unable to read rx_lookahd: %d\n", ret);
Change the print to "unable to read RX lookahead: %d\n", as it is more descriptive.
> return ret;
> }
>
> @@ -1480,7 +1528,7 @@ static int ath10k_sdio_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
> skb = items[i].transfer_context;
> padded_len = ath10k_sdio_calc_txrx_padded_len(ar_sdio,
> skb->len);
> - skb_trim(skb, padded_len);
> + skb->len = padded_len;
Why this change?
I think the skb_ family of functions is the preferred way to manipulate skbs.
>
> /* Write TX data to the end of the mbox address space */
> address = ar_sdio->mbox_addr[eid] + ar_sdio->mbox_size[eid] -
> @@ -1508,7 +1556,8 @@ static int ath10k_sdio_hif_enable_intrs(struct ath10k *ar)
> /* Enable all but CPU interrupts */
> regs->int_status_en = FIELD_PREP(MBOX_INT_STATUS_ENABLE_ERROR_MASK, 1) |
> FIELD_PREP(MBOX_INT_STATUS_ENABLE_CPU_MASK, 1) |
> - FIELD_PREP(MBOX_INT_STATUS_ENABLE_COUNTER_MASK, 1);
> + FIELD_PREP(MBOX_INT_STATUS_ENABLE_COUNTER_MASK,
> + 1);
Is this a checkpatch-fix?
I would recommend creating a separate patch for style issues.
>
> /* NOTE: There are some cases where HIF can do detection of
> * pending mbox messages which is disabled now.
> @@ -2024,6 +2073,12 @@ static int ath10k_sdio_probe(struct sdio_func *func,
> goto err_free_bmi_buf;
> }
>
> + ar_sdio->vsg_buffer = kzalloc(ATH10K_SDIO_VSG_BUF_SIZE, GFP_KERNEL);
> + if (!ar_sdio->vsg_buffer) {
> + ret = -ENOMEM;
> + goto err_free_bmi_buf;
> + }
> +
> ar_sdio->func = func;
> sdio_set_drvdata(func, ar_sdio);
>
> @@ -2081,7 +2136,7 @@ static int ath10k_sdio_probe(struct sdio_func *func,
> }
>
> /* TODO: remove this once SDIO support is fully implemented */
> - ath10k_warn(ar, "WARNING: ath10k SDIO support is incomplete, don't expect anything to work!\n");
> + ath10k_warn(ar, "WARNING: ath10k SDIO support is experimental\n");
>
> return 0;
>
> @@ -2115,6 +2170,7 @@ static void ath10k_sdio_remove(struct sdio_func *func)
> ath10k_core_unregister(ar);
> ath10k_core_destroy(ar);
> kfree(ar_sdio->dma_buffer);
> + kfree(ar_sdio->vsg_buffer);
> }
>
> static const struct sdio_device_id ath10k_sdio_devices[] = {
> diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
> index 718b8b7..8b6a86a 100644
> --- a/drivers/net/wireless/ath/ath10k/sdio.h
> +++ b/drivers/net/wireless/ath/ath10k/sdio.h
> @@ -149,8 +149,8 @@ struct ath10k_sdio_irq_proc_regs {
> u8 rx_lookahead_valid;
> u8 host_int_status2;
> u8 gmbox_rx_avail;
> - __le32 rx_lookahead[2];
> - __le32 rx_gmbox_lookahead_alias[2];
> + __le32 rx_lookahead[2 * ATH10K_HIF_MBOX_NUM_MAX];
> + __le32 int_status_enable;
> };
>
> struct ath10k_sdio_irq_enable_regs {
> @@ -207,6 +207,7 @@ struct ath10k_sdio {
> struct ath10k *ar;
> struct ath10k_sdio_irq_data irq_data;
>
> + u8 *vsg_buffer;
> u8 *dma_buffer;
>
> /* protects access to dma_buffer */
>
More information about the ath10k
mailing list