[PATCH 2/2] ath10k: do not use coherent memory for tx buffers

Peter Oh poh at codeaurora.org
Mon Nov 23 10:25:39 PST 2015


On 11/23/2015 10:02 AM, Sebastian Gottschall wrote:
> Am 23.11.2015 um 18:25 schrieb Peter Oh:
>> Hi,
>>
>> Have you measured the peak throughput?
>> The pre-allocated coherent memory concept was introduced as once of 
>> peak throughput improvement.
>> IIRC, dma_map_single takes about 4 us on Cortex A7 and 
>> dma_unmap_single also takes time to invalid cache.
>> Please share your tput number before and after, so I don't need to 
>> worry about performance degrade.
> yes. and this concept fucks up the qualcom ipq806x platform (which has 
> by default 2 QCA99XX cards). it does not work, since the preallocated 
> concept allocates too much memory
Can you specify the number that you're seeing?
The tx buffer descriptor uses 56KB, so it would require 64KB x 2 slab 
for 2 cards which is 128KB.
Are you saying your system does not work because of this 128KB usage?
> which is not available
> in dma space on that platform.
>
> thanks
> Sebastian
>>
>> Thanks,
>> Peter
>>
>> On 11/23/2015 05:18 AM, Felix Fietkau wrote:
>>> Coherent memory is expensive to access, since all memory accesses 
>>> bypass
>>> the cache. It is also completely unnecessary for this case.
>>> Convert to mapped memory instead and use the DMA API to flush the cache
>>> where necessary.
>>> Fixes allocation failures on embedded devices.
>>>
>>> Signed-off-by: Felix Fietkau <nbd at openwrt.org>
>>> ---
>>>   drivers/net/wireless/ath/ath10k/htt_tx.c | 77 
>>> +++++++++++++++++++++-----------
>>>   1 file changed, 51 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c 
>>> b/drivers/net/wireless/ath/ath10k/htt_tx.c
>>> index 8f76b9d..99d9793 100644
>>> --- a/drivers/net/wireless/ath/ath10k/htt_tx.c
>>> +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
>>> @@ -100,7 +100,7 @@ void ath10k_htt_tx_free_msdu_id(struct 
>>> ath10k_htt *htt, u16 msdu_id)
>>>   int ath10k_htt_tx_alloc(struct ath10k_htt *htt)
>>>   {
>>>       struct ath10k *ar = htt->ar;
>>> -    int ret, size;
>>> +    int size;
>>>         ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt tx max num pending tx 
>>> %d\n",
>>>              htt->max_num_pending_tx);
>>> @@ -109,39 +109,41 @@ int ath10k_htt_tx_alloc(struct ath10k_htt *htt)
>>>       idr_init(&htt->pending_tx);
>>>         size = htt->max_num_pending_tx * sizeof(struct 
>>> ath10k_htt_txbuf);
>>> -    htt->txbuf.vaddr = dma_alloc_coherent(ar->dev, size,
>>> -                          &htt->txbuf.paddr,
>>> -                          GFP_DMA);
>>> -    if (!htt->txbuf.vaddr) {
>>> -        ath10k_err(ar, "failed to alloc tx buffer\n");
>>> -        ret = -ENOMEM;
>>> +    htt->txbuf.vaddr = kzalloc(size, GFP_KERNEL);
>>> +    if (!htt->txbuf.vaddr)
>>>           goto free_idr_pending_tx;
>>> -    }
>>> +
>>> +    htt->txbuf.paddr = dma_map_single(ar->dev, htt->txbuf.vaddr, size,
>>> +                      DMA_TO_DEVICE);
>>> +    if (dma_mapping_error(ar->dev, htt->txbuf.paddr))
>>> +        goto free_txbuf_vaddr;
>>>         if (!ar->hw_params.continuous_frag_desc)
>>> -        goto skip_frag_desc_alloc;
>>> +        return 0;
>>>         size = htt->max_num_pending_tx * sizeof(struct 
>>> htt_msdu_ext_desc);
>>> -    htt->frag_desc.vaddr = dma_alloc_coherent(ar->dev, size,
>>> -                          &htt->frag_desc.paddr,
>>> -                          GFP_DMA);
>>> -    if (!htt->frag_desc.vaddr) {
>>> -        ath10k_warn(ar, "failed to alloc fragment desc memory\n");
>>> -        ret = -ENOMEM;
>>> +    htt->frag_desc.vaddr = kzalloc(size, GFP_KERNEL);
>>> +    if (!htt->frag_desc.vaddr)
>>>           goto free_txbuf;
>>> -    }
>>>   -skip_frag_desc_alloc:
>>> +    htt->frag_desc.paddr = dma_map_single(ar->dev, 
>>> htt->frag_desc.vaddr,
>>> +                          size, DMA_TO_DEVICE);
>>> +    if (dma_mapping_error(ar->dev, htt->txbuf.paddr))
>>> +        goto free_frag_desc;
>>> +
>>>       return 0;
>>>   +free_frag_desc:
>>> +    kfree(htt->frag_desc.vaddr);
>>>   free_txbuf:
>>>       size = htt->max_num_pending_tx *
>>>                 sizeof(struct ath10k_htt_txbuf);
>>> -    dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr,
>>> -              htt->txbuf.paddr);
>>> +    dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size, 
>>> DMA_TO_DEVICE);
>>> +free_txbuf_vaddr:
>>> +    kfree(htt->txbuf.vaddr);
>>>   free_idr_pending_tx:
>>>       idr_destroy(&htt->pending_tx);
>>> -    return ret;
>>> +    return -ENOMEM;
>>>   }
>>>     static int ath10k_htt_tx_clean_up_pending(int msdu_id, void 
>>> *skb, void *ctx)
>>> @@ -170,15 +172,17 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
>>>       if (htt->txbuf.vaddr) {
>>>           size = htt->max_num_pending_tx *
>>>                     sizeof(struct ath10k_htt_txbuf);
>>> -        dma_free_coherent(htt->ar->dev, size, htt->txbuf.vaddr,
>>> -                  htt->txbuf.paddr);
>>> +        dma_unmap_single(htt->ar->dev, htt->txbuf.paddr, size,
>>> +                 DMA_TO_DEVICE);
>>> +        kfree(htt->txbuf.vaddr);
>>>       }
>>>         if (htt->frag_desc.vaddr) {
>>>           size = htt->max_num_pending_tx *
>>>                     sizeof(struct htt_msdu_ext_desc);
>>> -        dma_free_coherent(htt->ar->dev, size, htt->frag_desc.vaddr,
>>> -                  htt->frag_desc.paddr);
>>> +        dma_unmap_single(htt->ar->dev, htt->frag_desc.paddr, size,
>>> +                 DMA_TO_DEVICE);
>>> +        kfree(htt->frag_desc.vaddr);
>>>       }
>>>   }
>>>   @@ -550,6 +554,7 @@ int ath10k_htt_tx(struct ath10k_htt *htt, 
>>> struct sk_buff *msdu)
>>>       struct htt_msdu_ext_desc *ext_desc = NULL;
>>>       bool limit_mgmt_desc = false;
>>>       bool is_probe_resp = false;
>>> +    int txbuf_offset, frag_offset, frag_size;
>>>         if (unlikely(ieee80211_is_mgmt(hdr->frame_control)) &&
>>>           ar->hw_params.max_probe_resp_desc_thres) {
>>> @@ -574,9 +579,11 @@ int ath10k_htt_tx(struct ath10k_htt *htt, 
>>> struct sk_buff *msdu)
>>>       prefetch_len = min(htt->prefetch_len, msdu->len);
>>>       prefetch_len = roundup(prefetch_len, 4);
>>>   +    frag_size = sizeof(struct htt_msdu_ext_desc);
>>> +    frag_offset = frag_size * msdu_id;
>>> +    txbuf_offset = sizeof(struct ath10k_htt_txbuf) * msdu_id;
>>>       skb_cb->htt.txbuf = &htt->txbuf.vaddr[msdu_id];
>>> -    skb_cb->htt.txbuf_paddr = htt->txbuf.paddr +
>>> -        (sizeof(struct ath10k_htt_txbuf) * msdu_id);
>>> +    skb_cb->htt.txbuf_paddr = htt->txbuf.paddr + txbuf_offset;
>>>         if ((ieee80211_is_action(hdr->frame_control) ||
>>>            ieee80211_is_deauth(hdr->frame_control) ||
>>> @@ -597,6 +604,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, 
>>> struct sk_buff *msdu)
>>>           goto err_free_msdu_id;
>>>       }
>>>   +    dma_sync_single_range_for_cpu(dev, htt->txbuf.paddr, 
>>> txbuf_offset,
>>> +                      sizeof(struct ath10k_htt_txbuf),
>>> +                      DMA_TO_DEVICE);
>>> +
>>> +    if (ar->hw_params.continuous_frag_desc)
>>> +        dma_sync_single_range_for_cpu(dev, htt->frag_desc.paddr,
>>> +                          frag_offset, frag_size,
>>> +                          DMA_TO_DEVICE);
>>> +
>>>       switch (skb_cb->txmode) {
>>>       case ATH10K_HW_TXRX_RAW:
>>>       case ATH10K_HW_TXRX_NATIVE_WIFI:
>>> @@ -723,6 +739,15 @@ int ath10k_htt_tx(struct ath10k_htt *htt, 
>>> struct sk_buff *msdu)
>>>       sg_items[1].paddr = skb_cb->paddr;
>>>       sg_items[1].len = prefetch_len;
>>>   +    if (ar->hw_params.continuous_frag_desc)
>>> +        dma_sync_single_range_for_device(dev, htt->frag_desc.paddr,
>>> +                         frag_offset, frag_size,
>>> +                         DMA_TO_DEVICE);
>>> +
>>> +    dma_sync_single_range_for_device(dev, htt->txbuf.paddr, 
>>> txbuf_offset,
>>> +                     sizeof(struct ath10k_htt_txbuf),
>>> +                     DMA_TO_DEVICE);
>>> +
>>>       res = ath10k_hif_tx_sg(htt->ar,
>>> htt->ar->htc.endpoint[htt->eid].ul_pipe_id,
>>>                      sg_items, ARRAY_SIZE(sg_items));
>>
>>
>> _______________________________________________
>> ath10k mailing list
>> ath10k at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/ath10k
>>
>
>
> _______________________________________________
> ath10k mailing list
> ath10k at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/ath10k




More information about the ath10k mailing list