[PATCH v2] wifi: mt76: fix potential memory leakage

lorenzo at kernel.org lorenzo at kernel.org
Thu Dec 22 01:44:54 PST 2022


> On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > On Dec 21, Felix Fietkau wrote:
> > > Hi Sujuan,
> > > 
> > > > Yes, it is so expensive, but if no memcopy, it will casue memory
> > > > fragmentation (we hit this issue in internal SQC).
> > > > 
> > > > as we know, wed needs to exchange rx pkt(belongs to wed rx buffer
> > > > manager) with wifi driver(dma rx data queue) by exchanging wfdma
> > > > dmad
> > > > to ensure the free wed rx buffer.
> > > > 
> > > > it is possiable that a large number of buffer has been exchanged
> > > > to wed
> > > > and can not come back to wlan driver. So, the memory from the
> > > > same 32K
> > > > page cache is unable to be released, and it will be failed at
> > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > 
> > > > Any ideas but memcopy?
> > > 
> > > A simple solution would be to simply allocate single pages, or
> > > half-page fragments.
> > > 
> > > - Felix
> > > 
> > 
> > A simple approach would be allocating a single page for each rx
> > buffer.
> > 
> > @Sujuan: can you please double check if it is ok from performance and
> > memory
> > 	 fragmentation point of view? If not I guess we can try to
> > optimize it
> > 	 and allocate multiple buffers in the same page tweeking page
> > refcount.
> > 
> > (this patch must be applied on top of Felix's dma fix).
> > 
> 
> Allocating single page for each rx buffer avoids memory fragmentation,
> but it always uses 4K for one rx pkt which only needs 2K, right?

correct, we can optimize it allocating multiple buffers (in this case 2,
assuming 4K pages) in a single page and recycling the page.

> 
> I guess performance would be worse without page cache.

I think it is a trade-off

> We have tested on the mtk private driver, 7% drop in throughput when
> setting the 4K page cache compared to the 32K page cache.
> and 10% drop when use slab to allocate buffer.

IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or with a private
page_frag_alloc() implementation) and you avoided memory allocation
failures due to fragmentation but you got ~ 7% drop in throughput, correct?
I think this is quite expected since we need to refill ~ 8 times more the
page cache.

Not considering memory fragmentation, have you measured the impact of the
memcpy of a full buffer?

> 
> A single page per rx buffer may cause a throughput drop of over 7% and
> waste memory, what do you think?

Implementing the page recycles as it is done in page_frag_alloc() we should get
the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to 4K.

Regards,
Lorenzo

> 
> Regards,
> Sujuan
> 
> > Regards,
> > Lorenzo
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > b/drivers/net/wireless/mediatek/mt76/dma.c
> > index 28a7fe064313..1d9e580977fc 100644
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> > struct mt76_queue *q,
> >  	return ret;
> >  }
> >  
> > +static void *
> > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > +{
> > +	if ((q->flags & MT_QFLAG_WED) &&
> > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> > +		/* WED RX queue */
> > +		struct page *page = dev_alloc_page();
> > +
> > +		return page ? page_address(page) : NULL;
> > +	}
> > +
> > +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> > +}
> > +
> >  static int
> >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
> >  {
> > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > mt76_queue *q)
> >  		struct mt76_queue_buf qbuf;
> >  		void *buf = NULL;
> >  
> > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > GFP_ATOMIC);
> > +		buf = mt76_dma_get_rx_buf(q);
> >  		if (!buf)
> >  			break;
> >  
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > @@ -594,13 +594,9 @@ static void
> > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> >  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> > *wed)
> >  {
> >  	struct mt7915_dev *dev;
> > -	u32 length;
> >  	int i;
> >  
> >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > -				sizeof(struct skb_shared_info));
> > -
> >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> >  		struct mt76_txwi_cache *t;
> >  
> > @@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct
> > mtk_wed_device *wed)
> >  
> >  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
> >  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> > -		__free_pages(virt_to_page(t->ptr), get_order(length));
> > +		free_page(virt_to_page(t->ptr));
> >  		t->ptr = NULL;
> >  
> >  		mt76_put_rxwi(&dev->mt76, t);
> > @@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  {
> >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> >  	struct mt7915_dev *dev;
> > -	u32 length;
> >  	int i;
> >  
> >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > -				sizeof(struct skb_shared_info));
> > -
> >  	for (i = 0; i < size; i++) {
> >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
> >  		dma_addr_t phy_addr;
> > @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  		int token;
> >  		void *ptr;
> >  
> > -		page = __dev_alloc_pages(GFP_KERNEL,
> > get_order(length));
> > +		page = __dev_alloc_page(GFP_KERNEL);
> >  		if (!page)
> >  			goto unmap;
> >  
> > @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  					  wed->wlan.rx_size,
> >  					  DMA_TO_DEVICE);
> >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > phy_addr))) {
> > -			__free_pages(page, get_order(length));
> > +			free_page(page);
> >  			goto unmap;
> >  		}
> >  
> > @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  		if (token < 0) {
> >  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
> >  					 wed->wlan.rx_size,
> > DMA_TO_DEVICE);
> > -			__free_pages(page, get_order(length));
> > +			free_page(page);
> >  			goto unmap;
> >  		}
> >  
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-mediatek/attachments/20221222/51fa32fc/attachment-0001.sig>


More information about the Linux-mediatek mailing list