[PATCH 3/5] wifi: mt76: usb: add optional RX aggregation support

Lorenzo Bianconi lorenzo at kernel.org
Mon Jun 15 00:08:07 PDT 2026


On Jun 13, Sean Wang wrote:
> From: Sean Wang <sean.wang at mediatek.com>
> 
> Add common USB RX aggregation support and let drivers opt in by programming
> the UDMA RX aggregation limit and timeout.
> 
> RX aggregation allows the device to pack multiple RX packets into one USB
> transfer, reducing URB completion rate, USB interrupt/IO overhead, and host
> RX scheduling pressure. This is especially useful at high throughput, where
> per-packet USB RX handling can become a CPU bottleneck.
> 
> Keep it disabled by default so existing USB drivers keep the current RX
> behavior unless they explicitly enable aggregation.
> 
> Signed-off-by: Sean Wang <sean.wang at mediatek.com>
> ---
>  drivers/net/wireless/mediatek/mt76/mt76.h     |  21 ++-
>  .../net/wireless/mediatek/mt76/mt7925/usb.c   |  12 ++
>  .../net/wireless/mediatek/mt76/mt792x_usb.c   |  23 +++-
>  drivers/net/wireless/mediatek/mt76/usb.c      | 124 +++++++++++++++++-
>  4 files changed, 169 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 81740aa7df71..125c97dc1f28 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -680,6 +680,13 @@ struct mt76_usb {
>  	void (*ctrl_timeout)(struct mt76_dev *dev, int err);
>  	bool sg_en;
>  
> +	struct {
> +		bool enable;
> +		int align;
> +		int padding;
> +		int buf_size;
> +	} rx_aggr;
> +
>  	struct mt76u_mcu {
>  		u8 *data;
>  		/* multiple reads */
> @@ -1857,6 +1864,17 @@ mt76u_bulk_msg(struct mt76_dev *dev, void *data, int len, int *actual_len,
>  	return usb_bulk_msg(udev, pipe, data, len, actual_len, timeout);
>  }
>  
> +static inline int
> +mt76u_rx_aggr_buf_size(int max_mpdu, int aggr_limit, int aggr_pkt_limit,
> +		       int padding)
> +{
> +	int aggr_size;
> +
> +	aggr_size = min(aggr_limit, aggr_pkt_limit * (max_mpdu + padding));
> +
> +	return PAGE_ALIGN(max_mpdu + aggr_size);
> +}
> +
>  void mt76_ethtool_page_pool_stats(struct mt76_dev *dev, u64 *data, int *index);
>  void mt76_ethtool_worker(struct mt76_ethtool_worker_info *wi,
>  			 struct mt76_sta_stats *stats, bool eht);
> @@ -1882,7 +1900,8 @@ void mt76u_stop_tx(struct mt76_dev *dev);
>  void mt76u_stop_rx(struct mt76_dev *dev);
>  int mt76u_resume_rx(struct mt76_dev *dev);
>  void mt76u_queues_deinit(struct mt76_dev *dev);
> -
> +void mt76u_enable_rx_aggr(struct mt76_dev *dev, int align, int padding,
> +			  int buf_size);
>  int mt76s_init(struct mt76_dev *dev, struct sdio_func *func,
>  	       const struct mt76_bus_ops *bus_ops);
>  int mt76s_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid);
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> index 49ad4fe9eb1b..a0bfe6f09ae4 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> @@ -3,12 +3,24 @@
>  
>  #include <linux/kernel.h>
>  #include <linux/module.h>
> +#include <linux/sizes.h>
>  #include <linux/usb.h>
>  
>  #include "mt7925.h"
>  #include "mcu.h"
>  #include "mac.h"
>  
> +#define MT7927_USB_RX_AGGR_ALIGN	16
> +#define MT7927_USB_RX_AGGR_PADDING	12
> +#define MT7927_USB_RX_AGGR_LIMIT	SZ_32K
> +#define MT7927_USB_RX_AGGR_PKT_LIMIT	30
> +#define MT7927_USB_RX_MAX_MPDU		(13 * SZ_1K)
> +#define MT7927_USB_RX_AGGR_BUF_SIZE \
> +	mt76u_rx_aggr_buf_size(MT7927_USB_RX_MAX_MPDU, \
> +			       MT7927_USB_RX_AGGR_LIMIT, \
> +			       MT7927_USB_RX_AGGR_PKT_LIMIT, \
> +			       MT7927_USB_RX_AGGR_PADDING)

If I do the math correctly, it will use an order-5 buffer for each urb, right?
If so, this approach is not recommended; please take a look at [0].

[0] https://lore.kernel.org/netdev/CANn89iJsNWkWzAJbOvaBNjozuLOQBcpVo1bnvfeGq5Zm6h9e=Q@mail.gmail.com/

> +
>  static const struct usb_device_id mt7925u_device_table[] = {
>  	{ USB_DEVICE_AND_INTERFACE_INFO(0x0e8d, 0x6639, 0xff, 0xff, 0xff),
>  		.driver_info = (kernel_ulong_t)MT7925_FIRMWARE_WM },
> diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> index 6280bc4bf78d..769e828e9449 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> @@ -13,6 +13,9 @@
>  
>  #define MT792X_USB_TX_TIMEOUT_LIMIT	50000
>  #define MT792X_USB_UDMA_IDLE_TIMEOUT	1000
> +#define MT792X_USB_RX_AGG_LIMIT		32
> +#define MT792X_USB_RX_AGG_TIMEOUT	100
> +#define MT792X_USB_RX_AGG_PKT_LIMIT	30
>  
>  static int mt792xu_read32(struct mt76_dev *dev, u32 addr, void *buf)
>  {
> @@ -403,9 +406,23 @@ int mt792xu_dma_init(struct mt792x_dev *dev, bool resume)
>  		 FIELD_PREP(MT_WL_TX_TMOUT_LMT,
>  			    MT792X_USB_TX_TIMEOUT_LIMIT));
>  	mt76_set(dev, MT_UDMA_WLCFG_0, MT_WL_TX_TMOUT_FUNC_EN);
> -	mt76_clear(dev, MT_UDMA_WLCFG_0,
> -		   MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT);
> -	mt76_clear(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT);
> +
> +	if (dev->mt76.usb.rx_aggr.enable) {
> +		mt76_set(dev, MT_UDMA_WLCFG_0, MT_WL_RX_AGG_EN);
> +		mt76_rmw(dev, MT_UDMA_WLCFG_0,
> +			 MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT,
> +			 FIELD_PREP(MT_WL_RX_AGG_TO,
> +				    MT792X_USB_RX_AGG_TIMEOUT) |
> +			 FIELD_PREP(MT_WL_RX_AGG_LMT,
> +				    MT792X_USB_RX_AGG_LIMIT));
> +		mt76_rmw(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT,
> +			 FIELD_PREP(MT_WL_RX_AGG_PKT_LMT,
> +				    MT792X_USB_RX_AGG_PKT_LIMIT));
> +	} else {
> +		mt76_clear(dev, MT_UDMA_WLCFG_0, MT_WL_RX_AGG_EN |
> +			   MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT);
> +		mt76_clear(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT);
> +	}
>  
>  	if (resume)
>  		return 0;
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index cab36630c978..cbdd663fbb25 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -371,6 +371,14 @@ mt76u_refill_rx(struct mt76_dev *dev, struct mt76_queue *q,
>  		return mt76u_fill_rx_sg(dev, q, urb, nsgs);
>  
>  	urb->transfer_buffer_length = q->buf_size;
> +	if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable) {
> +		if (!urb->transfer_buffer)
> +			urb->transfer_buffer =
> +				mt76_get_page_pool_buf(q, &offset, q->buf_size);
> +
> +		return urb->transfer_buffer ? 0 : -ENOMEM;

Maybe I am missing something, but this chunk of code seems unnecessary.

> +	}
> +
>  	urb->transfer_buffer = mt76_get_page_pool_buf(q, &offset, q->buf_size);
>  
>  	return urb->transfer_buffer ? 0 : -ENOMEM;
> @@ -538,18 +546,113 @@ mt76u_build_rx_skb(struct mt76_dev *dev, void *data,
>  	return skb;
>  }
>  
> +static struct sk_buff *
> +mt76u_build_rx_skb_aggr(struct mt76_dev *dev, void *data, int data_len,
> +			int buf_len)
> +{

Can we integrate it better with mt76u_build_rx_skb()?

> +	int head_room, drv_flags = dev->drv->drv_flags;
> +	int len = min_t(int, data_len, MT_SKB_HEAD_LEN);
> +	struct sk_buff *skb;
> +
> +	if (data_len <= 0)
> +		return NULL;
> +
> +	head_room = drv_flags & MT_DRV_RX_DMA_HDR ? 0 : MT_DMA_HDR_LEN;
> +	skb = alloc_skb(len, GFP_ATOMIC);
> +	if (!skb)
> +		return NULL;
> +
> +	data += head_room;
> +	skb_put_data(skb, data, len);
> +	if (data_len > len) {
> +		struct page *page;
> +
> +		data += len;
> +		page = virt_to_head_page(data);
> +		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> +				page, data - page_address(page),
> +				data_len - len, buf_len);
> +		get_page(page);
> +	}
> +
> +	return skb;
> +}
> +
> +static int mt76u_process_rx_agg_entry(struct mt76_dev *dev, struct urb *urb)
> +{
> +	int offset = 0, head_room, drv_flags = dev->drv->drv_flags;
> +	int align = dev->usb.rx_aggr.align ?: 4;
> +	int padding = dev->usb.rx_aggr.padding ?: 4;
> +	u8 *data = urb->transfer_buffer;
> +	int min_len;
> +	int nframes = 0;
> +
> +	if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state) ||
> +	    test_bit(MT76_REMOVED, &dev->phy.state))
> +		return 0;
> +
> +	head_room = drv_flags & MT_DRV_RX_DMA_HDR ? 0 : MT_DMA_HDR_LEN;
> +	min_len = head_room + MT_RX_RXWI_LEN;
> +
> +	while (urb->actual_length - offset >= min_len) {
> +		struct sk_buff *skb;
> +		int len, frame_len, agg_len;
> +
> +		len = mt76u_get_rx_entry_len(dev, data + offset,
> +					     urb->actual_length - offset);
> +		if (len < 0) {
> +			dev_warn_ratelimited(dev->dev,
> +					     "invalid USB RX aggregate at offset %d\n",
> +					     offset);
> +			break;
> +		}
> +
> +		frame_len = head_room + len;
> +		if (frame_len > urb->actual_length - offset) {
> +			dev_warn_ratelimited(dev->dev,
> +					     "truncated USB RX aggregate at offset %d\n",
> +					     offset);
> +			break;
> +		}
> +
> +		agg_len = ALIGN(frame_len, align) + padding;
> +		if (dev->drv->rx_check &&
> +		    !dev->drv->rx_check(dev, data + offset + head_room, len))
> +			goto next;
> +
> +		skb = mt76u_build_rx_skb_aggr(dev, data + offset, len,
> +					      agg_len);
> +		if (skb) {
> +			dev->drv->rx_skb(dev, MT_RXQ_MAIN, skb, NULL);
> +			nframes++;
> +		}
> +
> +next:
> +		offset += agg_len;
> +	}
> +
> +	mt76_put_page_pool_buf(urb->transfer_buffer, false);
> +	urb->transfer_buffer = NULL;
> +
> +	return max(nframes, 1);
> +}
> +
>  static int
>  mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb,
> -		       int buf_size)
> +		       enum mt76_rxq_id qid, int buf_size)
>  {
>  	u8 *data = urb->num_sgs ? sg_virt(&urb->sg[0]) : urb->transfer_buffer;
>  	int data_len = urb->num_sgs ? urb->sg[0].length : urb->actual_length;
>  	int len, nsgs = 1, head_room, drv_flags = dev->drv->drv_flags;
>  	struct sk_buff *skb;
>  
> -	if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state))
> +	if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state) ||
> +	    test_bit(MT76_REMOVED, &dev->phy.state))

This seems like a fix to me.

>  		return 0;
>  
> +	if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable && !urb->num_sgs)
> +		return mt76u_process_rx_agg_entry(dev, urb);
> +
>  	len = mt76u_get_rx_entry_len(dev, data, urb->actual_length);
>  	if (len < 0)
>  		return 0;
> @@ -594,6 +697,9 @@ static void mt76u_complete_rx(struct urb *urb)
>  
>  	trace_rx_urb(dev, urb);
>  
> +	if (test_bit(MT76_REMOVED, &dev->phy.state))
> +		return;

Same here.

> +
>  	switch (urb->status) {
>  	case -ECONNRESET:
>  	case -ESHUTDOWN:
> @@ -658,12 +764,14 @@ mt76u_process_rx_queue(struct mt76_dev *dev, struct mt76_queue *q)
>  		if (!urb)
>  			break;
>  
> -		count = mt76u_process_rx_entry(dev, urb, q->buf_size);
> +		count = mt76u_process_rx_entry(dev, urb, qid, q->buf_size);
>  		if (count > 0) {
>  			err = mt76u_refill_rx(dev, q, urb, count);
>  			if (err < 0)
>  				break;
>  		}
> +		if (test_bit(MT76_REMOVED, &dev->phy.state))
> +			break;
>  		mt76u_submit_rx_buf(dev, qid, urb);
>  	}
>  }
> @@ -729,10 +837,6 @@ mt76u_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid)
>  	struct mt76_queue *q = &dev->q_rx[qid];
>  	int i, err;
>  
> -	err = mt76_create_page_pool(dev, q);
> -	if (err)
> -		return err;
> -
>  	spin_lock_init(&q->lock);
>  	q->entry = devm_kcalloc(dev->dev,
>  				MT_NUM_RX_ENTRIES, sizeof(*q->entry),
> @@ -742,6 +846,12 @@ mt76u_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid)
>  
>  	q->ndesc = MT_NUM_RX_ENTRIES;
>  	q->buf_size = PAGE_SIZE;
> +	if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable)
> +		q->buf_size = dev->usb.rx_aggr.buf_size ?: PAGE_SIZE;
> +
> +	err = mt76_create_page_pool(dev, q);
> +	if (err)
> +		return err;
>  
>  	for (i = 0; i < q->ndesc; i++) {
>  		err = mt76u_rx_urb_alloc(dev, q, &q->entry[i]);
> -- 
> 2.43.0
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-mediatek/attachments/20260615/f7d34a3f/attachment-0001.sig>


More information about the Linux-mediatek mailing list