[PATCH 3/5] wifi: mt76: usb: add optional RX aggregation support
Lorenzo Bianconi
lorenzo at kernel.org
Mon Jun 15 00:08:07 PDT 2026
On Jun 13, Sean Wang wrote:
> From: Sean Wang <sean.wang at mediatek.com>
>
> Add common USB RX aggregation support and let drivers opt in by programming
> the UDMA RX aggregation limit and timeout.
>
> RX aggregation allows the device to pack multiple RX packets into one USB
> transfer, reducing URB completion rate, USB interrupt/IO overhead, and host
> RX scheduling pressure. This is especially useful at high throughput, where
> per-packet USB RX handling can become a CPU bottleneck.
>
> Keep it disabled by default so existing USB drivers keep the current RX
> behavior unless they explicitly enable aggregation.
>
> Signed-off-by: Sean Wang <sean.wang at mediatek.com>
> ---
> drivers/net/wireless/mediatek/mt76/mt76.h | 21 ++-
> .../net/wireless/mediatek/mt76/mt7925/usb.c | 12 ++
> .../net/wireless/mediatek/mt76/mt792x_usb.c | 23 +++-
> drivers/net/wireless/mediatek/mt76/usb.c | 124 +++++++++++++++++-
> 4 files changed, 169 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
> index 81740aa7df71..125c97dc1f28 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt76.h
> +++ b/drivers/net/wireless/mediatek/mt76/mt76.h
> @@ -680,6 +680,13 @@ struct mt76_usb {
> void (*ctrl_timeout)(struct mt76_dev *dev, int err);
> bool sg_en;
>
> + struct {
> + bool enable;
> + int align;
> + int padding;
> + int buf_size;
> + } rx_aggr;
> +
> struct mt76u_mcu {
> u8 *data;
> /* multiple reads */
> @@ -1857,6 +1864,17 @@ mt76u_bulk_msg(struct mt76_dev *dev, void *data, int len, int *actual_len,
> return usb_bulk_msg(udev, pipe, data, len, actual_len, timeout);
> }
>
> +static inline int
> +mt76u_rx_aggr_buf_size(int max_mpdu, int aggr_limit, int aggr_pkt_limit,
> + int padding)
> +{
> + int aggr_size;
> +
> + aggr_size = min(aggr_limit, aggr_pkt_limit * (max_mpdu + padding));
> +
> + return PAGE_ALIGN(max_mpdu + aggr_size);
> +}
> +
> void mt76_ethtool_page_pool_stats(struct mt76_dev *dev, u64 *data, int *index);
> void mt76_ethtool_worker(struct mt76_ethtool_worker_info *wi,
> struct mt76_sta_stats *stats, bool eht);
> @@ -1882,7 +1900,8 @@ void mt76u_stop_tx(struct mt76_dev *dev);
> void mt76u_stop_rx(struct mt76_dev *dev);
> int mt76u_resume_rx(struct mt76_dev *dev);
> void mt76u_queues_deinit(struct mt76_dev *dev);
> -
> +void mt76u_enable_rx_aggr(struct mt76_dev *dev, int align, int padding,
> + int buf_size);
> int mt76s_init(struct mt76_dev *dev, struct sdio_func *func,
> const struct mt76_bus_ops *bus_ops);
> int mt76s_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid);
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> index 49ad4fe9eb1b..a0bfe6f09ae4 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7925/usb.c
> @@ -3,12 +3,24 @@
>
> #include <linux/kernel.h>
> #include <linux/module.h>
> +#include <linux/sizes.h>
> #include <linux/usb.h>
>
> #include "mt7925.h"
> #include "mcu.h"
> #include "mac.h"
>
> +#define MT7927_USB_RX_AGGR_ALIGN 16
> +#define MT7927_USB_RX_AGGR_PADDING 12
> +#define MT7927_USB_RX_AGGR_LIMIT SZ_32K
> +#define MT7927_USB_RX_AGGR_PKT_LIMIT 30
> +#define MT7927_USB_RX_MAX_MPDU (13 * SZ_1K)
> +#define MT7927_USB_RX_AGGR_BUF_SIZE \
> + mt76u_rx_aggr_buf_size(MT7927_USB_RX_MAX_MPDU, \
> + MT7927_USB_RX_AGGR_LIMIT, \
> + MT7927_USB_RX_AGGR_PKT_LIMIT, \
> + MT7927_USB_RX_AGGR_PADDING)
If I do the math correctly, it will use an order-5 buffer for each urb, right?
If so, this approach is not recommended; please take a look at [0].
[0] https://lore.kernel.org/netdev/CANn89iJsNWkWzAJbOvaBNjozuLOQBcpVo1bnvfeGq5Zm6h9e=Q@mail.gmail.com/
> +
> static const struct usb_device_id mt7925u_device_table[] = {
> { USB_DEVICE_AND_INTERFACE_INFO(0x0e8d, 0x6639, 0xff, 0xff, 0xff),
> .driver_info = (kernel_ulong_t)MT7925_FIRMWARE_WM },
> diff --git a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> index 6280bc4bf78d..769e828e9449 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt792x_usb.c
> @@ -13,6 +13,9 @@
>
> #define MT792X_USB_TX_TIMEOUT_LIMIT 50000
> #define MT792X_USB_UDMA_IDLE_TIMEOUT 1000
> +#define MT792X_USB_RX_AGG_LIMIT 32
> +#define MT792X_USB_RX_AGG_TIMEOUT 100
> +#define MT792X_USB_RX_AGG_PKT_LIMIT 30
>
> static int mt792xu_read32(struct mt76_dev *dev, u32 addr, void *buf)
> {
> @@ -403,9 +406,23 @@ int mt792xu_dma_init(struct mt792x_dev *dev, bool resume)
> FIELD_PREP(MT_WL_TX_TMOUT_LMT,
> MT792X_USB_TX_TIMEOUT_LIMIT));
> mt76_set(dev, MT_UDMA_WLCFG_0, MT_WL_TX_TMOUT_FUNC_EN);
> - mt76_clear(dev, MT_UDMA_WLCFG_0,
> - MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT);
> - mt76_clear(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT);
> +
> + if (dev->mt76.usb.rx_aggr.enable) {
> + mt76_set(dev, MT_UDMA_WLCFG_0, MT_WL_RX_AGG_EN);
> + mt76_rmw(dev, MT_UDMA_WLCFG_0,
> + MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT,
> + FIELD_PREP(MT_WL_RX_AGG_TO,
> + MT792X_USB_RX_AGG_TIMEOUT) |
> + FIELD_PREP(MT_WL_RX_AGG_LMT,
> + MT792X_USB_RX_AGG_LIMIT));
> + mt76_rmw(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT,
> + FIELD_PREP(MT_WL_RX_AGG_PKT_LMT,
> + MT792X_USB_RX_AGG_PKT_LIMIT));
> + } else {
> + mt76_clear(dev, MT_UDMA_WLCFG_0, MT_WL_RX_AGG_EN |
> + MT_WL_RX_AGG_TO | MT_WL_RX_AGG_LMT);
> + mt76_clear(dev, MT_UDMA_WLCFG_1, MT_WL_RX_AGG_PKT_LMT);
> + }
>
> if (resume)
> return 0;
> diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
> index cab36630c978..cbdd663fbb25 100644
> --- a/drivers/net/wireless/mediatek/mt76/usb.c
> +++ b/drivers/net/wireless/mediatek/mt76/usb.c
> @@ -371,6 +371,14 @@ mt76u_refill_rx(struct mt76_dev *dev, struct mt76_queue *q,
> return mt76u_fill_rx_sg(dev, q, urb, nsgs);
>
> urb->transfer_buffer_length = q->buf_size;
> + if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable) {
> + if (!urb->transfer_buffer)
> + urb->transfer_buffer =
> + mt76_get_page_pool_buf(q, &offset, q->buf_size);
> +
> + return urb->transfer_buffer ? 0 : -ENOMEM;
maybe I am missing something, but this chunk of code seems unnecessary
> + }
> +
> urb->transfer_buffer = mt76_get_page_pool_buf(q, &offset, q->buf_size);
>
> return urb->transfer_buffer ? 0 : -ENOMEM;
> @@ -538,18 +546,113 @@ mt76u_build_rx_skb(struct mt76_dev *dev, void *data,
> return skb;
> }
>
> +static struct sk_buff *
> +mt76u_build_rx_skb_aggr(struct mt76_dev *dev, void *data, int data_len,
> + int buf_len)
> +{
Can we integrate it better with mt76u_build_rx_skb()?
> + int head_room, drv_flags = dev->drv->drv_flags;
> + int len = min_t(int, data_len, MT_SKB_HEAD_LEN);
> + struct sk_buff *skb;
> +
> + if (data_len <= 0)
> + return NULL;
> +
> + head_room = drv_flags & MT_DRV_RX_DMA_HDR ? 0 : MT_DMA_HDR_LEN;
> + skb = alloc_skb(len, GFP_ATOMIC);
> + if (!skb)
> + return NULL;
> +
> + data += head_room;
> + skb_put_data(skb, data, len);
> + if (data_len > len) {
> + struct page *page;
> +
> + data += len;
> + page = virt_to_head_page(data);
> + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> + page, data - page_address(page),
> + data_len - len, buf_len);
> + get_page(page);
> + }
> +
> + return skb;
> +}
> +
> +static int mt76u_process_rx_agg_entry(struct mt76_dev *dev, struct urb *urb)
> +{
> + int offset = 0, head_room, drv_flags = dev->drv->drv_flags;
> + int align = dev->usb.rx_aggr.align ?: 4;
> + int padding = dev->usb.rx_aggr.padding ?: 4;
> + u8 *data = urb->transfer_buffer;
> + int min_len;
> + int nframes = 0;
> +
> + if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state) ||
> + test_bit(MT76_REMOVED, &dev->phy.state))
> + return 0;
> +
> + head_room = drv_flags & MT_DRV_RX_DMA_HDR ? 0 : MT_DMA_HDR_LEN;
> + min_len = head_room + MT_RX_RXWI_LEN;
> +
> + while (urb->actual_length - offset >= min_len) {
> + struct sk_buff *skb;
> + int len, frame_len, agg_len;
> +
> + len = mt76u_get_rx_entry_len(dev, data + offset,
> + urb->actual_length - offset);
> + if (len < 0) {
> + dev_warn_ratelimited(dev->dev,
> + "invalid USB RX aggregate at offset %d\n",
> + offset);
> + break;
> + }
> +
> + frame_len = head_room + len;
> + if (frame_len > urb->actual_length - offset) {
> + dev_warn_ratelimited(dev->dev,
> + "truncated USB RX aggregate at offset %d\n",
> + offset);
> + break;
> + }
> +
> + agg_len = ALIGN(frame_len, align) + padding;
> + if (dev->drv->rx_check &&
> + !dev->drv->rx_check(dev, data + offset + head_room, len))
> + goto next;
> +
> + skb = mt76u_build_rx_skb_aggr(dev, data + offset, len,
> + agg_len);
> + if (skb) {
> + dev->drv->rx_skb(dev, MT_RXQ_MAIN, skb, NULL);
> + nframes++;
> + }
> +
> +next:
> + offset += agg_len;
> + }
> +
> + mt76_put_page_pool_buf(urb->transfer_buffer, false);
> + urb->transfer_buffer = NULL;
> +
> + return max(nframes, 1);
> +}
> +
> static int
> mt76u_process_rx_entry(struct mt76_dev *dev, struct urb *urb,
> - int buf_size)
> + enum mt76_rxq_id qid, int buf_size)
> {
> u8 *data = urb->num_sgs ? sg_virt(&urb->sg[0]) : urb->transfer_buffer;
> int data_len = urb->num_sgs ? urb->sg[0].length : urb->actual_length;
> int len, nsgs = 1, head_room, drv_flags = dev->drv->drv_flags;
> struct sk_buff *skb;
>
> - if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state))
> + if (!test_bit(MT76_STATE_INITIALIZED, &dev->phy.state) ||
> + test_bit(MT76_REMOVED, &dev->phy.state))
This seems like a fix to me.
> return 0;
>
> + if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable && !urb->num_sgs)
> + return mt76u_process_rx_agg_entry(dev, urb);
> +
> len = mt76u_get_rx_entry_len(dev, data, urb->actual_length);
> if (len < 0)
> return 0;
> @@ -594,6 +697,9 @@ static void mt76u_complete_rx(struct urb *urb)
>
> trace_rx_urb(dev, urb);
>
> + if (test_bit(MT76_REMOVED, &dev->phy.state))
> + return;
same here.
> +
> switch (urb->status) {
> case -ECONNRESET:
> case -ESHUTDOWN:
> @@ -658,12 +764,14 @@ mt76u_process_rx_queue(struct mt76_dev *dev, struct mt76_queue *q)
> if (!urb)
> break;
>
> - count = mt76u_process_rx_entry(dev, urb, q->buf_size);
> + count = mt76u_process_rx_entry(dev, urb, qid, q->buf_size);
> if (count > 0) {
> err = mt76u_refill_rx(dev, q, urb, count);
> if (err < 0)
> break;
> }
> + if (test_bit(MT76_REMOVED, &dev->phy.state))
> + break;
> mt76u_submit_rx_buf(dev, qid, urb);
> }
> }
> @@ -729,10 +837,6 @@ mt76u_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid)
> struct mt76_queue *q = &dev->q_rx[qid];
> int i, err;
>
> - err = mt76_create_page_pool(dev, q);
> - if (err)
> - return err;
> -
> spin_lock_init(&q->lock);
> q->entry = devm_kcalloc(dev->dev,
> MT_NUM_RX_ENTRIES, sizeof(*q->entry),
> @@ -742,6 +846,12 @@ mt76u_alloc_rx_queue(struct mt76_dev *dev, enum mt76_rxq_id qid)
>
> q->ndesc = MT_NUM_RX_ENTRIES;
> q->buf_size = PAGE_SIZE;
> + if (qid == MT_RXQ_MAIN && dev->usb.rx_aggr.enable)
> + q->buf_size = dev->usb.rx_aggr.buf_size ?: PAGE_SIZE;
> +
> + err = mt76_create_page_pool(dev, q);
> + if (err)
> + return err;
>
> for (i = 0; i < q->ndesc; i++) {
> err = mt76u_rx_urb_alloc(dev, q, &q->entry[i]);
> --
> 2.43.0
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 228 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-mediatek/attachments/20260615/f7d34a3f/attachment-0001.sig>
More information about the Linux-mediatek
mailing list