[PATCH RFC net-next v2] net: airoha: Add TCP LRO support

Alexander Lobakin aleksander.lobakin at intel.com
Tue May 26 09:01:26 PDT 2026


From: Lorenzo Bianconi <lorenzo at kernel.org>
Date: Tue, 26 May 2026 08:58:05 +0200

> Add hardware TCP Large Receive Offload (LRO) support to the airoha_eth
> driver, leveraging the EN7581/AN7583 SoC's 8 dedicated LRO hardware queues
> mapped to RX queues 24–31. LRO hw offloading does not support
> Scatter-Gather (SG) so it is required to increase the page_pool allocation
> order to 2 for RX queues 24–31 (LRO queues).
> 
> Performance comparison between GRO and hw LRO has been carried out using
> a 10Gbps NIC:
> 
> GRO: ~2.7 Gbps
> LRO: ~8.1 Gbps
> 
> Please note with respect to the previous implementation, page_pool
> allocation order has been reduced from 5 to 2.
> 
> Tested-by: Madhur Agrawal <madhur.agrawal at airoha.com>
> Signed-off-by: Lorenzo Bianconi <lorenzo at kernel.org>

[...]

> @@ -587,6 +630,85 @@ static int airoha_qdma_get_gdm_port(struct airoha_eth *eth,
>  	return port >= ARRAY_SIZE(eth->ports) ? -EINVAL : port;
>  }
>  
> +static int airoha_qdma_lro_rx_process(struct airoha_queue *q,
> +				      struct airoha_qdma_desc *desc)
> +{
> +	u32 desc_ctrl = le32_to_cpu(READ_ONCE(desc->ctrl));
> +	u32 msg1 = le32_to_cpu(READ_ONCE(desc->msg1));
> +	u32 msg2 = le32_to_cpu(READ_ONCE(desc->msg2));
> +	u32 msg3 = le32_to_cpu(READ_ONCE(desc->msg3));

Why are these READ_ONCE()s needed? Does desc come from the HW (sorry, I
didn't follow the whole code flow), or...?

> +	struct sk_buff *skb = q->skb;
> +	u32 len, th_off, tcp_ack_seq;
> +	u16 tcp_win, l2_len;
> +	struct tcphdr *th;
> +	bool ipv4, ipv6;
> +
> +	if (FIELD_GET(QDMA_ETH_RXMSG_AGG_COUNT_MASK, msg2) <= 1)
> +		return 0;
> +
> +	ipv4 = FIELD_GET(QDMA_ETH_RXMSG_IP4_MASK, msg1);
> +	ipv6 = FIELD_GET(QDMA_ETH_RXMSG_IP6_MASK, msg1);
> +	if (!ipv4 && !ipv6)
> +		return -EOPNOTSUPP;
> +
> +	l2_len = FIELD_GET(QDMA_ETH_RXMSG_L2_LEN_MASK, msg2);
> +	len = FIELD_GET(QDMA_DESC_LEN_MASK, desc_ctrl);
> +	if (ipv4) {
> +		struct iphdr *iph;
> +
> +		if (!pskb_may_pull(skb, l2_len + sizeof(*iph)))
> +			return -EINVAL;
> +
> +		iph = (struct iphdr *)(skb->data + l2_len);
> +		if (iph->protocol != IPPROTO_TCP)
> +			return -EOPNOTSUPP;
> +
> +		iph->tot_len = cpu_to_be16(len - l2_len);
> +		iph->check = 0;
> +		iph->check = ip_fast_csum((void *)iph, iph->ihl);
> +		th_off = l2_len + (iph->ihl << 2);
> +	} else {
> +		struct ipv6hdr *ip6h;
> +
> +		if (!pskb_may_pull(skb, l2_len + sizeof(*ip6h)))
> +			return -EINVAL;
> +
> +		ip6h = (struct ipv6hdr *)(skb->data + l2_len);
> +		if (ip6h->nexthdr != NEXTHDR_TCP)
> +			return -EOPNOTSUPP;
> +
> +		ip6h->payload_len = cpu_to_be16(len - l2_len - sizeof(*ip6h));
> +		th_off = l2_len + sizeof(*ip6h);
> +	}
> +
> +	tcp_win = FIELD_GET(QDMA_ETH_RXMSG_TCP_WIN_MASK, msg3);
> +	tcp_ack_seq = le32_to_cpu(READ_ONCE(desc->data));
> +
> +	if (!pskb_may_pull(skb, th_off + sizeof(*th)))
> +		return -EINVAL;
> +
> +	th = (struct tcphdr *)(skb->data + th_off);
> +	th->ack_seq = cpu_to_be32(tcp_ack_seq);
> +	th->window = cpu_to_be16(tcp_win);
> +
> +	/* Check tcp timestamp option */
> +	if (th->doff == (sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4) {
> +		__be32 *topt = (__be32 *)(th + 1);

Make sure you've checked the code with sparse (sometimes casts need to
be marked as __force, though not this one).

> +
> +		if (*topt == cpu_to_be32((TCPOPT_NOP << 24) |

Shouldn't this be `((u32)TCPOPT_NOP) << 24` to avoid sign issues?

> +					 (TCPOPT_NOP << 16) |
> +					 (TCPOPT_TIMESTAMP << 8) |
> +					 TCPOLEN_TIMESTAMP)) {
> +			__le32 tcp_ts_reply = READ_ONCE(desc->tcp_ts_reply);
> +
> +			put_unaligned_be32(le32_to_cpu(tcp_ts_reply),
> +					   topt + 2);
> +		}
> +	}
> +
> +	return 0;
> +}
Thanks,
Olek



More information about the linux-arm-kernel mailing list