[openwrt/openwrt] mac80211: merge performance improvement patches

LEDE Commits lede-commits at lists.infradead.org
Thu Aug 6 06:44:04 EDT 2020


nbd pushed a commit to openwrt/openwrt.git, branch master:
https://git.openwrt.org/3d731fc9030655e18d86f81ca8aed3341ab2bc1e

commit 3d731fc9030655e18d86f81ca8aed3341ab2bc1e
Author: Felix Fietkau <nbd at nbd.name>
AuthorDate: Sun Jul 26 15:12:32 2020 +0200

    mac80211: merge performance improvement patches
    
    Fix fq_codel performance issues
    Add a new rx function for batch processing
    
    Signed-off-by: Felix Fietkau <nbd at nbd.name>
---
 ...d-a-function-for-running-rx-without-passi.patch | 186 +++++++++++++++++++++
 ...-use-skb_get_hash-instead-of-skb_get_hash.patch |  55 ++++++
 ...calculcate-skb-hash-early-when-using-itxq.patch |  19 +++
 3 files changed, 260 insertions(+)

diff --git a/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch b/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch
new file mode 100644
index 0000000000..5837a7b651
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch
@@ -0,0 +1,186 @@
+From: Felix Fietkau <nbd at nbd.name>
+Date: Sat, 25 Jul 2020 20:53:23 +0200
+Subject: [PATCH] mac80211: add a function for running rx without passing skbs
+ to the stack
+
+This can be used to run mac80211 rx processing on a batch of frames in NAPI
+poll before passing them to the network stack in a large batch.
+This can improve icache footprint, or it can be used to pass frames via
+netif_receive_skb_list.
+
+Signed-off-by: Felix Fietkau <nbd at nbd.name>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_
+ void ieee80211_restart_hw(struct ieee80211_hw *hw);
+ 
+ /**
++ * ieee80211_rx_list - receive frame and store processed skbs in a list
++ *
++ * Use this function to hand received frames to mac80211. The receive
++ * buffer in @skb must start with an IEEE 802.11 header. In case a
++ * paged @skb is used, the driver is recommended to put the ieee80211
++ * header of the frame on the linear part of the @skb to avoid memory
++ * allocation and/or memcpy by the stack.
++ *
++ * This function may not be called in IRQ context. Calls to this function
++ * for a single hardware must be synchronized against each other. Calls to
++ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
++ * mixed for a single hardware. Must not run concurrently with
++ * ieee80211_tx_status() or ieee80211_tx_status_ni().
++ *
++ * This function must be called with BHs disabled and the RCU read lock held.
++ *
++ * @hw: the hardware this frame came in on
++ * @sta: the station the frame was received from, or %NULL
++ * @skb: the buffer to receive, owned by mac80211 after this call
++ * @list: the destination list
++ */
++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
++		       struct sk_buff *skb, struct list_head *list);
++
++/**
+  * ieee80211_rx_napi - receive frame from NAPI context
+  *
+  * Use this function to hand received frames to mac80211. The receive
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -218,7 +218,7 @@ enum ieee80211_rx_flags {
+ };
+ 
+ struct ieee80211_rx_data {
+-	struct napi_struct *napi;
++	struct list_head *list;
+ 	struct sk_buff *skb;
+ 	struct ieee80211_local *local;
+ 	struct ieee80211_sub_if_data *sdata;
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc
+ 		memset(skb->cb, 0, sizeof(skb->cb));
+ 
+ 		/* deliver to local stack */
+-		if (rx->napi)
+-			napi_gro_receive(rx->napi, skb);
++		if (rx->list)
++			list_add_tail(&skb->list, rx->list);
+ 		else
+ 			netif_receive_skb(skb);
+ 	}
+@@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s
+ 		/* This is OK -- must be QoS data frame */
+ 		.security_idx = tid,
+ 		.seqno_idx = tid,
+-		.napi = NULL, /* must be NULL to not have races */
+ 	};
+ 	struct tid_ampdu_rx *tid_agg_rx;
+ 
+@@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str
+ 	/* deliver to local stack */
+ 	skb->protocol = eth_type_trans(skb, fast_rx->dev);
+ 	memset(skb->cb, 0, sizeof(skb->cb));
+-	if (rx->napi)
+-		napi_gro_receive(rx->napi, skb);
++	if (rx->list)
++		list_add_tail(&skb->list, rx->list);
+ 	else
+ 		netif_receive_skb(skb);
+ 
+@@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han
+ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
+ 					 struct ieee80211_sta *pubsta,
+ 					 struct sk_buff *skb,
+-					 struct napi_struct *napi)
++					 struct list_head *list)
+ {
+ 	struct ieee80211_local *local = hw_to_local(hw);
+ 	struct ieee80211_sub_if_data *sdata;
+@@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet
+ 	memset(&rx, 0, sizeof(rx));
+ 	rx.skb = skb;
+ 	rx.local = local;
+-	rx.napi = napi;
++	rx.list = list;
+ 
+ 	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
+ 		I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
+@@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet
+  * This is the receive path handler. It is called by a low level driver when an
+  * 802.11 MPDU is received from the hardware.
+  */
+-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+-		       struct sk_buff *skb, struct napi_struct *napi)
++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
++		       struct sk_buff *skb, struct list_head *list)
+ {
+ 	struct ieee80211_local *local = hw_to_local(hw);
+ 	struct ieee80211_rate *rate = NULL;
+@@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_
+ 	status->rx_flags = 0;
+ 
+ 	/*
+-	 * key references and virtual interfaces are protected using RCU
+-	 * and this requires that we are in a read-side RCU section during
+-	 * receive processing
+-	 */
+-	rcu_read_lock();
+-
+-	/*
+ 	 * Frames with failed FCS/PLCP checksum are not returned,
+ 	 * all other frames are returned without radiotap header
+ 	 * if it was previously present.
+ 	 * Also, frames with less than 16 bytes are dropped.
+ 	 */
+ 	skb = ieee80211_rx_monitor(local, skb, rate);
+-	if (!skb) {
+-		rcu_read_unlock();
++	if (!skb)
+ 		return;
+-	}
+ 
+ 	ieee80211_tpt_led_trig_rx(local,
+ 			((struct ieee80211_hdr *)skb->data)->frame_control,
+ 			skb->len);
+ 
+-	__ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
+-
+-	rcu_read_unlock();
++	__ieee80211_rx_handle_packet(hw, pubsta, skb, list);
+ 
+ 	return;
+  drop:
+ 	kfree_skb(skb);
+ }
++EXPORT_SYMBOL(ieee80211_rx_list);
++
++void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
++		       struct sk_buff *skb, struct napi_struct *napi)
++{
++	struct sk_buff *tmp;
++	LIST_HEAD(list);
++
++
++	/*
++	 * key references and virtual interfaces are protected using RCU
++	 * and this requires that we are in a read-side RCU section during
++	 * receive processing
++	 */
++	rcu_read_lock();
++	ieee80211_rx_list(hw, pubsta, skb, &list);
++	rcu_read_unlock();
++
++	if (!napi) {
++		netif_receive_skb_list(&list);
++		return;
++	}
++
++	list_for_each_entry_safe(skb, tmp, &list, list) {
++		skb_list_del_init(skb);
++		napi_gro_receive(napi, skb);
++	}
++}
+ EXPORT_SYMBOL(ieee80211_rx_napi);
+ 
+ /* This is a version of the rx handler that can be called from hard irq
diff --git a/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch b/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch
new file mode 100644
index 0000000000..77ecc82302
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch
@@ -0,0 +1,55 @@
+From: Felix Fietkau <nbd at nbd.name>
+Date: Sun, 26 Jul 2020 14:37:02 +0200
+Subject: [PATCH] net/fq_impl: use skb_get_hash instead of
+ skb_get_hash_perturb
+
+This avoids unnecessarily regenerating the skb flow hash.
+
+Signed-off-by: Felix Fietkau <nbd at nbd.name>
+---
+
+--- a/include/net/fq.h
++++ b/include/net/fq.h
+@@ -69,15 +69,6 @@ struct fq {
+ 	struct list_head backlogs;
+ 	spinlock_t lock;
+ 	u32 flows_cnt;
+-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
+-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
+-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
+-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
+-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
+-	siphash_key_t	perturbation;
+-#else
+-	u32 perturbation;
+-#endif
+ 	u32 limit;
+ 	u32 memory_limit;
+ 	u32 memory_usage;
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -108,15 +108,7 @@ begin:
+ 
+ static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
+ {
+-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
+-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
+-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
+-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
+-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
+-	u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
+-#else
+-	u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+-#endif
++	u32 hash = skb_get_hash(skb);
+ 
+ 	return reciprocal_scale(hash, fq->flows_cnt);
+ }
+@@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl
+ 	INIT_LIST_HEAD(&fq->backlogs);
+ 	spin_lock_init(&fq->lock);
+ 	fq->flows_cnt = max_t(u32, flows_cnt, 1);
+-	get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
+ 	fq->quantum = 300;
+ 	fq->limit = 8192;
+ 	fq->memory_limit = 16 << 20; /* 16 MBytes */
diff --git a/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch b/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch
new file mode 100644
index 0000000000..92b136279a
--- /dev/null
+++ b/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch
@@ -0,0 +1,19 @@
+From: Felix Fietkau <nbd at nbd.name>
+Date: Sun, 26 Jul 2020 14:42:58 +0200
+Subject: [PATCH] mac80211: calculate skb hash early when using itxq
+
+This avoids flow separation issues when using software encryption
+
+Signed-off-by: Felix Fietkau <nbd at nbd.name>
+---
+
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct
+ 	if (local->ops->wake_tx_queue) {
+ 		u16 queue = __ieee80211_select_queue(sdata, sta, skb);
+ 		skb_set_queue_mapping(skb, queue);
++		skb_get_hash(skb);
+ 	}
+ 
+ 	if (sta) {



More information about the lede-commits mailing list