[openwrt/openwrt] ag71xx: Reorder ag71xx struct members for better cache performance

LEDE Commits lede-commits at lists.infradead.org
Mon Feb 5 01:18:13 PST 2018


nbd pushed a commit to openwrt/openwrt.git, branch master:
https://git.lede-project.org/4e03a742e0e59a0b996196500d06bb72ff224c02

commit 4e03a742e0e59a0b996196500d06bb72ff224c02
Author: Rosen Penev <rosenp at gmail.com>
AuthorDate: Mon Dec 4 11:40:23 2017 -0800

    ag71xx: Reorder ag71xx struct members for better cache performance
    
    Qualcomm claims this improves the D-cache footprint. Original commit message below:
    
    From: Ben Menchaca <ben.menchaca at qca.qualcomm.com>
    Date: Fri, 7 Jun 2013 10:57:28 -0500
    Subject: [ag71xx] cluster/align structs for cache perf
    
    Cluster the frequently used, per-packet structures in ag71xx near
    to each other, and cacheline-align them.  Some other re-ordering
    occurred to move "warmer" structures near the per-packet structures.
    
    Signed-off-by: Ben Menchaca <ben.menchaca at qca.qualcomm.com>
    Signed-off-by: Rosen Penev <rosenp at gmail.com>
---
 .../drivers/net/ethernet/atheros/ag71xx/ag71xx.h   | 27 ++++++++++++++--------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
index a712812..5ead6b3 100644
--- a/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
+++ b/target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
@@ -153,20 +153,31 @@ struct ag71xx_debug {
 };
 
 struct ag71xx {
-	void __iomem		*mac_base;
+	/*
+	 * Critical data related to the per-packet data path are clustered
+	 * early in this structure to help improve the D-cache footprint.
+	 */
+	struct ag71xx_ring	rx_ring ____cacheline_aligned;
+	struct ag71xx_ring	tx_ring ____cacheline_aligned;
+
+	unsigned int            max_frame_len;
+	unsigned int            desc_pktlen_mask;
+	unsigned int            rx_buf_size;
 
-	spinlock_t		lock;
-	struct platform_device	*pdev;
 	struct net_device	*dev;
+	struct platform_device  *pdev;
+	spinlock_t		lock;
 	struct napi_struct	napi;
 	u32			msg_enable;
 
+	/*
+	 * From this point onwards we're not looking at per-packet fields.
+	 */
+	void __iomem		*mac_base;
+
 	struct ag71xx_desc	*stop_desc;
 	dma_addr_t		stop_desc_dma;
 
-	struct ag71xx_ring	rx_ring;
-	struct ag71xx_ring	tx_ring;
-
 	struct mii_bus		*mii_bus;
 	struct phy_device	*phy_dev;
 	void			*phy_priv;
@@ -175,10 +186,6 @@ struct ag71xx {
 	unsigned int		speed;
 	int			duplex;
 
-	unsigned int		max_frame_len;
-	unsigned int		desc_pktlen_mask;
-	unsigned int		rx_buf_size;
-
 	struct delayed_work	restart_work;
 	struct delayed_work	link_work;
 	struct timer_list	oom_timer;



More information about the lede-commits mailing list