[openwrt/openwrt] cns3xxx: fix ethernet IRQ imbalance in 4.14

LEDE Commits lede-commits at lists.infradead.org
Wed Jan 17 02:07:32 PST 2018


blogic pushed a commit to openwrt/openwrt.git, branch master:
https://git.lede-project.org/f3dec4452043a9ba3ea828f6bb3a38196fc46bbb

commit f3dec4452043a9ba3ea828f6bb3a38196fc46bbb
Author: Koen Vandeputte <koen.vandeputte at ncentric.com>
AuthorDate: Thu Jan 11 16:04:37 2018 +0100

    cns3xxx: fix ethernet IRQ imbalance in 4.14
    
    Kernel 4.14 contains more warnings regarding NAPI usage, which showed
    the following warning:
    
    [ 4016.420000] ------------[ cut here ]------------
    [ 4016.430000] WARNING: CPU: 1 PID: 14 at kernel/irq/manage.c:525
    __enable_irq+0x3c/0x78
    [ 4016.440000] Unbalanced enable for IRQ 51
    [ 4016.440000] Modules linked in: ath9k ath9k_common pppoe ppp_async
    ath9k_hw ath pppox ppp_generic nf_conntrack_ipv6 mac80211 iptable_nat
    ipt_REJECT ipt_MASQUERADE cfg80211 xt_time xt_tcpudp xt_state xt_nat
    xt_multiport xt_mark xt_mac xt_limit xt_conntrack xt_comment xt_TCPMSS
    xt_REDIRECT xt_LOG slhc nf_reject_ipv4 nf_nat_redirs
    [ 4016.500000] CPU: 1 PID: 14 Comm: ksoftirqd/1 Tainted: G        W
           4.14.11 #0
    [ 4016.500000] Hardware name: Gateworks Corporation Laguna Platform
    [ 4016.510000] Backtrace:
    [ 4016.510000] [<c010a28c>] (dump_backtrace) from [<c010a56c>]
    (show_stack+0x18/0x1c)
    [ 4016.520000]  r7:c059df4b r6:00000000 r5:60000093 r4:00000000
    [ 4016.530000] [<c010a554>] (show_stack) from [<c048b04c>]
    (dump_stack+0x94/0xb4)
    [ 4016.530000] [<c048afb8>] (dump_stack) from [<c011760c>]
    (__warn+0xc8/0xf8)
    [ 4016.540000]  r7:c059df4b r6:c015144c r5:00000000 r4:cf863e08
    [ 4016.550000] [<c0117544>] (__warn) from [<c011767c>]
    (warn_slowpath_fmt+0x40/0x48)
    [ 4016.550000]  r9:cf863eb0 r8:00000036 r7:00000000 r6:c0636360
    r5:00000033 r4:cf80a500
    [ 4016.560000] [<c0117640>] (warn_slowpath_fmt) from [<c015144c>]
    (__enable_irq+0x3c/0x78)
    [ 4016.570000]  r3:00000033 r2:c059e0c5
    [ 4016.570000] [<c0151410>] (__enable_irq) from [<c01514ec>]
    (enable_irq+0x64/0x7c)
    [ 4016.580000] [<c0151488>] (enable_irq) from [<c0335c14>]
    (eth_poll+0x28c/0x558)
    [ 4016.590000]  r5:d08926c0 r4:cf894488
    [ 4016.590000] [<c0335988>] (eth_poll) from [<c03b1718>]
    (net_rx_action+0xfc/0x2e8)
    [ 4016.600000]  r10:cf863eb8 r9:cf863eb0 r8:00000001 r7:0f7b5000
    r6:c0636360 r5:cfdeb360
    [ 4016.610000]  r4:cf894488
    [ 4016.610000] [<c03b161c>] (net_rx_action) from [<c0101510>]
    (__do_softirq+0xe0/0x228)
    [ 4016.620000]  r10:00000100 r9:c070204c r8:c0702040 r7:00000003
    r6:00000008 r5:40000003
    [ 4016.620000]  r4:cf862000
    [ 4016.630000] [<c0101430>] (__do_softirq) from [<c011ae28>]
    (run_ksoftirqd+0x34/0x58)
    [ 4016.630000]  r10:c0133964 r9:00000000 r8:00000001 r7:00000000
    r6:c070b44c r5:cf862000
    [ 4016.640000]  r4:cf8037e0
    [ 4016.640000] [<c011adf4>] (run_ksoftirqd) from [<c0133adc>]
    (smpboot_thread_fn+0x178/0x190)
    [ 4016.650000] [<c0133964>] (smpboot_thread_fn) from [<c013099c>]
    (kthread+0x11c/0x138)
    [ 4016.660000]  r9:cf8037e0 r8:cf802cdc r7:cf839df8 r6:cf803800
    r5:00000000 r4:cf802cc0
    [ 4016.670000] [<c0130880>] (kthread) from [<c0107218>]
    (ret_from_fork+0x14/0x3c)
    [ 4016.680000]  r10:00000000 r9:00000000 r8:00000000 r7:00000000
    r6:00000000 r5:c0130880
    [ 4016.680000]  r4:cf803800 r3:cf862000
    [ 4016.690000] ---[ end trace 51114df08f429115 ]---
    
    This is triggered by calling eth_schedule_poll(sw) after the IRQ has
    been re-enabled.
    
    Rework the network code to only enable IRQs again if NAPI agrees it's
    safe to do so.
    
    Also only re-enable the IRQ *after* cleaning up the RX ring and
    re-enabling DMA, which otherwise resulted in ugly warnings regarding
    dirty page fragments.
    These popped up nearly immediately when building the kernel with -O2
    instead of -Os.
    
    --> Note that this change fixes stability issues, at the cost of ~8%
    throughput performance.
    
    While at it, also change the iface name being used in warning prints,
    making it more obvious: "switch%d" --> "cns3xxx_eth"
    
    The changes have been tested on 4 boards, each moving ~30TB of data
    
    Signed-off-by: Koen Vandeputte <koen.vandeputte at ncentric.com>
---
 .../140-fix-ethernet-irq-imbalance.patch           | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/target/linux/cns3xxx/patches-4.14/140-fix-ethernet-irq-imbalance.patch b/target/linux/cns3xxx/patches-4.14/140-fix-ethernet-irq-imbalance.patch
new file mode 100644
index 0000000..3538586
--- /dev/null
+++ b/target/linux/cns3xxx/patches-4.14/140-fix-ethernet-irq-imbalance.patch
@@ -0,0 +1,77 @@
+--- a/drivers/net/ethernet/cavium/cns3xxx_eth.c
++++ b/drivers/net/ethernet/cavium/cns3xxx_eth.c
+@@ -712,26 +712,20 @@ static int eth_poll(struct napi_struct *
+ 	}
+ 
+ 	rx_ring->cur_index = i;
+-	if (!received) {
+-		napi_complete(napi);
+-		enable_irq(sw->rx_irq);
+-		budget = 0;
+-
+-		/* If 1 or more frames came in during IRQ enable, re-schedule */
+-		if (rx_ring->desc[i].cown)
+-			eth_schedule_poll(sw);
+-	}
+-
+-	spin_lock_bh(&tx_lock);
+-	eth_complete_tx(sw);
+-	spin_unlock_bh(&tx_lock);
+ 
+ 	cns3xxx_alloc_rx_buf(sw, received);
+-
+ 	wmb();
+ 	enable_rx_dma(sw);
+ 
+-	return budget;
++	if (received < budget && napi_complete_done(napi, received)) {
++		enable_irq(sw->rx_irq);
++	}
++	
++	spin_lock_bh(&tx_lock);
++	eth_complete_tx(sw);
++	spin_unlock_bh(&tx_lock);
++	
++	return received;
+ }
+ 
+ static void eth_set_desc(struct sw *sw, struct _tx_ring *tx_ring, int index,
+@@ -856,18 +850,6 @@ static void cns3xxx_get_drvinfo(struct n
+ 	strcpy(info->bus_info, "internal");
+ }
+ 
+-static int cns3xxx_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+-{
+-	struct port *port = netdev_priv(dev);
+-	return phy_ethtool_gset(port->phydev, cmd);
+-}
+-
+-static int cns3xxx_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+-{
+-	struct port *port = netdev_priv(dev);
+-	return phy_ethtool_sset(port->phydev, cmd);
+-}
+-
+ static int cns3xxx_nway_reset(struct net_device *dev)
+ {
+ 	struct port *port = netdev_priv(dev);
+@@ -876,8 +858,8 @@ static int cns3xxx_nway_reset(struct net
+ 
+ static struct ethtool_ops cns3xxx_ethtool_ops = {
+ 	.get_drvinfo = cns3xxx_get_drvinfo,
+-	.get_settings = cns3xxx_get_settings,
+-	.set_settings = cns3xxx_set_settings,
++	.get_link_ksettings = phy_ethtool_get_link_ksettings,
++	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+ 	.nway_reset = cns3xxx_nway_reset,
+ 	.get_link = ethtool_op_get_link,
+ };
+@@ -1177,7 +1159,7 @@ static int eth_init_one(struct platform_
+ 		goto err_remove_mdio;
+ 	}
+ 
+-	strcpy(napi_dev->name, "switch%d");
++	strcpy(napi_dev->name, "cns3xxx_eth");
+ 	napi_dev->features = NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST;
+ 
+ 	SET_NETDEV_DEV(napi_dev, &pdev->dev);



More information about the lede-commits mailing list