Problem with PHY state machine when using interrupts
Mason
slash.tmp at free.fr
Tue Jul 25 03:51:55 PDT 2017
On 25/07/2017 00:39, Florian Fainelli wrote:
> diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
> index d0626bf5c540..652e24b53f3f 100644
> --- a/drivers/net/phy/phy.c
> +++ b/drivers/net/phy/phy.c
> @@ -968,6 +968,8 @@ void phy_stop(struct phy_device *phydev)
> * of rtnl_lock(), but PHY_HALTED shall guarantee phy_change()
> * will not reenable interrupts.
> */
> + if (phy_interrupt_is_valid(phydev))
> + phy_change(phydev);
> }
> EXPORT_SYMBOL(phy_stop);
>
When setting link down:
- kernel logs Unbalanced enable for IRQ 30 (PHY interrupt)
- serial console hangs
# ip link set eth0 up
[ 125.800057] ENTER nb8800_tangox_reset
[ 125.803790] ++ETH++ gw8 reg=f0026424 val=00
[ 125.817446] ++ETH++ gw8 reg=f0026424 val=01
[ 125.821768] ++ETH++ gw16 reg=f0026420 val=0050
[ 125.826241] ENTER nb8800_hw_init
[ 125.829507] ++ETH++ gw8 reg=f0026000 val=1c
[ 125.833808] ++ETH++ gw8 reg=f0026001 val=05
[ 125.838122] ++ETH++ gw8 reg=f0026004 val=22
[ 125.842421] ++ETH++ gw8 reg=f0026008 val=04
[ 125.846717] ++ETH++ gw8 reg=f0026014 val=0c
[ 125.851015] ++ETH++ gw8 reg=f0026051 val=00
[ 125.855310] ++ETH++ gw8 reg=f0026052 val=ff
[ 125.859607] ++ETH++ gw8 reg=f0026054 val=40
[ 125.863903] ++ETH++ gw8 reg=f0026060 val=00
[ 125.868226] ++ETH++ gw8 reg=f0026061 val=c3
[ 125.872534] ENTER nb8800_mc_init
[ 125.875800] ++ETH++ gw8 reg=f002602e val=00
[ 125.880096] ENTER nb8800_tangox_init
[ 125.883697] ++ETH++ gw8 reg=f0026400 val=01
[ 125.887993] ENTER nb8800_tango4_init
[ 125.891592] ENTER nb8800_update_mac_addr
[ 125.895538] ++ETH++ gw8 reg=f002606a val=00
[ 125.899835] ++ETH++ gw8 reg=f002606b val=16
[ 125.904132] ++ETH++ gw8 reg=f002606c val=e8
[ 125.908429] ++ETH++ gw8 reg=f002606d val=5e
[ 125.912725] ++ETH++ gw8 reg=f002606e val=65
[ 125.917022] ++ETH++ gw8 reg=f002606f val=bc
[ 125.921319] ++ETH++ gw8 reg=f002603c val=00
[ 125.925618] ++ETH++ gw8 reg=f002603d val=16
[ 125.929912] ++ETH++ gw8 reg=f002603e val=e8
[ 125.934210] ++ETH++ gw8 reg=f002603f val=5e
[ 125.938505] ++ETH++ gw8 reg=f0026040 val=65
[ 125.942802] ++ETH++ gw8 reg=f0026041 val=bc
[ 125.947095] ENTER nb8800_open
[ 125.950082] ENTER nb8800_dma_init
[ 125.954025] ++ETH++ gw8 reg=f0026004 val=23
[ 125.958331] ++ETH++ gw8 reg=f0026000 val=1d
[ 126.027848] ENTER nb8800_set_rx_mode
[ 126.031451] ENTER nb8800_mc_init
[ 126.034702] ++ETH++ gw8 reg=f002602e val=00
[ 126.039334] Atheros 8035 ethernet 26000.nb8800-mii:04: PHY state change UP -> AN
[ 126.046838] ENTER nb8800_set_rx_mode
[ 126.050438] ENTER nb8800_mc_init
[ 126.053688] ++ETH++ gw8 reg=f002602e val=00
[ 126.057983] ++ETH++ gw8 reg=f0026028 val=01
[ 126.062279] ++ETH++ gw8 reg=f0026029 val=00
[ 126.066573] ++ETH++ gw8 reg=f002602a val=5e
[ 126.070868] ++ETH++ gw8 reg=f002602b val=00
[ 126.075162] ++ETH++ gw8 reg=f002602c val=00
[ 126.079457] ++ETH++ gw8 reg=f002602d val=01
[ 126.083751] ENTER nb8800_mc_init
[ 126.086997] ++ETH++ gw8 reg=f002602e val=ff
[ 129.426741] ENTER nb8800_link_reconfigure
[ 129.430800] PRIV link=0 speed=0 duplex=0
[ 129.434753] PHYDEV link=1 speed=1000 duplex=1
[ 129.439141] ENTER nb8800_mac_config
[ 129.442662] ++ETH++ gw8 reg=f0026050 val=01
[ 129.446962] ++ETH++ gw8 reg=f002601c val=ff
[ 129.451262] ++ETH++ gw8 reg=f0026044 val=81
[ 129.455563] ENTER nb8800_pause_config
[ 129.459252] ++ETH++ gw8 reg=f0026004 val=2b
[ 129.463558] nb8800 26000.ethernet eth0: Link is Up - 1Gbps/Full - flow control rx/tx
[ 129.471355] Atheros 8035 ethernet 26000.nb8800-mii:04: PHY state change AN -> RUNNING
# ip link set eth0 down
[ 170.933454] ENTER nb8800_set_rx_mode
[ 170.937067] ENTER nb8800_mc_init
[ 170.940318] ++ETH++ gw8 reg=f002602e val=00
[ 170.944613] ++ETH++ gw8 reg=f0026028 val=01
[ 170.948907] ++ETH++ gw8 reg=f0026029 val=00
[ 170.953199] ++ETH++ gw8 reg=f002602a val=5e
[ 170.957494] ++ETH++ gw8 reg=f002602b val=00
[ 170.961786] ++ETH++ gw8 reg=f002602c val=00
[ 170.966079] ++ETH++ gw8 reg=f002602d val=01
[ 170.970371] ENTER nb8800_mc_init
[ 170.973616] ++ETH++ gw8 reg=f002602e val=ff
[ 170.977962] ENTER nb8800_stop
[ 170.981210] ------------[ cut here ]------------
[ 170.985860] WARNING: CPU: 0 PID: 964 at kernel/irq/manage.c:498 __enable_irq+0x44/0x68
[ 170.993815] Unbalanced enable for IRQ 30
[ 170.997751] Modules linked in:
[ 171.000821] CPU: 0 PID: 964 Comm: ip Not tainted 4.13.0-rc1 #154
[ 171.006854] Hardware name: Sigma Tango DT
[ 171.010896] [<c010e88c>] (unwind_backtrace) from [<c010b014>] (show_stack+0x10/0x14)
[ 171.018686] [<c010b014>] (show_stack) from [<c04ab694>] (dump_stack+0x78/0x8c)
[ 171.025950] [<c04ab694>] (dump_stack) from [<c0118228>] (__warn+0xe8/0x100)
[ 171.032948] [<c0118228>] (__warn) from [<c0118278>] (warn_slowpath_fmt+0x38/0x48)
[ 171.040471] [<c0118278>] (warn_slowpath_fmt) from [<c015f0e4>] (__enable_irq+0x44/0x68)
[ 171.048519] [<c015f0e4>] (__enable_irq) from [<c015f13c>] (enable_irq+0x34/0x6c)
[ 171.055956] [<c015f13c>] (enable_irq) from [<c037fa84>] (phy_change+0x98/0x13c)
[ 171.063305] [<c037fa84>] (phy_change) from [<c037fc04>] (phy_stop+0x90/0x94)
[ 171.070391] [<c037fc04>] (phy_stop) from [<c0385330>] (nb8800_stop+0x24/0x84)
[ 171.077567] [<c0385330>] (nb8800_stop) from [<c03fcdb4>] (__dev_close_many+0x88/0xd0)
[ 171.085439] [<c03fcdb4>] (__dev_close_many) from [<c03fcf1c>] (__dev_close+0x24/0x38)
[ 171.093311] [<c03fcf1c>] (__dev_close) from [<c04058c4>] (__dev_change_flags+0x94/0x144)
[ 171.101445] [<c04058c4>] (__dev_change_flags) from [<c040598c>] (dev_change_flags+0x18/0x48)
[ 171.109929] [<c040598c>] (dev_change_flags) from [<c046aed0>] (devinet_ioctl+0x6e0/0x7a0)
[ 171.118152] [<c046aed0>] (devinet_ioctl) from [<c046d360>] (inet_ioctl+0x194/0x1c0)
[ 171.125852] [<c046d360>] (inet_ioctl) from [<c03e5d80>] (sock_ioctl+0x148/0x2fc)
[ 171.133290] [<c03e5d80>] (sock_ioctl) from [<c01e25f0>] (do_vfs_ioctl+0x9c/0x8e8)
[ 171.140814] [<c01e25f0>] (do_vfs_ioctl) from [<c01e2e70>] (SyS_ioctl+0x34/0x5c)
[ 171.148162] [<c01e2e70>] (SyS_ioctl) from [<c01076c0>] (ret_fast_syscall+0x0/0x3c)
[ 171.155769] ---[ end trace ac6e716156da6518 ]---
[ 171.160498] ENTER nb8800_link_reconfigure
[ 171.161322] ++ETH++ gw8 reg=f0026004 val=0b
[ 171.161325] ++ETH++ gw8 reg=f0026044 val=85
[ 171.173176] PRIV link=1 speed=1000 duplex=1
[ 171.177388] PHYDEV link=0 speed=1000 duplex=1
[ 171.181793] nb8800 26000.ethernet eth0: Link is Down
[ 171.661513] ++ETH++ gw8 reg=f0026004 val=2b
[ 171.665812] ++ETH++ gw8 reg=f0026044 val=81
[ 171.670155] ++ETH++ gw8 reg=f0026004 val=2a
^C^C^C^C
Using SysRq to find *where* it hangs...
[ 187.010561] NMI backtrace for cpu 1
[ 187.010567] CPU: 1 PID: 964 Comm: ip Tainted: G W 4.13.0-rc1 #154
[ 187.010569] Hardware name: Sigma Tango DT
[ 187.010571] task: deedb580 task.stack: ded7e000
[ 187.010577] PC is at nb8800_mac_tx+0x4/0x54
[ 187.010580] LR is at nb8800_stop+0x5c/0x84
[ 187.010583] pc : [<c0383ba8>] lr : [<c0385368>] psr: 200f0013
...
[ 187.010675] [<c010bb4c>] (__irq_svc) from [<c0383ba8>] (nb8800_mac_tx+0x4/0x54)
[ 187.010684] [<c0383ba8>] (nb8800_mac_tx) from [<c03fcdb4>] (__dev_close_many+0x88/0xd0)
[ 187.010691] [<c03fcdb4>] (__dev_close_many) from [<c03fcf1c>] (__dev_close+0x24/0x38)
[ 187.010697] [<c03fcf1c>] (__dev_close) from [<c04058c4>] (__dev_change_flags+0x94/0x144)
[ 187.010703] [<c04058c4>] (__dev_change_flags) from [<c040598c>] (dev_change_flags+0x18/0x48)
[ 187.010710] [<c040598c>] (dev_change_flags) from [<c046aed0>] (devinet_ioctl+0x6e0/0x7a0)
[ 187.010717] [<c046aed0>] (devinet_ioctl) from [<c046d360>] (inet_ioctl+0x194/0x1c0)
[ 187.010725] [<c046d360>] (inet_ioctl) from [<c03e5d80>] (sock_ioctl+0x148/0x2fc)
[ 187.010735] [<c03e5d80>] (sock_ioctl) from [<c01e25f0>] (do_vfs_ioctl+0x9c/0x8e8)
[ 187.010743] [<c01e25f0>] (do_vfs_ioctl) from [<c01e2e70>] (SyS_ioctl+0x34/0x5c)
[ 187.010749] [<c01e2e70>] (SyS_ioctl) from [<c01076c0>
I'm not sure that phy_change is supposed to be called from
process context?
/* phy_change - Called by the phy_interrupt to handle PHY changes */
Moving the call to phy_stop() down after all the MAC tear down
avoids the hang.
As far as I understand, when we are shutting everything down,
we don't need the phy_state_machine to run asynchronously.
We can run it synchronously one last time after the delayed
stuff has been disabled.
Regards.
More information about the linux-arm-kernel
mailing list