regression in xgene-enet in 4.8-rc series, oops from xgene_enet_probe

Riku Voipio riku.voipio at linaro.org
Wed Aug 17 01:27:57 PDT 2016


On 26 July 2016 at 03:12, Iyappan Subramanian <isubramanian at apm.com> wrote:
> When the driver is configured as kernel module and when it gets
> unloaded and reloaded, kernel crash was observed.  This patch
> addresses the software cleanup by doing the following,
>
> - Moved register_netdev call after hardware is ready
> - Since ndev is not ready, added set_irq_name to set irq name
> - Since ndev is not ready, changed mdio_bus->parent to pdev->dev
> - Replaced netif_start(stop)_queue by netif_tx_start(stop)_queues
> - Removed napi_del call since it's called by free_netdev
> - Added dev_close call, within remove
> - Added shutdown callback
> - Changed to use dmam_ APIs

Bisecting points to this patch, committed as
cb0366b7c16427a25923350b69f53a5b1345a34b, as the cause of an oops when
booting the APM Mustang:

[    1.670201] ------------[ cut here ]------------
[    1.674804] WARNING: CPU: 2 PID: 1 at ../net/core/dev.c:6696
rollback_registered_many+0x60/0x300
[    1.683543] Modules linked in: realtek
[    1.687291]
[    1.688774] CPU: 2 PID: 1 Comm: swapper/0 Not tainted
4.8.0-rc2-00037-g3ec60b92d3ba #1
[    1.696648] Hardware name: APM X-Gene Mustang board (DT)
[    1.701930] task: ffff8003ee078000 task.stack: ffff8003ee054000
[    1.707819] PC is at rollback_registered_many+0x60/0x300
[    1.713102] LR is at rollback_registered_many+0x30/0x300
[    1.718384] pc : [] lr : [] pstate: 20000045
[    1.725739] sp : ffff8003ee057b00
[    1.729034] x29: ffff8003ee057b00 x28: ffff8003eda1a000
[    1.734338] x27: 0000000000000002 x26: ffff8003ebcba970
[    1.739641] x25: ffff8003eda1a208 x24: ffff8003eda1a010
[    1.744945] x23: ffff8003ee057c58 x22: ffff8003ebcba000
[    1.750247] x21: 00000000ffffffed x20: ffff8003ee057b70
[    1.755549] x19: ffff8003ee057b50 x18: ffff000008dfafff
[    1.760852] x17: 0000000000000007 x16: 0000000000000001
[    1.766154] x15: ffff000008ce2000 x14: ffffffffffffffff
[    1.771458] x13: 0000000000000008 x12: 0000000000000030
[    1.776760] x11: 0000000000000030 x10: 0101010101010101
[    1.782062] x9 : 0000000000000000 x8 : ffff8003df80c700
[    1.787365] x7 : 0000000000000000 x6 : 0000000000000001
[    1.792668] x5 : dead000000000100 x4 : dead000000000200
[    1.797971] x3 : ffff8003ebcba070 x2 : 0000000000000000
[    1.803273] x1 : ffff8003ee057b00 x0 : ffff8003ebcba000
[    1.808575]
[    1.810057] ---[ end trace 93f1dda704e63533 ]---
[    1.814648] Call trace:
[    1.816207] ata2: SATA link down (SStatus 0 SControl 4300)
[    1.822535] Exception stack(0xffff8003ee057930 to 0xffff8003ee057a60)
[    1.828941] 7920:
ffff8003ee057b50 0001000000000000
[    1.836729] 7940: ffff8003ee057b00 ffff000008773c18
ffff8003ee057980 ffff000008849a1c
[    1.844517] 7960: 0000000000000009 ffff000008e50000
ffff8003ee0579a0 ffff0000086eb03c
[    1.852305] 7980: ffff000008dbcde8 ffff8003fffe1ca0
0000000000000040 ffff8003ee057998
[    1.860094] 79a0: ffff8003ee0579e0 ffff0000086eb1b0
0000000000000004 ffff8003ee057a4c
f8003ebcba000 ffff8003ee057b00
[    1.875669] 79e0: 0000000000000000 ffff8003ebcba070
dead000000000200 dead000000000100
[    1.883457] 7a00: 0000000000000001 0000000000000000
ffff8003df80c700 0000000000000000
[    1.884198] ata4: SATA link down (SStatus 0 SControl 4300)
[    1.884211] ata3: SATA link down (SStatus 0 SControl 4300)
[    1.902153] 7a20: 0101010101010101 0000000000000030
0000000000000030 0000000000000008
[    1.909941] 7a40: ffffffffffffffff ffff000008ce2000
0000000000000001 0000000000000007
[    1.917730] [] rollback_registered_many+0x60/0x300
[    1.924050] [] rollback_registered+0x28/0x40
[    1.929852] [] unregister_netdevice_queue+0x78/0xb8
[    1.936259] [] unregister_netdev+0x20/0x30
[    1.941889] [] xgene_enet_probe+0x638/0xf98
[    1.947605] [] platform_drv_probe+0x50/0xb8
[    1.953320] [] driver_probe_device+0x204/0x2b0
[    1.959294] [] __driver_attach+0xac/0xb0
[    1.964751] [] bus_for_each_dev+0x60/0xa0
[    1.970293] [] driver_attach+0x20/0x28
[    1.975576] [] bus_add_driver+0x1d0/0x238
[    1.981118] [] driver_register+0x60/0xf8
[    1.986574] [] __platform_driver_register+0x40/0x48
[    1.992982] [] xgene_enet_driver_init+0x18/0x20
[    1.999044] [] do_one_initcall+0x38/0x128
[    2.004588] [] kernel_init_freeable+0x1ac/0x250
[    2.010651] [] kernel_init+0x10/0x100
[    2.015847] [] ret_from_fork+0x10/0x40
[    2.021152] network todo 'eth%d' but state 0

Picked up from:

https://storage.kernelci.org/mainline/v4.8-rc2-37-g3ec60b92d3ba/arm64-defconfig/lab-cambridge/boot-apm-mustang.html

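Visible on all mainline/apm-mustang boot reports. The trace shows
unregister_netdev() being called from xgene_enet_probe(), which looks
like the err_netdev error path running before register_netdev() has
been called (the patch moved the registration to the end of probe).
net-next seems to have a fix for this.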

Riku

> Signed-off-by: Iyappan Subramanian <isubramanian at apm.com>
> Tested-by: Fushen Chen <fchen at apm.com>
> Tested-by: Toan Le <toanle at apm.com>
> ---
>  drivers/net/ethernet/apm/xgene/xgene_enet_hw.c   |   2 +-
>  drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 120 ++++++++++++++---------
>  2 files changed, 73 insertions(+), 49 deletions(-)
>
> diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
> index 009fb8e..4f98749 100644
> --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
> +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
> @@ -901,7 +901,7 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata)
>                  ndev->name);
>
>         mdio_bus->priv = pdata;
> -       mdio_bus->parent = &ndev->dev;
> +       mdio_bus->parent = &pdata->pdev->dev;
>
>         ret = xgene_mdiobus_register(pdata, mdio_bus);
>         if (ret) {
> diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> index f79950a..87e5929 100644
> --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
> @@ -608,6 +608,30 @@ static void xgene_enet_timeout(struct net_device *ndev)
>         }
>  }
>
> +static void xgene_enet_set_irq_name(struct net_device *ndev)
> +{
> +       struct xgene_enet_pdata *pdata = netdev_priv(ndev);
> +       struct xgene_enet_desc_ring *ring;
> +       int i;
> +
> +       for (i = 0; i < pdata->rxq_cnt; i++) {
> +               ring = pdata->rx_ring[i];
> +               if (!pdata->cq_cnt) {
> +                       snprintf(ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc",
> +                                ndev->name);
> +               } else {
> +                       snprintf(ring->irq_name, IRQ_ID_SIZE, "%s-rx-%d",
> +                                ndev->name, i);
> +               }
> +       }
> +
> +       for (i = 0; i < pdata->cq_cnt; i++) {
> +               ring = pdata->tx_ring[i]->cp_ring;
> +               snprintf(ring->irq_name, IRQ_ID_SIZE, "%s-txc-%d",
> +                        ndev->name, i);
> +       }
> +}
> +
>  static int xgene_enet_register_irq(struct net_device *ndev)
>  {
>         struct xgene_enet_pdata *pdata = netdev_priv(ndev);
> @@ -615,6 +639,7 @@ static int xgene_enet_register_irq(struct net_device *ndev)
>         struct xgene_enet_desc_ring *ring;
>         int ret = 0, i;
>
> +       xgene_enet_set_irq_name(ndev);
>         for (i = 0; i < pdata->rxq_cnt; i++) {
>                 ring = pdata->rx_ring[i];
>                 irq_set_status_flags(ring->irq, IRQ_DISABLE_UNLAZY);
> @@ -723,7 +748,7 @@ static int xgene_enet_open(struct net_device *ndev)
>
>         mac_ops->tx_enable(pdata);
>         mac_ops->rx_enable(pdata);
> -       netif_start_queue(ndev);
> +       netif_tx_start_all_queues(ndev);
>
>         return ret;
>  }
> @@ -734,7 +759,7 @@ static int xgene_enet_close(struct net_device *ndev)
>         const struct xgene_mac_ops *mac_ops = pdata->mac_ops;
>         int i;
>
> -       netif_stop_queue(ndev);
> +       netif_tx_stop_all_queues(ndev);
>         mac_ops->tx_disable(pdata);
>         mac_ops->rx_disable(pdata);
>
> @@ -759,7 +784,7 @@ static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring)
>         dev = ndev_to_dev(ring->ndev);
>
>         pdata->ring_ops->clear(ring);
> -       dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
> +       dmam_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
>  }
>
>  static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata)
> @@ -834,7 +859,7 @@ static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring)
>
>         if (ring->desc_addr) {
>                 pdata->ring_ops->clear(ring);
> -               dma_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
> +               dmam_free_coherent(dev, ring->size, ring->desc_addr, ring->dma);
>         }
>         devm_kfree(dev, ring);
>  }
> @@ -892,9 +917,10 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
>                         struct net_device *ndev, u32 ring_num,
>                         enum xgene_enet_ring_cfgsize cfgsize, u32 ring_id)
>  {
> -       struct xgene_enet_desc_ring *ring;
>         struct xgene_enet_pdata *pdata = netdev_priv(ndev);
>         struct device *dev = ndev_to_dev(ndev);
> +       struct xgene_enet_desc_ring *ring;
> +       void *irq_mbox_addr;
>         int size;
>
>         size = xgene_enet_get_ring_size(dev, cfgsize);
> @@ -911,8 +937,8 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
>         ring->cfgsize = cfgsize;
>         ring->id = ring_id;
>
> -       ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma,
> -                                             GFP_KERNEL);
> +       ring->desc_addr = dmam_alloc_coherent(dev, size, &ring->dma,
> +                                             GFP_KERNEL | __GFP_ZERO);
>         if (!ring->desc_addr) {
>                 devm_kfree(dev, ring);
>                 return NULL;
> @@ -920,14 +946,16 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
>         ring->size = size;
>
>         if (is_irq_mbox_required(pdata, ring)) {
> -               ring->irq_mbox_addr = dma_zalloc_coherent(dev, INTR_MBOX_SIZE,
> -                               &ring->irq_mbox_dma, GFP_KERNEL);
> -               if (!ring->irq_mbox_addr) {
> -                       dma_free_coherent(dev, size, ring->desc_addr,
> -                                         ring->dma);
> +               irq_mbox_addr = dmam_alloc_coherent(dev, INTR_MBOX_SIZE,
> +                                                   &ring->irq_mbox_dma,
> +                                                   GFP_KERNEL | __GFP_ZERO);
> +               if (!irq_mbox_addr) {
> +                       dmam_free_coherent(dev, size, ring->desc_addr,
> +                                          ring->dma);
>                         devm_kfree(dev, ring);
>                         return NULL;
>                 }
> +               ring->irq_mbox_addr = irq_mbox_addr;
>         }
>
>         ring->cmd_base = xgene_enet_ring_cmd_base(pdata, ring);
> @@ -988,6 +1016,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
>         u8 eth_bufnum = pdata->eth_bufnum;
>         u8 bp_bufnum = pdata->bp_bufnum;
>         u16 ring_num = pdata->ring_num;
> +       __le64 *exp_bufs;
>         u16 ring_id;
>         int i, ret, size;
>
> @@ -1019,13 +1048,6 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
>                 rx_ring->nbufpool = NUM_BUFPOOL;
>                 rx_ring->buf_pool = buf_pool;
>                 rx_ring->irq = pdata->irqs[i];
> -               if (!pdata->cq_cnt) {
> -                       snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx-txc",
> -                                ndev->name);
> -               } else {
> -                       snprintf(rx_ring->irq_name, IRQ_ID_SIZE, "%s-rx%d",
> -                                ndev->name, i);
> -               }
>                 buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots,
>                                                 sizeof(struct sk_buff *),
>                                                 GFP_KERNEL);
> @@ -1052,13 +1074,13 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
>                 }
>
>                 size = (tx_ring->slots / 2) * sizeof(__le64) * MAX_EXP_BUFFS;
> -               tx_ring->exp_bufs = dma_zalloc_coherent(dev, size,
> -                                                       &dma_exp_bufs,
> -                                                       GFP_KERNEL);
> -               if (!tx_ring->exp_bufs) {
> +               exp_bufs = dmam_alloc_coherent(dev, size, &dma_exp_bufs,
> +                                              GFP_KERNEL | __GFP_ZERO);
> +               if (!exp_bufs) {
>                         ret = -ENOMEM;
>                         goto err;
>                 }
> +               tx_ring->exp_bufs = exp_bufs;
>
>                 pdata->tx_ring[i] = tx_ring;
>
> @@ -1078,8 +1100,6 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
>
>                         cp_ring->irq = pdata->irqs[pdata->rxq_cnt + i];
>                         cp_ring->index = i;
> -                       snprintf(cp_ring->irq_name, IRQ_ID_SIZE, "%s-txc%d",
> -                                ndev->name, i);
>                 }
>
>                 cp_ring->cp_skb = devm_kcalloc(dev, tx_ring->slots,
> @@ -1549,22 +1569,6 @@ static void xgene_enet_napi_add(struct xgene_enet_pdata *pdata)
>         }
>  }
>
> -static void xgene_enet_napi_del(struct xgene_enet_pdata *pdata)
> -{
> -       struct napi_struct *napi;
> -       int i;
> -
> -       for (i = 0; i < pdata->rxq_cnt; i++) {
> -               napi = &pdata->rx_ring[i]->napi;
> -               netif_napi_del(napi);
> -       }
> -
> -       for (i = 0; i < pdata->cq_cnt; i++) {
> -               napi = &pdata->tx_ring[i]->cp_ring->napi;
> -               netif_napi_del(napi);
> -       }
> -}
> -
>  static int xgene_enet_probe(struct platform_device *pdev)
>  {
>         struct net_device *ndev;
> @@ -1628,12 +1632,6 @@ static int xgene_enet_probe(struct platform_device *pdev)
>                 goto err;
>         }
>
> -       ret = register_netdev(ndev);
> -       if (ret) {
> -               netdev_err(ndev, "Failed to register netdev\n");
> -               goto err;
> -       }
> -
>         ret = xgene_enet_init_hw(pdata);
>         if (ret)
>                 goto err_netdev;
> @@ -1648,7 +1646,14 @@ static int xgene_enet_probe(struct platform_device *pdev)
>         }
>
>         xgene_enet_napi_add(pdata);
> +       ret = register_netdev(ndev);
> +       if (ret) {
> +               netdev_err(ndev, "Failed to register netdev\n");
> +               goto err;
> +       }
> +
>         return 0;
> +
>  err_netdev:
>         unregister_netdev(ndev);
>  err:
> @@ -1666,10 +1671,14 @@ static int xgene_enet_remove(struct platform_device *pdev)
>         mac_ops = pdata->mac_ops;
>         ndev = pdata->ndev;
>
> +       rtnl_lock();
> +       if (netif_running(ndev))
> +               dev_close(ndev);
> +       rtnl_unlock();
> +
>         mac_ops->rx_disable(pdata);
>         mac_ops->tx_disable(pdata);
>
> -       xgene_enet_napi_del(pdata);
>         if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII)
>                 xgene_enet_mdio_remove(pdata);
>         unregister_netdev(ndev);
> @@ -1680,6 +1689,20 @@ static int xgene_enet_remove(struct platform_device *pdev)
>         return 0;
>  }
>
> +static void xgene_enet_shutdown(struct platform_device *pdev)
> +{
> +       struct xgene_enet_pdata *pdata;
> +
> +       pdata = platform_get_drvdata(pdev);
> +       if (!pdata)
> +               return;
> +
> +       if (!pdata->ndev)
> +               return;
> +
> +       xgene_enet_remove(pdev);
> +}
> +
>  #ifdef CONFIG_ACPI
>  static const struct acpi_device_id xgene_enet_acpi_match[] = {
>         { "APMC0D05", XGENE_ENET1},
> @@ -1714,6 +1737,7 @@ static struct platform_driver xgene_enet_driver = {
>         },
>         .probe = xgene_enet_probe,
>         .remove = xgene_enet_remove,
> +       .shutdown = xgene_enet_shutdown,
>  };
>
>  module_platform_driver(xgene_enet_driver);
> --
> 1.9.1
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



More information about the linux-arm-kernel mailing list