nvmf/rdma host crash during heavy load and keep alive recovery

Steve Wise swise at opengridcomputing.com
Tue Sep 27 08:11:05 PDT 2016


> 
> Steve,
> 
> can you retest with the "Introduce blk_quiesce_queue() and blk_resume_queue()"
> series from Bart applied?
> 

Hey Christoph,

To apply Bart's series, I needed to use Jens' for-4.9/block branch.  But I also
wanted the latest nvme fixes in linux-4.8-rc8, so I rebased Jens' branch onto
rc8, then applied Bart's series (which needed a small tweak to patch 2). On top
of this I have some debug patches that will BUG_ON() if they detect freed RDMA
objects (requires memory debugging enabled so freed memory carries the 0x6b6b...
poison stamp).   This code base can be perused at:

https://github.com/larrystevenwise/nvme-fabrics/commits/block-for-4.9

I then tried to reproduce, and still hit a crash.  I'm debugging now.  

[  414.649731] nvme_rdma: nvme_rdma_queue_rq BAZINGA! hctx ffff880fb8fc1548 ns
ffff88101f049798 queue ffff880fbc1443a8 hctx1 ffff880fba47ca68 hctx2
ffff880fb8fc1548 rq ffff880fbb1736c0 req ffff880fbb173830
[  414.649830] nvme_rdma: nvme_rdma_queue_rq BAZINGA! hctx ffff880fbc1164f8 ns
ffff88101f049798 queue ffff880fbc144328 hctx1 ffff880fba47afd8 hctx2
ffff880fbc1164f8 rq ffff880fbb031240 req ffff880fbb0313b0
[  414.649869] ------------[ cut here ]------------
[  414.649870] kernel BUG at drivers/nvme/host/rdma.c:1434!
[  414.649872] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC
[  414.649907] Modules linked in: nvme_rdma nvme_fabrics brd iw_cxgb4 cxgb4
ip6table_filter ip6_tables ebtable_nat ebtables ipt_MASQUERADE
nf_nat_masquerade_ipv4 iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4
nf_defrag_ipv4 xt_state nf_conntrack ipt_REJECT nf_reject_ipv4 xt_CHECKSUM
iptable_mangle iptable_filter ip_tables bridge 8021q mrp garp stp llc cachefiles
fscache rdma_ucm rdma_cm iw_cm ib_ipoib ib_cm ib_uverbs ib_umad ocrdma be2net
iw_nes libcrc32c iw_cxgb3 cxgb3 mdio ib_qib rdmavt mlx5_ib mlx5_core mlx4_ib
mlx4_en mlx4_core ib_mthca ib_core binfmt_misc dm_mirror dm_region_hash dm_log
vhost_net macvtap macvlan vhost tun kvm irqbypass uinput iTCO_wdt
iTCO_vendor_support mxm_wmi pcspkr ipmi_si ipmi_msghandler dm_mod i2c_i801
i2c_smbus sg lpc_ich mfd_core mei_me mei nvme nvme_core igb dca ptp
[  414.649916]  pps_core wmi ext4(E) mbcache(E) jbd2(E) sd_mod(E) ahci(E)
libahci(E) libata(E) mgag200(E) ttm(E) drm_kms_helper(E) drm(E) fb_sys_fops(E)
sysimgblt(E) sysfillrect(E) syscopyarea(E) i2c_algo_bit(E) i2c_core(E) [last
unloaded: cxgb4]
[  414.649918] CPU: 0 PID: 14173 Comm: fio Tainted: G        W   E
4.8.0-rc8-stevo-for-harsha-dbg+ #48
[  414.649919] Hardware name: Supermicro X9DR3-F/X9DR3-F, BIOS 3.2a 07/09/2015
[  414.649920] task: ffff880fa4410040 task.stack: ffff880fa44fc000
[  414.649926] RIP: 0010:[<ffffffffa065eef0>]  [<ffffffffa065eef0>]
nvme_rdma_queue_rq+0x2d0/0x360 [nvme_rdma]
[  414.649927] RSP: 0018:ffff880fa44ff9a8  EFLAGS: 00010296
[  414.649928] RAX: 00000000000000be RBX: ffff880fbc144328 RCX: 0000000000000000
[  414.649929] RDX: 0000000000000001 RSI: 0000000000000286 RDI: 0000000000000286
[  414.649930] RBP: ffff880fa44ffa08 R08: 000000000005aeb2 R09: ffffffff820360de
[  414.649931] R10: 00000000000000e0 R11: 000000000000000f R12: ffff880fbb031240
[  414.649932] R13: ffff880fbc1164f8 R14: ffff880fbb0313b0 R15: ffff88101f049798
[  414.649933] FS:  00007ffb2afc5720(0000) GS:ffff881036e00000(0000)
knlGS:0000000000000000
[  414.649934] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  414.649935] CR2: 00007ffb0942e194 CR3: 0000000fa3636000 CR4: 00000000000406f0
[  414.649936] Stack:
[  414.649939]  ffff880fbc1164f8 ffff880fbb031240 ffff880fbb0313b0
ffff88103681f3f8
[  414.649941]  ffff880fa44ffa08 ffff880fbb7dcc88 ffff880fbb031240
ffff880fbb031240
[  414.649942]  ffff880fa44ffa78 0000000000000000 ffff880fb8f073e0
0000000000000001
[  414.649943] Call Trace:
[  414.649948]  [<ffffffff813394d1>] __blk_mq_run_hw_queue+0x251/0x400
[  414.649952]  [<ffffffff8115aa58>] ? trace_buffer_unlock_commit_regs+0x68/0x80
[  414.649956]  [<ffffffff8116c796>] ? trace_event_buffer_commit+0x146/0x1d0
[  414.649959]  [<ffffffff81330676>] ?
trace_event_raw_event_block_rq+0x116/0x180
[  414.649961]  [<ffffffff81339742>] blk_mq_run_hw_queue+0x72/0x80
[  414.649962]  [<ffffffff81339cd2>] blk_mq_insert_requests+0x112/0x1d0
[  414.649965]  [<ffffffff81197991>] ? __generic_file_write_iter+0xd1/0x1f0
[  414.649969]  [<ffffffff81204cdc>] ? alloc_debug_processing+0x6c/0x1a0
[  414.649971]  [<ffffffff81339f16>] blk_mq_flush_plug_list+0x186/0x1a0
[  414.649974]  [<ffffffff8127c326>] ? aio_run_iocb+0x126/0x2d0
[  414.649976]  [<ffffffff8132f2c2>] blk_flush_plug_list+0x132/0x2e0
[  414.649979]  [<ffffffff812086e5>] ? kmem_cache_alloc+0x165/0x1e0
[  414.649981]  [<ffffffff8127c731>] ? io_submit_one+0x261/0x530
[  414.649983]  [<ffffffff8116c796>] ? trace_event_buffer_commit+0x146/0x1d0
[  414.649984]  [<ffffffff8132f674>] blk_finish_plug+0x34/0x50
[  414.649986]  [<ffffffff8127cb32>] do_io_submit+0x132/0x210
[  414.649988]  [<ffffffff8127cc20>] SyS_io_submit+0x10/0x20
[  414.649992]  [<ffffffff81003e7d>] do_syscall_64+0x7d/0x230
[  414.649997]  [<ffffffff8106f097>] ? do_page_fault+0x37/0x90
[  414.650001]  [<ffffffff816e3f61>] entry_SYSCALL64_slow_path+0x25/0x25
[  414.650020] Code: 49 89 d8 4c 89 64 24 08 48 8b 43 78 4c 89 f9 4c 89 ea 48 c7
c6 50 11 66 a0 48 c7 c7 38 14 66 a0 48 89 04 24 31 c0 e8 d2 68 b3 e0 <0f> 0b eb
fe 80 3d 39 33 00 00 01 0f 84 6d fd ff ff be 8f 05 00
[  414.650023] RIP  [<ffffffffa065eef0>] nvme_rdma_queue_rq+0x2d0/0x360
[nvme_rdma]
[  414.650023]  RSP <ffff880fa44ff9a8>
crash> gdb list *nvme_rdma_queue_rq+0x2d0
0xffffffffa065eef0 is in nvme_rdma_queue_rq (drivers/nvme/host/rdma.c:1434).
1429            if ((uintptr_t)queue->qp->device ==
(uintptr_t)(0x6b6b6b6b6b6b6b6bUL) ||
1430                (uintptr_t)queue->qp->qp_num ==
(uintptr_t)(0x6b6b6b6b6b6b6b6bUL) ||
1431                (uintptr_t)queue->ib_cq->comp_handler ==
(uintptr_t)(0x6b6b6b6b6b6b6b6bUL) ||
1432                (uintptr_t)queue->ib_cq->device ==
(uintptr_t)(0x6b6b6b6b6b6b6b6bUL)) {
1433                    pr_err("%s BAZINGA! hctx %p ns %p queue %p hctx1 %p
hctx2 %p rq %p req %p\n", __func__, hctx, ns, queue, queue->hctx1, queue->hctx2,
rq, req);
1434                    BUG_ON(1);
1435            }
1436            dev = queue->device->dev;
1437            ib_dma_sync_single_for_cpu(dev, sqe->dma,
1438                            sizeof(struct nvme_command), DMA_TO_DEVICE);
crash> nvme_rdma_request ffff880fbb0313b0
struct nvme_rdma_request {
  mr = 0xffff880fbb7d9ad8,
  sqe = {
    cqe = {
      done = 0xffffffffa065d670 <nvme_rdma_send_done>
    },
    data = 0xffff880fbb7dcc88,
    dma = 67570093192
  },
  sge = {{
      addr = 67570093192,
      length = 64,
      lkey = 0
    }, {
      addr = 67902048256,
      length = 2048,
      lkey = 0
    }},
  num_sge = 2,
  nents = 1,
  inline_data = true,
  reg_wr = {
    wr = {
      next = 0xffff880fb65bf518,
      {
        wr_id = 18446612199876269112,
        wr_cqe = 0xffff880fbb031438
      },
      sg_list = 0x0,
      num_sge = 0,
      opcode = IB_WR_REG_MR,
      send_flags = 0,
      ex = {
        imm_data = 0,
        invalidate_rkey = 0
      }
    },
    mr = 0xffff880fbb7d9ad8,
    key = 40554307,
    access = 7
  },
  reg_cqe = {
    done = 0xffffffffa065d6b0 <nvme_rdma_memreg_done>
  },
  queue = 0xffff880fbc144328,
  sg_table = {
    sgl = 0xffff880fbb031458,
    nents = 1,
    orig_nents = 1
  },
  first_sgl = 0xffff880fbb031458
}
crash> nvme_rdma_queue 0xffff880fbc144328
struct nvme_rdma_queue {
  rsp_ring = 0xffff880fbe81c548,
  sig_count = 60 '<',
  queue_size = 128,
  cmnd_capsule_len = 4160,
  ctrl = 0xffff880fc98eca88,
  device = 0xffff880ff0b304b8,
  ib_cq = 0xffff880fbcac3508,
  qp = 0xffff880fbc1874e8,
  flags = 16,
  cm_id = 0xffff880fbc181548,
  cm_error = 0,
  cm_done = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      },
      task_list = {
        next = 0xffff880fbc144388,
        prev = 0xffff880fbc144388
      }
    }
  },
  hctx1 = 0xffff880fba47afd8,
  hctx2 = 0xffff880fbc1164f8
}
crash> gdb x/1g 0xffff880fbcac3508
0xffff880fbcac3508:     0x6b6b6b6b6b6b6b6b
crash>




More information about the Linux-nvme mailing list