nvmf/rdma host crash during heavy load and keep alive recovery

Steve Wise swise at opengridcomputing.com
Wed Aug 10 09:00:40 PDT 2016


> Hey guys, I've rebased the nvmf-4.8-rc branch on top of 4.8-rc2 so I have
> the latest/greatest, and continued debugging this crash.  I see:
> 
> 0) 10 ram disks attached via nvmf/iw_cxgb4, and fio started on all 10
> disks.  This node has 8 cores, so that is 80 connections.
> 1) the cxgb4 interface brought down a few seconds later
> 2) kato fires on all connections
> 3) the interface is brought back up 8 seconds after #1
> 4) 10 seconds after #2 all the qps are destroyed
> 5) reconnects start happening
> 6) a blk request is executed and the nvme_rdma_request struct still has a
> pointer to one of the qps destroyed in #4, and whammo...
> 
> I'm digging into the request cancel logic.  Any ideas/help would be greatly
> appreciated...
> 
> Thanks,
> 
> Steve.
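
To spell out the chain from #6 (a hand-written sketch, not an exact excerpt;
the variable names are mine):

    struct request *rq;                          /* re-issued by blk-mq      */
    struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
    struct nvme_rdma_queue *queue = req->queue;  /* still a live allocation  */
    struct ib_qp *qp = queue->qp;                /* freed with the qps in #4 */

The request and its queue are still perfectly readable, but queue->qp was
never cleared, so the re-issued request gets posted on a freed QP.  That is
exactly what the dumps below show.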

Here is the stack of the thread that crashed while processing a blk request:

crash> bt
PID: 402    TASK: ffff880397968040  CPU: 0   COMMAND: "kworker/0:1H"
 #0 [ffff8803970f7800] machine_kexec at ffffffff8105fc40
 #1 [ffff8803970f7870] __crash_kexec at ffffffff81116908
 #2 [ffff8803970f7940] crash_kexec at ffffffff811169dd
 #3 [ffff8803970f7970] oops_end at ffffffff81032be6
 #4 [ffff8803970f79a0] die at ffffffff810330db
 #5 [ffff8803970f79d0] do_general_protection at ffffffff81030144
 #6 [ffff8803970f7a00] general_protection at ffffffff816e4ca8
    [exception RIP: nvme_rdma_post_send+131]
    RIP: ffffffffa0414083  RSP: ffff8803970f7ab8  RFLAGS: 00010246
    RAX: 6b6b6b6b6b6b6b6b  RBX: ffff8802dd923598  RCX: 0000000000000002
    RDX: ffff8803970f7ae0  RSI: ffff8803970f7ab8  RDI: ffff8802dd9fc518
    RBP: ffff8803970f7af8   R8: ffff8803970f7ab8   R9: 0000000000000000
    R10: 0000000000000000  R11: ffff8802dde6ef58  R12: ffff8802dd923598
    R13: ffff8802dde6eeb0  R14: ffff880399f4c548  R15: ffff8802dde59db8
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
 #7 [ffff8803970f7b00] nvme_rdma_queue_rq at ffffffffa0415c72 [nvme_rdma]
 #8 [ffff8803970f7b50] __blk_mq_run_hw_queue at ffffffff81338324
 #9 [ffff8803970f7ca0] blk_mq_run_work_fn at ffffffff81338552
#10 [ffff8803970f7cb0] process_one_work at ffffffff810a1593
#11 [ffff8803970f7d90] worker_thread at ffffffff810a222d
#12 [ffff8803970f7ec0] kthread at ffffffff810a6d6c
#13 [ffff8803970f7f50] ret_from_fork at ffffffff816e2cbf
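
Note RDI matches queue->qp from the queue dump below, and RAX is the slab
free-poison pattern, so this looks like the inlined ib_post_send() loading
qp->device out of the freed QP.  Roughly (paraphrasing the nvmf-4.8-rc code,
not an exact excerpt):

    static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
                    struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
                    struct ib_send_wr *first, bool flush)
    {
            struct ib_send_wr wr, *bad_wr;

            /* ... fill out the sge and the send wr ... */

            if (first)
                    first->next = &wr;
            else
                    first = &wr;

            /* ib_post_send() is a static inline that does
             *         qp->device->post_send(qp, wr, bad_wr)
             * With queue->qp pointing at freed, poisoned memory, qp->device
             * reads back as 0x6b6b6b6b6b6b6b6b (RAX above) and the indirect
             * call takes a general protection fault.
             */
            return ib_post_send(queue->qp, first, &bad_wr);
    }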

Here is the nvme_rdma_request:

crash> nvme_rdma_request ffff8802dde6eeb0
struct nvme_rdma_request {
  mr = 0xffff8802dde5c008,
  sqe = {
    cqe = {
      done = 0xffffffffa0414320 <nvme_rdma_send_done>
    },
    data = 0xffff8802dde59db8,
    dma = 12312747448
  },
  sge = {{
      addr = 12312747448,
      length = 64,
      lkey = 0
    }, {
      addr = 12138727424,
      length = 2048,
      lkey = 0
    }},
  num_sge = 2,
  nents = 1,
  inline_data = true,
  need_inval = false,
  reg_wr = {
    wr = {
      next = 0x0,
      {
        wr_id = 0,
        wr_cqe = 0x0
      },
      sg_list = 0x0,
      num_sge = 0,
      opcode = IB_WR_RDMA_WRITE,
      send_flags = 0,
      ex = {
        imm_data = 0,
        invalidate_rkey = 0
      }
    },
    mr = 0x0,
    key = 0,
    access = 0
  },
  reg_cqe = {
    done = 0x0
  },
  queue = 0xffff8802dd923598,
  sg_table = {
    sgl = 0xffff8802dde6ef58,
    nents = 1,
    orig_nents = 1
  },
  first_sgl = 0xffff8802dde6ef58
}
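
For reference, the request pdu layout in this tree is roughly the following
(paraphrased from drivers/nvme/host/rdma.c, may not be byte-exact;
NVME_RDMA_MAX_INLINE_SEGMENTS is 1, hence the two sge entries above):

    struct nvme_rdma_request {
            struct ib_mr            *mr;
            struct nvme_rdma_qe     sqe;
            struct ib_sge           sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
            u32                     num_sge;
            int                     nents;
            bool                    inline_data;
            bool                    need_inval;
            struct ib_reg_wr        reg_wr;
            struct ib_cqe           reg_cqe;
            struct nvme_rdma_queue  *queue;
            struct sg_table         sg_table;
            struct scatterlist      first_sgl[];
    };

The field that matters here is queue, which still points at the (valid)
nvme_rdma_queue dumped next.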

And here is the nvme_rdma_queue:

crash> nvme_rdma_queue 0xffff8802dd923598
struct nvme_rdma_queue {
  rsp_ring = 0xffff8802dd968008,
  sig_count = 200 '\310',
  queue_size = 128,
  cmnd_capsule_len = 4160,
  ctrl = 0xffff8802dbd5d3d8,
  device = 0xffff880384ceb5e8,
  ib_cq = 0xffff8802dd9d2e68,
  qp = 0xffff8802dd9fc518,
  flags = 0,
  cm_id = 0xffff8802dd9f8008,
  cm_error = 0,
  cm_done = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      },
      task_list = {
        next = 0xffff8802dd9235f8,
        prev = 0xffff8802dd9235f8
      }
    }
  }
}
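
Note queue->qp is still 0xffff8802dd9fc518 even though that QP was destroyed
during the recovery in #4.  The teardown path is roughly (paraphrasing, not
an exact excerpt):

    static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
    {
            ...
            rdma_destroy_qp(queue->cm_id);  /* frees the underlying ib_qp */
            ib_free_cq(queue->ib_cq);
            ...
            /* queue->qp is left pointing at the freed object, so any
             * request that still makes it into queue_rq afterwards will
             * post to freed memory. */
    }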

And here you can see that the ib_qp has been freed:

crash> gdb x/8g 0xffff8802dd9fc518
0xffff8802dd9fc518:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc528:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc538:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc548:     0x6b6b6b6b6b6b6b6b      0x6b6b6b6b6b6b6b6b
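
0x6b is the slab free-poison byte, i.e. this object has been through kfree
with slab poisoning enabled (include/linux/poison.h):

    #define POISON_INUSE    0x5a    /* for use-uninitialised poisoning */
    #define POISON_FREE     0x6b    /* for use-after-free poisoning */
    #define POISON_END      0xa5    /* end-byte of poisoning */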



