nvmf/rdma host crash during heavy load and keep alive recovery
Steve Wise
swise at opengridcomputing.com
Wed Aug 10 09:00:40 PDT 2016
> Hey guys, I've rebased the nvmf-4.8-rc branch on top of 4.8-rc2 so I have
> the latest/greatest, and continued debugging this crash. I see:
>
> 0) 10 ram disks attached via nvmf/iw_cxgb4, and fio started on all 10
> disks. This node has 8 cores, so that is 80 connections.
> 1) the cxgb4 interface is brought down a few seconds later
> 2) kato fires on all connections
> 3) the interface is brought back up 8 seconds after #1
> 4) 10 seconds after #2 all the qps are destroyed
> 5) reconnects start happening
> 6) a blk request is executed, and the nvme_rdma_request struct still has a
> pointer to one of the qps destroyed in #4, and whamo...
>
> I'm digging into the request cancel logic. Any ideas/help is greatly
> appreciated...
>
> Thanks,
>
> Steve.
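To make step 6 concrete, here is my mental model of the collision.  This is
not the literal driver code, just a paraphrase (the function names on the
teardown side are approximate); the point is that a request that was never
cancelled still carries a queue pointer whose qp has been freed underneath it:

/* recovery side: keep-alive timeout eventually frees the queue's IB resources */
static void teardown_side(struct nvme_rdma_queue *queue)
{
        rdma_destroy_qp(queue->cm_id);  /* queue->qp now points at freed memory */
        ib_free_cq(queue->ib_cq);
}

/* blk-mq side: a not-yet-cancelled request still carries the queue pointer */
static void io_side(struct nvme_rdma_request *req)
{
        struct nvme_rdma_queue *queue = req->queue;
        struct ib_send_wr wr = {}, *bad_wr;

        /* nvme_rdma_queue_rq -> nvme_rdma_post_send -> ib_post_send(queue->qp, ...) */
        ib_post_send(queue->qp, &wr, &bad_wr);
}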
Here is the stack that crashed processing a blk request:
crash> bt
PID: 402 TASK: ffff880397968040 CPU: 0 COMMAND: "kworker/0:1H"
#0 [ffff8803970f7800] machine_kexec at ffffffff8105fc40
#1 [ffff8803970f7870] __crash_kexec at ffffffff81116908
#2 [ffff8803970f7940] crash_kexec at ffffffff811169dd
#3 [ffff8803970f7970] oops_end at ffffffff81032be6
#4 [ffff8803970f79a0] die at ffffffff810330db
#5 [ffff8803970f79d0] do_general_protection at ffffffff81030144
#6 [ffff8803970f7a00] general_protection at ffffffff816e4ca8
[exception RIP: nvme_rdma_post_send+131]
RIP: ffffffffa0414083 RSP: ffff8803970f7ab8 RFLAGS: 00010246
RAX: 6b6b6b6b6b6b6b6b RBX: ffff8802dd923598 RCX: 0000000000000002
RDX: ffff8803970f7ae0 RSI: ffff8803970f7ab8 RDI: ffff8802dd9fc518
RBP: ffff8803970f7af8 R8: ffff8803970f7ab8 R9: 0000000000000000
R10: 0000000000000000 R11: ffff8802dde6ef58 R12: ffff8802dd923598
R13: ffff8802dde6eeb0 R14: ffff880399f4c548 R15: ffff8802dde59db8
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#7 [ffff8803970f7b00] nvme_rdma_queue_rq at ffffffffa0415c72 [nvme_rdma]
#8 [ffff8803970f7b50] __blk_mq_run_hw_queue at ffffffff81338324
#9 [ffff8803970f7ca0] blk_mq_run_work_fn at ffffffff81338552
#10 [ffff8803970f7cb0] process_one_work at ffffffff810a1593
#11 [ffff8803970f7d90] worker_thread at ffffffff810a222d
#12 [ffff8803970f7ec0] kthread at ffffffff810a6d6c
#13 [ffff8803970f7f50] ret_from_fork at ffffffff816e2cbf
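Two things jump out of the register dump: RDI at the exception is
ffff8802dd9fc518, which matches queue->qp in the nvme_rdma_queue dump further
down, and RAX is the 0x6b slab poison pattern.  That fits ib_post_send() being
inlined into nvme_rdma_post_send(); if I remember ib_verbs.h correctly, it is
just:

static inline int ib_post_send(struct ib_qp *qp,
                               struct ib_send_wr *send_wr,
                               struct ib_send_wr **bad_send_wr)
{
        return qp->device->post_send(qp, send_wr, bad_send_wr);
}

so loading qp->device from the freed qp gives the poison in RAX, and chasing
->post_send through that non-canonical pointer is what blows up as a general
protection fault.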
Here is the nvme_rdma_request:
crash> nvme_rdma_request ffff8802dde6eeb0
struct nvme_rdma_request {
  mr = 0xffff8802dde5c008,
  sqe = {
    cqe = {
      done = 0xffffffffa0414320 <nvme_rdma_send_done>
    },
    data = 0xffff8802dde59db8,
    dma = 12312747448
  },
  sge = {{
      addr = 12312747448,
      length = 64,
      lkey = 0
    }, {
      addr = 12138727424,
      length = 2048,
      lkey = 0
    }},
  num_sge = 2,
  nents = 1,
  inline_data = true,
  need_inval = false,
  reg_wr = {
    wr = {
      next = 0x0,
      {
        wr_id = 0,
        wr_cqe = 0x0
      },
      sg_list = 0x0,
      num_sge = 0,
      opcode = IB_WR_RDMA_WRITE,
      send_flags = 0,
      ex = {
        imm_data = 0,
        invalidate_rkey = 0
      }
    },
    mr = 0x0,
    key = 0,
    access = 0
  },
  reg_cqe = {
    done = 0x0
  },
  queue = 0xffff8802dd923598,
  sg_table = {
    sgl = 0xffff8802dde6ef58,
    nents = 1,
    orig_nents = 1
  },
  first_sgl = 0xffff8802dde6ef58
}
And here is the nvme_rdma_queue:
crash> nvme_rdma_queue 0xffff8802dd923598
struct nvme_rdma_queue {
  rsp_ring = 0xffff8802dd968008,
  sig_count = 200 '\310',
  queue_size = 128,
  cmnd_capsule_len = 4160,
  ctrl = 0xffff8802dbd5d3d8,
  device = 0xffff880384ceb5e8,
  ib_cq = 0xffff8802dd9d2e68,
  qp = 0xffff8802dd9fc518,
  flags = 0,
  cm_id = 0xffff8802dd9f8008,
  cm_error = 0,
  cm_done = {
    done = 0,
    wait = {
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      },
      task_list = {
        next = 0xffff8802dd9235f8,
        prev = 0xffff8802dd9235f8
      }
    }
  }
}
And here you can see that the ib_qp has been freed:
crash> gdb x/8g 0xffff8802dd9fc518
0xffff8802dd9fc518: 0x6b6b6b6b6b6b6b6b 0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc528: 0x6b6b6b6b6b6b6b6b 0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc538: 0x6b6b6b6b6b6b6b6b 0x6b6b6b6b6b6b6b6b
0xffff8802dd9fc548: 0x6b6b6b6b6b6b6b6b 0x6b6b6b6b6b6b6b6b
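0x6b is the slab POISON_FREE byte, so the qp really was freed out from under
an outstanding request.  While I dig through the cancel logic, one direction
I'm considering is a liveness check in the request path.  Purely an
illustration of the idea -- the flag name and helper below are made up, this
is not proposing actual code yet:

#define NVME_RDMA_Q_LIVE        3       /* hypothetical bit in queue->flags */

static int nvme_rdma_queue_rq_guarded(struct blk_mq_hw_ctx *hctx,
                                      const struct blk_mq_queue_data *bd)
{
        struct nvme_rdma_queue *queue = hctx->driver_data;

        /* bounce the request back to blk-mq if recovery tore the queue down */
        if (!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))
                return BLK_MQ_RQ_QUEUE_BUSY;    /* requeue until reconnect */

        /* ...normal request mapping + nvme_rdma_post_send() as today... */
        return BLK_MQ_RQ_QUEUE_OK;
}

That would only paper over the window, though; it doesn't explain why the
request wasn't cancelled or flushed before the qp was destroyed, which is the
part I'm still chasing.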