[PATCH] nvme-rdma: fix deadlock when delete ctrl due to reconnect fail
Sagi Grimberg
sagi at grimberg.me
Mon Jul 27 19:31:58 EDT 2020
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index f8f856dc0c67..b381e2cde50a 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -989,8 +989,7 @@ static void nvme_rdma_teardown_io_queues(struct
>> nvme_rdma_ctrl *ctrl,
>> nvme_cancel_request, &ctrl->ctrl);
>> blk_mq_tagset_wait_completed_request(ctrl->ctrl.tagset);
>> }
>> - if (remove)
>> - nvme_start_queues(&ctrl->ctrl);
>> + nvme_start_queues(&ctrl->ctrl);
>
> This will fail I/O during a controller reset, so NAK on this.
Can you try this:
--
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d58231636d11..96c0d664fe9b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1149,6 +1149,11 @@ static void nvme_rdma_reconnect_ctrl_work(struct
work_struct *work)
return;
requeue:
+ /*
+ * make sure the queues are not left quiesced by a reconnect
+ * sequence that failed after creating some I/O queues
+ */
+ nvme_start_queues(ctrl);
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
ctrl->ctrl.nr_reconnects);
nvme_rdma_reconnect_or_remove(ctrl);
--
More information about the Linux-nvme
mailing list