[PATCH RFC v3 05/11] nvme-rdma: use the cancel command to perform an abort if target supports it

Maurizio Lombardi mlombard at redhat.com
Mon Mar 24 03:23:04 PDT 2025


If available, use the Cancel command to abort the command that timed
out instead of resetting the controller.

Limit the number of outstanding Cancel commands to a maximum of two
per queue. If more than one command times out at the same time, the
driver will use a Cancel command with the action flag set to "Multiple
commands" to abort all the commands on the specified queue, as a last
resort to avoid a controller reset.

If the Cancel command is not supported, or if any error is
encountered, the driver will fall back to the normal controller reset.
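
For illustration only (not part of the patch), a minimal standalone
sketch of the escalation policy: CANCEL_ONE/CANCEL_ALL mirror the
NVME_RDMA_Q_CANCEL_ONE/NVME_RDMA_Q_CANCEL_ALL queue flag bits added
below; every other name here is made up for the example.

	#include <stdatomic.h>

	enum { CANCEL_ONE, CANCEL_ALL };
	enum cancel_action { ACTION_SINGLE_CMD, ACTION_MUL_CMD, ACTION_NONE };

	/* Decide what to do when a request on this queue times out. */
	static enum cancel_action pick_cancel_action(atomic_ulong *flags)
	{
		/* First timeout on the queue: cancel just this command. */
		if (!(atomic_fetch_or(flags, 1UL << CANCEL_ONE) &
		      (1UL << CANCEL_ONE)))
			return ACTION_SINGLE_CMD;

		/* A single-command cancel is already in flight: escalate
		 * to a "multiple commands" cancel.
		 */
		if (!(atomic_fetch_or(flags, 1UL << CANCEL_ALL) &
		      (1UL << CANCEL_ALL)))
			return ACTION_MUL_CMD;

		/* Both reserved slots are busy: a "multiple commands"
		 * cancel is already running and will catch this request
		 * too, so just mark it aborted and reset the timer.
		 */
		return ACTION_NONE;
	}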

Signed-off-by: Maurizio Lombardi <mlombard at redhat.com>
---
 drivers/nvme/host/rdma.c | 50 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b5a0295b5bf4..3de37d116e4a 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -74,12 +74,15 @@ struct nvme_rdma_request {
 	struct nvme_rdma_sgl	data_sgl;
 	struct nvme_rdma_sgl	*metadata_sgl;
 	bool			use_sig_mr;
+	bool			aborted;
 };
 
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_ALLOCATED		= 0,
 	NVME_RDMA_Q_LIVE		= 1,
 	NVME_RDMA_Q_TR_READY		= 2,
+	NVME_RDMA_Q_CANCEL_ONE		= 3,
+	NVME_RDMA_Q_CANCEL_ALL		= 4,
 };
 
 struct nvme_rdma_queue {
@@ -619,6 +622,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 	}
 
 	set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);
+	clear_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags);
+	clear_bit(NVME_RDMA_Q_CANCEL_ALL, &queue->flags);
 
 	return 0;
 
@@ -1954,16 +1959,18 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
+	struct nvme_rdma_ctrl *rdma_ctrl = queue->ctrl;
+	struct nvme_ctrl *ctrl = &rdma_ctrl->ctrl;
 	struct nvme_command *cmd = req->req.cmd;
 	int qid = nvme_rdma_queue_idx(queue);
+	int error, action;
 
-	dev_warn(ctrl->ctrl.device,
+	dev_warn(ctrl->device,
 		 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout\n",
 		 rq->tag, nvme_cid(rq), cmd->common.opcode,
 		 nvme_fabrics_opcode_str(qid, cmd), qid);
 
-	if (nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_LIVE) {
+	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
 		/*
 		 * If we are resetting, connecting or deleting we should
 		 * complete immediately because we may block controller
@@ -1981,11 +1988,40 @@ static enum blk_eh_timer_return nvme_rdma_timeout(struct request *rq)
 		return BLK_EH_DONE;
 	}
 
+	if (!req->aborted) {
+		if (!nvme_io_command_supported(ctrl, nvme_cmd_cancel) || !qid)
+			goto err_recovery;
+
+		if (!test_and_set_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags)) {
+			action = NVME_CANCEL_ACTION_SINGLE_CMD;
+		} else if (!test_and_set_bit(NVME_RDMA_Q_CANCEL_ALL,
+						&queue->flags)) {
+			action = NVME_CANCEL_ACTION_MUL_CMD;
+		} else {
+			/* No free reserved commands. This means a
+			 * "multiple commands" cancel is currently under
+			 * execution and this request is likely to be
+			 * canceled. Mark this request as aborted and
+			 * reset the timer.
+			 */
+			goto abort;
+		}
+
+		error = nvme_submit_cancel_req(ctrl, rq, qid, action);
+		if (error)
+			goto err_recovery;
+
+abort:
+		req->aborted = true;
+		return BLK_EH_RESET_TIMER;
+	}
+
 	/*
 	 * LIVE state should trigger the normal error recovery which will
 	 * handle completing this request.
 	 */
-	nvme_rdma_error_recovery(ctrl);
+err_recovery:
+	nvme_rdma_error_recovery(rdma_ctrl);
 	return BLK_EH_RESET_TIMER;
 }
 
@@ -2009,6 +2045,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
 	dev = queue->device->dev;
+	req->aborted = false;
 
 	req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
 					 sizeof(struct nvme_command),
@@ -2113,6 +2150,7 @@ static void nvme_rdma_complete_rq(struct request *rq)
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_queue *queue = req->queue;
 	struct ib_device *ibdev = queue->device->dev;
+	bool is_cancel = nvme_is_cancel(req->req.cmd);
 
 	if (req->use_sig_mr)
 		nvme_rdma_check_pi_status(req);
@@ -2121,6 +2159,10 @@ static void nvme_rdma_complete_rq(struct request *rq)
 	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
 			    DMA_TO_DEVICE);
 	nvme_complete_rq(rq);
+	if (is_cancel) {
+		if (!test_and_clear_bit(NVME_RDMA_Q_CANCEL_ALL, &queue->flags))
+			clear_bit(NVME_RDMA_Q_CANCEL_ONE, &queue->flags);
+	}
 }
 
 static void nvme_rdma_map_queues(struct blk_mq_tag_set *set)
-- 
2.43.5
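
A matching sketch of the release side, mirroring the completion path
in nvme_rdma_complete_rq() above: when a Cancel command completes,
free the "multiple commands" slot first so escalation stays possible,
and only if none was pending free the single-command slot (same
hypothetical helpers as the sketch in the commit message):

	static void release_cancel_slot(atomic_ulong *flags)
	{
		/* Clear CANCEL_ALL and check what was set before. */
		unsigned long old =
			atomic_fetch_and(flags, ~(1UL << CANCEL_ALL));

		/* No "multiple commands" cancel was pending, so this
		 * completion belongs to the single-command cancel:
		 * release that slot instead.
		 */
		if (!(old & (1UL << CANCEL_ALL)))
			atomic_fetch_and(flags, ~(1UL << CANCEL_ONE));
	}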