[PATCH RFC V2 5/7] nvme-tcp: use the cancel command to perform an abort if target supports it

Maurizio Lombardi mlombard at redhat.com
Thu Sep 12 01:15:08 PDT 2024


If available, use the Cancel command to abort the command that timed
out instead of resetting the controller. Limit the number of
outstanding Cancel commands to NVME_RSV_CANCEL_MAX per queue; when the
last reserved slot is consumed, the driver will issue a Cancel command
with the action flag set to "Multiple commands" to abort all the
commands on the specified queue, as a last resort to avoid a
controller reset.

If the Cancel command is not supported, if any error is encountered, or
if the pool of reserved commands is exhausted, the driver will fall
back to the normal controller reset.

Signed-off-by: Maurizio Lombardi <mlombard at redhat.com>
---
 drivers/nvme/host/tcp.c | 58 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 53 insertions(+), 5 deletions(-)
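
Note for reviewers (not part of the patch): below is a stand-alone C
sketch of the per-queue Cancel budget described above, for illustration
only. The NVME_RSV_CANCEL_MAX value of 4 is merely an assumption made
for this example; in the driver the counter lives in an atomic_t in
struct nvme_tcp_queue and the Cancel is submitted with
nvme_submit_cancel_req(), as shown in the hunks below.

#include <stdio.h>

#define NVME_RSV_CANCEL_MAX 4	/* assumed value, for illustration only */

enum timeout_action {
	ACTION_CANCEL_SINGLE,	/* abort only the timed-out command */
	ACTION_CANCEL_MULTIPLE,	/* last reserved slot: abort the whole queue */
	ACTION_RESET_CTRL,	/* pool exhausted: fall back to error recovery */
};

static int cancel_free_cmds = NVME_RSV_CANCEL_MAX;

/* Decision taken when an I/O times out (mirrors nvme_tcp_timeout()). */
static enum timeout_action on_timeout(void)
{
	int left = --cancel_free_cmds;	/* atomic_dec_return() in the driver */

	if (left < 0)
		return ACTION_RESET_CTRL;
	if (left > 0)
		return ACTION_CANCEL_SINGLE;
	return ACTION_CANCEL_MULTIPLE;
}

/* The slot is returned when the Cancel command itself completes. */
static void on_cancel_complete(void)
{
	cancel_free_cmds++;		/* atomic_inc_return() in the driver */
}

int main(void)
{
	int i;

	for (i = 1; i <= NVME_RSV_CANCEL_MAX; i++)	/* use up the pool */
		printf("timeout %d -> action %d\n", i, on_timeout());

	on_cancel_complete();		/* one Cancel completed: a slot is back */
	printf("timeout %d -> action %d\n", i++, on_timeout());
	printf("timeout %d -> action %d\n", i, on_timeout());
	return 0;
}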

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a2a47d3ab99f..c4f04b451854 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -121,6 +121,7 @@ struct nvme_tcp_request {
 	size_t			offset;
 	size_t			data_sent;
 	enum nvme_tcp_send_state state;
+	bool			aborted;
 };
 
 enum nvme_tcp_queue_flags {
@@ -172,6 +173,7 @@ struct nvme_tcp_queue {
 	struct completion       tls_complete;
 	int                     tls_err;
 	struct page_frag_cache	pf_cache;
+	atomic_t		cancel_free_cmds;
 
 	void (*state_change)(struct sock *);
 	void (*data_ready)(struct sock *);
@@ -202,6 +204,7 @@ static struct workqueue_struct *nvme_tcp_wq;
 static const struct blk_mq_ops nvme_tcp_mq_ops;
 static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
+static void nvme_tcp_complete_rq(struct request *rq);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
 {
@@ -600,7 +603,7 @@ static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
 		req->status = cqe->status;
 
 	if (!nvme_try_complete_req(rq, req->status, cqe->result))
-		nvme_complete_rq(rq);
+		nvme_tcp_complete_rq(rq);
 	queue->nr_cqe++;
 
 	return 0;
@@ -800,7 +803,7 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 	union nvme_result res = {};
 
 	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
-		nvme_complete_rq(rq);
+		nvme_tcp_complete_rq(rq);
 }
 
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
@@ -1658,6 +1661,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 	mutex_init(&queue->send_mutex);
 	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
 
+	atomic_set(&queue->cancel_free_cmds, NVME_RSV_CANCEL_MAX);
+
 	if (qid > 0)
 		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
 	else
@@ -2444,10 +2449,13 @@ static void nvme_tcp_complete_timed_out(struct request *rq)
 static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
 	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	struct nvme_command *cmd = &pdu->cmd;
-	int qid = nvme_tcp_queue_id(req->queue);
+	struct nvme_tcp_queue *queue = req->queue;
+	struct nvme_ctrl *ctrl = &queue->ctrl->ctrl;
+	int qid = nvme_tcp_queue_id(queue);
+	int error, action;
+	int cancel_free_cmds;
 
 	dev_warn(ctrl->device,
 		 "I/O tag %d (%04x) type %d opcode %#x (%s) QID %d timeout\n",
@@ -2472,10 +2480,34 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 		return BLK_EH_DONE;
 	}
 
+	if (!req->aborted) {
+		if (!nvme_io_command_supported(ctrl, nvme_cmd_cancel))
+			goto err_recovery;
+
+		cancel_free_cmds = atomic_dec_return(&queue->cancel_free_cmds);
+		if (cancel_free_cmds < 0) {
+			/* No free reserved commands */
+			goto err_recovery;
+		}
+
+		if (cancel_free_cmds > 0)
+			action = NVME_CANCEL_ACTION_SINGLE_CMD;
+		else
+			action = NVME_CANCEL_ACTION_MUL_CMD;
+
+		error = nvme_submit_cancel_req(ctrl, rq, qid, action);
+		if (error)
+			goto err_recovery;
+
+		req->aborted = true;
+		return BLK_EH_RESET_TIMER;
+	}
+
 	/*
 	 * LIVE state should trigger the normal error recovery which will
 	 * handle completing this request.
 	 */
+err_recovery:
 	nvme_tcp_error_recovery(ctrl);
 	return BLK_EH_RESET_TIMER;
 }
@@ -2520,6 +2552,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	req->pdu_len = 0;
 	req->pdu_sent = 0;
 	req->h2cdata_left = 0;
+	req->aborted = false;
 	req->data_len = blk_rq_nr_phys_segments(rq) ?
 				blk_rq_payload_bytes(rq) : 0;
 	req->curr_bio = rq->bio;
@@ -2634,10 +2667,25 @@ static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
 	return len;
 }
 
+static void nvme_tcp_complete_rq(struct request *rq)
+{
+	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_tcp_queue *queue = req->queue;
+	struct nvme_command *cmd = req->req.cmd;
+	int c;
+
+	if (cmd && cmd->common.opcode == nvme_cmd_cancel) {
+		c = atomic_inc_return(&queue->cancel_free_cmds);
+		WARN_ON_ONCE(c > NVME_RSV_CANCEL_MAX);
+	}
+
+	nvme_complete_rq(rq);
+}
+
 static const struct blk_mq_ops nvme_tcp_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
 	.commit_rqs	= nvme_tcp_commit_rqs,
-	.complete	= nvme_complete_rq,
+	.complete	= nvme_tcp_complete_rq,
 	.init_request	= nvme_tcp_init_request,
 	.exit_request	= nvme_tcp_exit_request,
 	.init_hctx	= nvme_tcp_init_hctx,
-- 
2.43.5



