[PATCH 7/7] nvme-tcp-offload: Add IO level implementation

Thu Nov 19 09:21:07 EST 2020

From: Dean Balandin <dbalandin at marvell.com>

In this patch, we present the IO level functionality.
The nvme-tcp-offload shall work on the IO-level, meaning the nvme-tcp-offload ULP module shall pass the request to the nvme-tcp-offload vendor driver and shall wait for the request completion.
No additional handling is needed in between; this design will reduce the CPU utilization, as we will describe below.

The nvme-tcp-offload vendor driver shall register to nvme-tcp-offload ULP with the following IO-path ops:
 - init_req
 - map_sg - in order to map the request sg (similar to nvme_rdma_map_data).
 - send_req - in order to pass the request to the handling of the offload driver that shall pass it to the vendor specific device.

The vendor driver will manage the context from which the request will be executed and the request aggregations.
Once the IO is completed, the nvme-tcp-offload vendor driver shall call command.done(), which shall invoke the nvme-tcp-offload ULP layer to complete the request.

Signed-off-by: Dean Balandin <dbalandin at marvell.com>
Signed-off-by: Shai Malin <smalin at marvell.com>
Signed-off-by: Ariel Elior <aelior at marvell.com>
Signed-off-by: Michal Kalderon <mkalderon at marvell.com>
---
 drivers/nvme/host/tcp-offload.c | 67 ++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index baf38526ccb9..6163f8360072 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -114,7 +114,10 @@ nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
 		       union nvme_result *result,
 		       __le16 status)
 {
-	/* Placeholder - complete request with/without error */
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (!nvme_end_request(rq, cpu_to_le16(status << 1), *result))
+		nvme_complete_rq(rq);
 }
 
 struct nvme_tcp_ofld_dev *
@@ -722,8 +725,10 @@ nvme_tcp_ofld_init_request(struct blk_mq_tag_set *set,
 {
 	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_tcp_ofld_ctrl *ctrl = set->driver_data;
+	int qid = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 
-	/* Placeholder - init request */
+	req->queue = &ctrl->queues[qid];
+	nvme_req(rq)->ctrl = &ctrl->nctrl;
 	req->done = nvme_tcp_ofld_req_done;
 	ctrl->dev->ops->init_req(req);
 
@@ -736,11 +741,25 @@ nvme_tcp_ofld_queue_rq(struct blk_mq_hw_ctx *hctx,
 {
 	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(bd->rq);
 	struct nvme_tcp_ofld_queue *queue = hctx->driver_data;
-	struct nvme_tcp_ofld_ops *ops = queue->dev->ops;
+	struct nvme_tcp_ofld_ctrl *ctrl = queue->ctrl;
+	struct nvme_ns *ns = hctx->queue->queuedata;
+	struct nvme_tcp_ofld_dev *dev = queue->dev;
+	struct nvme_tcp_ofld_ops *ops = dev->ops;
+	struct request *rq = bd->rq;
+	bool queue_ready;
+	int rc;
 
-	/* Call nvme_setup_cmd(...) */
+	queue_ready = test_bit(NVME_TCP_OFLD_Q_LIVE, &queue->flags);
+	if (!nvmf_check_ready(&ctrl->nctrl, rq, queue_ready))
+		return nvmf_fail_nonready_command(&ctrl->nctrl, rq);
 
-	/* Call ops->map_sg(...) */
+	rc = nvme_setup_cmd(ns, rq, &req->nvme_cmd);
+	if (rc)
+		return rc;
+
+	blk_mq_start_request(rq);
+	ops->map_sg(dev, req);
+	ops->send_req(req);
 
 	return BLK_STS_OK;
 }
@@ -815,6 +834,42 @@ static int nvme_tcp_ofld_poll(struct blk_mq_hw_ctx *hctx)
 	return 0;
 }
 
+static enum blk_eh_timer_return
+nvme_tcp_ofld_timeout(struct request *rq, bool reserved)
+{
+	struct nvme_tcp_ofld_req *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_tcp_ofld_ctrl *ctrl = req->queue->ctrl;
+
+	/* Restart the timer if a controller reset is already scheduled. Any
+	 * timed out request would be handled before entering the connecting
+	 * state.
+	 */
+	if (ctrl->nctrl.state == NVME_CTRL_RESETTING)
+		return BLK_EH_RESET_TIMER;
+
+	dev_warn(ctrl->nctrl.device,
+		"queue %d: timeout request %#x type %d\n",
+		nvme_tcp_ofld_qid(req->queue), rq->tag,
+		req->nvme_cmd.common.opcode);
+
+	if (ctrl->nctrl.state != NVME_CTRL_LIVE) {
+		/*
+		 * Tear down immediately if the controller times out while
+		 * starting or after error recovery began. All outstanding
+		 * requests are completed on shutdown, so we return BLK_EH_DONE.
+		 */
+		flush_work(&ctrl->err_work);
+		nvme_tcp_ofld_teardown_io_queues(&ctrl->nctrl, false);
+		nvme_tcp_ofld_teardown_admin_queue(&ctrl->nctrl, false);
+		return BLK_EH_DONE;
+	}
+
+	dev_warn(ctrl->nctrl.device, "starting error recovery\n");
+	nvme_tcp_ofld_error_recovery(&ctrl->nctrl);
+
+	return BLK_EH_RESET_TIMER;
+}
+
 static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
 	.queue_rq	= nvme_tcp_ofld_queue_rq,
 	.init_request	= nvme_tcp_ofld_init_request,
@@ -822,6 +877,7 @@ static struct blk_mq_ops nvme_tcp_ofld_mq_ops = {
 	.exit_request	= nvme_tcp_ofld_exit_request,
 	.init_hctx	= nvme_tcp_ofld_init_hctx,
 	.map_queues	= nvme_tcp_ofld_map_queues,
+	.timeout	= nvme_tcp_ofld_timeout,
 	.poll		= nvme_tcp_ofld_poll,
 };
 
@@ -831,6 +887,7 @@ static struct blk_mq_ops nvme_tcp_ofld_admin_mq_ops = {
 	.complete	= nvme_complete_rq,
 	.exit_request	= nvme_tcp_ofld_exit_request,
 	.init_hctx	= nvme_tcp_ofld_init_hctx,
+	.timeout	= nvme_tcp_ofld_timeout,
 };
 
 static const struct nvme_ctrl_ops nvme_tcp_ofld_ctrl_ops = {
-- 
2.22.0