[PATCH V2 1/2] nvme: support fused pci nvme requests

clay.mayers at kioxia.com clay.mayers at kioxia.com
Mon Jan 25 14:58:43 EST 2021


From: Clay Mayers <clay.mayers at kioxia.com>

Adds support for fused nvme commands to be tunneled through a blk_mq
queue and submitted atomically to a pci nvme device queue.

In nvme_queue_rq(), when an nvme cmnd has the first fused flag set,
the nvme cmnd is saved in nvme_request.fctx and the command is not
queued to the device.  Once the nvme cmnd with the second fused flag
set is queued, nvme_request.nrq is used to get back to the first
fused request so both cmnds can be queued to the device atomically.

v2:
Reduced size of nvme_request by pointing to saved cmnd instead so
only fused commands require the extra 64 bytes.  The saved cmnd is
now with the first request to ease clean up.  Flipped second req
to point to the first so a union with the saved cmnd pointer can
be used to limit the fused impact to 8 bytes.

Fixed an issue where, after the first fused command had been aborted,
an unaborted second fused command could still submit it to the device.

Signed-off-by: Clay Mayers <clay.mayers at kioxia.com>
---
 drivers/nvme/host/nvme.h | 12 ++++++
 drivers/nvme/host/pci.c  | 85 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 567f7ad18a91..b41ce7cd4f49 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -157,6 +157,18 @@ struct nvme_request {
 	u8			flags;
 	u16			status;
 	struct nvme_ctrl	*ctrl;
+	union {
+		struct nvme_request *nrq;	/* partner request of a fused pair */
+		/*
+		 * fctx is only valid on the FUSE_FIRST request, and only
+		 * between the time nvme_queue_rq() stashes the first command
+		 * and the time the FUSE_SECOND request submits the pair.
+		 */
+		struct nvme_fused_ctx {
+			struct nvme_request *nrq2;	/* saved copy of nrq */
+			struct nvme_command cmnd;	/* saved 1st fused command */
+		} *fctx;
+	};
 };
 
 /*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3be352403839..ba4798685811 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -491,6 +491,32 @@ static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
 	nvmeq->last_sq_tail = nvmeq->sq_tail;
 }
 
+/**
+ * nvme_submit_cmd2() - Copy fused commands into a queue and ring the doorbell
+ * @nvmeq: The queue to use
+ * @cmd: The first command to send
+ * @cmd2: The second command to send
+ * @write_sq: Whether to write to the SQ doorbell
+ *
+ * Both commands are copied while holding sq_lock so that they land in
+ * adjacent submission queue slots, as fused operation requires; no other
+ * submitter can slip a command between them.
+ */
+static void nvme_submit_cmd2(struct nvme_queue *nvmeq, struct nvme_command *cmd,
+			     struct nvme_command *cmd2, bool write_sq)
+{
+	spin_lock(&nvmeq->sq_lock);
+	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
+		cmd, sizeof(*cmd));
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
+		cmd2, sizeof(*cmd2));
+	if (++nvmeq->sq_tail == nvmeq->q_depth)
+		nvmeq->sq_tail = 0;
+	nvme_write_sq_db(nvmeq, write_sq);
+	spin_unlock(&nvmeq->sq_lock);
+}
+
 /**
  * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
  * @nvmeq: The queue to use
@@ -876,6 +900,49 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 	return BLK_STS_OK;
 }
 
+/*
+ * Queue one half of a fused command pair.
+ *
+ * The first fused command is not sent to the device here; it is saved
+ * in a freshly allocated nvme_fused_ctx hanging off its nvme_request.
+ * When the matching FUSE_SECOND command arrives, both commands are
+ * written to adjacent submission queue slots in one step via
+ * nvme_submit_cmd2(), as fused operation requires.
+ */
+static blk_status_t nvme_queue_frq(struct nvme_queue *nvmeq,
+			struct request *req, struct nvme_command *cmnd,
+			bool write_sq)
+{
+	struct nvme_fused_ctx *fctx;
+
+	if (cmnd->common.flags & NVME_CMD_FUSE_FIRST) {
+		/*
+		 * Save cmnd to submit with the 2nd fused command.  Must be
+		 * GFP_ATOMIC: the ->queue_rq() path is not allowed to sleep.
+		 */
+		fctx = kmalloc(sizeof(*fctx), GFP_ATOMIC);
+		if (!fctx)
+			return BLK_STS_RESOURCE;
+		fctx->nrq2 = nvme_req(req)->nrq;
+		memcpy(&fctx->cmnd, cmnd, sizeof(*cmnd));
+		nvme_req(req)->fctx = fctx;
+		blk_mq_start_request(req);
+		return BLK_STS_OK;
+	}
+	/* handle NVME_CMD_FUSE_SECOND */
+	if (!nvme_req(req)->nrq) {
+		nvme_req(req)->status = NVME_SC_FUSED_FAIL;
+		return BLK_STS_IOERR;	/* First i/o has been canceled */
+	}
+
+	fctx = nvme_req(req)->nrq->fctx;
+	nvme_req(req)->nrq->fctx = NULL;
+	blk_mq_start_request(req);
+	nvme_submit_cmd2(nvmeq, &fctx->cmnd, cmnd, write_sq);
+	kfree(fctx);
+	return BLK_STS_OK;
+}
+
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
@@ -889,6 +944,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_command cmnd;
 	blk_status_t ret;
+	int fused;
 
 	iod->aborted = 0;
 	iod->npages = -1;
@@ -917,8 +973,15 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_unmap_data;
 	}
 
-	blk_mq_start_request(req);
-	nvme_submit_cmd(nvmeq, &cmnd, bd->last);
+	fused = cmnd.common.flags & (NVME_CMD_FUSE_FIRST|NVME_CMD_FUSE_SECOND);
+	if (likely(!fused)) {
+		blk_mq_start_request(req);
+		nvme_submit_cmd(nvmeq, &cmnd, bd->last);
+	} else {
+		ret = nvme_queue_frq(nvmeq, req, &cmnd, bd->last);
+		if (ret)
+			goto out_unmap_data;
+	}
 	return BLK_STS_OK;
 out_unmap_data:
 	nvme_unmap_data(dev, req);
@@ -2423,6 +2486,35 @@ static void nvme_pci_disable(struct nvme_dev *dev)
 	}
 }
 
+/*
+ * Tag iterator callback used while disabling the controller.
+ *
+ * If @req is the first half of a fused pair whose partner never made it
+ * to the device, free the saved first command and clear the partner's
+ * back-link so the FUSE_SECOND request fails in nvme_queue_frq()
+ * instead of submitting a half-canceled pair.
+ *
+ * NOTE(review): nvme_req(req)->cmd is dereferenced here, but the PCI
+ * path builds its nvme_command on the stack in nvme_queue_rq() and is
+ * not seen to assign ->cmd anywhere in this patch -- confirm ->cmd is
+ * valid for every request this iterator can visit.  Also verify this
+ * cannot race with nvme_queue_frq() updating ->fctx on another CPU.
+ */
+static bool nvme_pci_cancel_rq(struct request *req, void *data, bool reserved)
+{
+	if (unlikely(nvme_req(req)->cmd->common.flags & NVME_CMD_FUSE_FIRST)) {
+		struct nvme_fused_ctx *fctx = nvme_req(req)->fctx;
+
+		/* this will only be set if 2nd fused isn't queued yet */
+		if (unlikely(fctx)) {
+			fctx->nrq2->nrq = NULL;	/* break link of 2nd fused */
+			nvme_req(req)->fctx = NULL;
+			kfree(fctx);
+		}
+	}
+	return nvme_cancel_request(req, data, reserved);
+}
+
 static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 {
 	bool dead = true, freeze = false;
@@ -2459,7 +2538,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	nvme_pci_disable(dev);
 	nvme_reap_pending_cqes(dev);
 
-	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
+	blk_mq_tagset_busy_iter(&dev->tagset, nvme_pci_cancel_rq, &dev->ctrl);
 	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
 	blk_mq_tagset_wait_completed_request(&dev->tagset);
 	blk_mq_tagset_wait_completed_request(&dev->admin_tagset);
-- 
2.27.0




More information about the Linux-nvme mailing list