[PATCH] nvmet: pci-epf: Keep completion queues mapped

Damien Le Moal <dlemoal@kernel.org>
Wed Mar 12 18:12:06 PDT 2025


Instead of mapping and unmapping a completion queue's memory to the
host PCI address space every time nvmet_pci_epf_cq_work() is called,
map a completion queue to the host PCI address space once when the
completion queue is created with nvmet_pci_epf_create_cq() and unmap
it when the completion queue is deleted with nvmet_pci_epf_delete_cq().

This removes the completion queue mapping/unmapping from
nvmet_pci_epf_cq_work() and significantly increases performance. For
a single-job, 4K random read, QD=1 workload, IOPS increases from
23 KIOPS to 25 KIOPS. A significant throughput increase can also be
seen for high queue depth and large I/O workloads.

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 drivers/nvme/target/pci-epf.c | 77 ++++++++++++++++++-----------------
 1 file changed, 39 insertions(+), 38 deletions(-)

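Note: the QD=1 numbers quoted above can be reproduced on the host side
with an fio job along these lines (a sketch only; the device path and
ioengine are examples, not necessarily what was used for the
measurement above):

    fio --name=randread --filename=/dev/nvme0n1 --direct=1 \
        --ioengine=io_uring --rw=randread --bs=4k \
        --iodepth=1 --numjobs=1 --time_based --runtime=30
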
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index b1e31483f157..6272cd013136 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -271,6 +271,36 @@ static inline void nvmet_pci_epf_mem_unmap(struct nvmet_pci_epf *nvme_epf,
 	pci_epc_mem_unmap(epf->epc, epf->func_no, epf->vfunc_no, map);
 }
 
+static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
+				   struct nvmet_pci_epf_queue *queue)
+{
+	struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
+	int ret;
+
+	ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
+				    queue->pci_size, &queue->pci_map);
+	if (ret) {
+		dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
+			queue->qid, ret);
+		return ret;
+	}
+
+	if (queue->pci_map.pci_size < queue->pci_size) {
+		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
+			queue->qid);
+		nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
+					     struct nvmet_pci_epf_queue *queue)
+{
+	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
+}
+
 struct nvmet_pci_epf_dma_filter {
 	struct device *dev;
 	u32 dma_mask;
@@ -1264,6 +1294,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
 	struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
 	u16 status;
+	int ret;
 
 	if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
 		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,6 +1329,12 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	if (status != NVME_SC_SUCCESS)
 		goto err;
 
+	ret = nvmet_pci_epf_map_queue(ctrl, cq);
+	if (ret) {
+		status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
+		goto err;
+	}
+
 	set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
 
 	dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@@ -1322,6 +1359,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
 	cancel_delayed_work_sync(&cq->work);
 	nvmet_pci_epf_drain_queue(cq);
 	nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+	nvmet_pci_epf_unmap_queue(ctrl, cq);
 
 	return NVME_SC_SUCCESS;
 }
@@ -1554,36 +1592,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
 	ctrl->cq = NULL;
 }
 
-static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
-				   struct nvmet_pci_epf_queue *queue)
-{
-	struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
-	int ret;
-
-	ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
-				      queue->pci_size, &queue->pci_map);
-	if (ret) {
-		dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
-			queue->qid, ret);
-		return ret;
-	}
-
-	if (queue->pci_map.pci_size < queue->pci_size) {
-		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
-			queue->qid);
-		nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
-					     struct nvmet_pci_epf_queue *queue)
-{
-	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
-}
-
 static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
 {
 	struct nvmet_pci_epf_iod *iod =
@@ -1747,11 +1755,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	struct nvme_completion *cqe;
 	struct nvmet_pci_epf_iod *iod;
 	unsigned long flags;
-	int ret, n = 0;
-
-	ret = nvmet_pci_epf_map_queue(ctrl, cq);
-	if (ret)
-		goto again;
+	int ret = 0, n = 0;
 
 	while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
 
@@ -1798,8 +1802,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 		n++;
 	}
 
-	nvmet_pci_epf_unmap_queue(ctrl, cq);
-
 	/*
 	 * We do not support precise IRQ coalescing time (100ns units as per
 	 * NVMe specifications). So if we have posted completion entries without
@@ -1808,7 +1810,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	if (n)
 		nvmet_pci_epf_raise_irq(ctrl, cq, true);
 
-again:
 	if (ret < 0)
 		queue_delayed_work(system_highpri_wq, &cq->work,
 				   NVMET_PCI_EPF_CQ_RETRY_INTERVAL);
-- 
2.48.1