[PATCHv5 8/8] nvme-pci: convert metadata mapping to dma iter
Keith Busch
kbusch at meta.com
Fri Aug 8 08:58:26 PDT 2025
From: Keith Busch <kbusch at kernel.org>
Align the metadata mapping with the DMA iterator scheme already used for
data, and remove one more user of the scatter-gather DMA mapping.
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
drivers/nvme/host/pci.c | 159 +++++++++++++++++++++-------------------
1 file changed, 82 insertions(+), 77 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index decb3ad1508a7..ab9d37d0e05dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -172,9 +172,7 @@ struct nvme_dev {
u32 last_ps;
bool hmb;
struct sg_table *hmb_sgt;
-
mempool_t *dmavec_mempool;
- mempool_t *iod_meta_mempool;
/* shadow doorbell buffer support: */
__le32 *dbbuf_dbs;
@@ -264,6 +262,12 @@ enum nvme_iod_flags {
/* DMA mapped with PCI_P2PDMA_MAP_BUS_ADDR */
IOD_P2P_BUS_ADDR = 1U << 3,
+
+ /* Metadata DMA mapped with PCI_P2PDMA_MAP_BUS_ADDR */
+ IOD_META_P2P_BUS_ADDR = 1U << 4,
+
+ /* Metadata using non-coalesced MPTR */
+ IOD_META_MPTR = 1U << 5,
};
struct nvme_dma_vec {
@@ -281,13 +285,14 @@ struct nvme_iod {
u8 nr_descriptors;
unsigned int total_len;
+ unsigned int meta_total_len;
struct dma_iova_state dma_state;
+ struct dma_iova_state meta_dma_state;
void *descriptors[NVME_MAX_NR_DESCRIPTORS];
struct nvme_dma_vec *dma_vecs;
unsigned int nr_dma_vecs;
dma_addr_t meta_dma;
- struct sg_table meta_sgt;
struct nvme_sgl_desc *meta_descriptor;
};
@@ -644,6 +649,11 @@ static inline struct dma_pool *nvme_dma_pool(struct nvme_queue *nvmeq,
return nvmeq->descriptor_pools.large;
}
+static inline bool nvme_pci_cmd_use_meta_sgl(struct nvme_command *cmd)
+{
+ return (cmd->common.flags & NVME_CMD_SGL_ALL) == NVME_CMD_SGL_METASEG;
+}
+
static inline bool nvme_pci_cmd_use_sgl(struct nvme_command *cmd)
{
return cmd->common.flags &
@@ -711,6 +721,43 @@ static void __nvme_free_sgls(struct device *dma_dev, struct nvme_sgl_desc *sge,
le32_to_cpu(sg_list[i].length), dir);
}
+static void nvme_free_meta_sgls(struct nvme_iod *iod, struct device *dma_dev,
+ enum dma_data_direction dir)
+{
+ struct nvme_sgl_desc *sge = iod->meta_descriptor;
+
+ __nvme_free_sgls(dma_dev, sge, &sge[1], dir);
+}
+
+static void nvme_unmap_metadata(struct request *req)
+{
+ struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
+ enum dma_data_direction dir = rq_dma_dir(req);
+ struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct device *dma_dev = nvmeq->dev->dev;
+
+ if (iod->flags & IOD_META_MPTR) {
+ dma_unmap_page(dma_dev, iod->meta_dma,
+ rq_integrity_vec(req).bv_len,
+ rq_dma_dir(req));
+ return;
+ }
+
+ if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
+ iod->meta_total_len,
+ iod->flags & IOD_META_P2P_BUS_ADDR)) {
+ if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
+ nvme_free_meta_sgls(iod, dma_dev, dir);
+ else
+ dma_unmap_page(dma_dev, iod->meta_dma,
+ iod->meta_total_len, dir);
+ }
+
+ if (iod->meta_descriptor)
+ dma_pool_free(nvmeq->descriptor_pools.small,
+ iod->meta_descriptor, iod->meta_dma);
+}
+
static void nvme_free_sgls(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -1023,70 +1070,59 @@ static blk_status_t nvme_map_data(struct request *req)
return nvme_pci_setup_data_prp(req, &iter);
}
-static void nvme_pci_sgl_set_data_sg(struct nvme_sgl_desc *sge,
- struct scatterlist *sg)
-{
- sge->addr = cpu_to_le64(sg_dma_address(sg));
- sge->length = cpu_to_le32(sg_dma_len(sg));
- sge->type = NVME_SGL_FMT_DATA_DESC << 4;
-}
-
static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
{
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
- struct nvme_dev *dev = nvmeq->dev;
+ unsigned int entries = req->nr_integrity_segments;
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct nvme_dev *dev = nvmeq->dev;
struct nvme_sgl_desc *sg_list;
- struct scatterlist *sgl, *sg;
- unsigned int entries;
+ struct blk_dma_iter iter;
dma_addr_t sgl_dma;
- int rc, i;
-
- iod->meta_sgt.sgl = mempool_alloc(dev->iod_meta_mempool, GFP_ATOMIC);
- if (!iod->meta_sgt.sgl)
- return BLK_STS_RESOURCE;
+ int i = 0;
- sg_init_table(iod->meta_sgt.sgl, req->nr_integrity_segments);
- iod->meta_sgt.orig_nents = blk_rq_map_integrity_sg(req,
- iod->meta_sgt.sgl);
- if (!iod->meta_sgt.orig_nents)
- goto out_free_sg;
+ if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev,
+ &iod->meta_dma_state, &iter))
+ return iter.status;
- rc = dma_map_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req),
- DMA_ATTR_NO_WARN);
- if (rc)
- goto out_free_sg;
+ if (iter.p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
+ iod->flags |= IOD_META_P2P_BUS_ADDR;
+ else if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
+ entries = 1;
+
+ if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) {
+ iod->cmd.common.metadata = cpu_to_le64(iter.addr);
+ iod->meta_total_len = iter.len;
+ iod->meta_dma = iter.addr;
+ iod->meta_descriptor = NULL;
+ return BLK_STS_OK;
+ }
sg_list = dma_pool_alloc(nvmeq->descriptor_pools.small, GFP_ATOMIC,
&sgl_dma);
if (!sg_list)
- goto out_unmap_sg;
+ return BLK_STS_RESOURCE;
- entries = iod->meta_sgt.nents;
iod->meta_descriptor = sg_list;
iod->meta_dma = sgl_dma;
-
iod->cmd.common.flags = NVME_CMD_SGL_METASEG;
iod->cmd.common.metadata = cpu_to_le64(sgl_dma);
-
- sgl = iod->meta_sgt.sgl;
if (entries == 1) {
- nvme_pci_sgl_set_data_sg(sg_list, sgl);
+ iod->meta_total_len = iter.len;
+ nvme_pci_sgl_set_data(sg_list, &iter);
return BLK_STS_OK;
}
sgl_dma += sizeof(*sg_list);
- nvme_pci_sgl_set_seg(sg_list, sgl_dma, entries);
- for_each_sg(sgl, sg, entries, i)
- nvme_pci_sgl_set_data_sg(&sg_list[i + 1], sg);
-
- return BLK_STS_OK;
+ do {
+ nvme_pci_sgl_set_data(&sg_list[++i], &iter);
+ iod->meta_total_len += iter.len;
+ } while (blk_rq_integrity_dma_map_iter_next(req, dev->dev, &iter));
-out_unmap_sg:
- dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
-out_free_sg:
- mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
- return BLK_STS_RESOURCE;
+ nvme_pci_sgl_set_seg(sg_list, sgl_dma, i);
+ if (unlikely(iter.status))
+ nvme_unmap_metadata(req);
+ return iter.status;
}
static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
@@ -1099,6 +1135,7 @@ static blk_status_t nvme_pci_setup_meta_mptr(struct request *req)
if (dma_mapping_error(nvmeq->dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
iod->cmd.common.metadata = cpu_to_le64(iod->meta_dma);
+ iod->flags |= IOD_META_MPTR;
return BLK_STS_OK;
}
@@ -1120,7 +1157,7 @@ static blk_status_t nvme_prep_rq(struct request *req)
iod->flags = 0;
iod->nr_descriptors = 0;
iod->total_len = 0;
- iod->meta_sgt.nents = 0;
+ iod->meta_total_len = 0;
ret = nvme_setup_cmd(req->q->queuedata, req);
if (ret)
@@ -1231,25 +1268,6 @@ static void nvme_queue_rqs(struct rq_list *rqlist)
*rqlist = requeue_list;
}
-static __always_inline void nvme_unmap_metadata(struct request *req)
-{
- struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
- struct nvme_dev *dev = nvmeq->dev;
-
- if (!iod->meta_sgt.nents) {
- dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req).bv_len,
- rq_dma_dir(req));
- return;
- }
-
- dma_pool_free(nvmeq->descriptor_pools.small, iod->meta_descriptor,
- iod->meta_dma);
- dma_unmap_sgtable(dev->dev, &iod->meta_sgt, rq_dma_dir(req), 0);
- mempool_free(iod->meta_sgt.sgl, dev->iod_meta_mempool);
-}
-
static __always_inline void nvme_pci_unmap_rq(struct request *req)
{
if (blk_integrity_rq(req))
@@ -3055,7 +3073,6 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
- size_t meta_size = sizeof(struct scatterlist) * (NVME_MAX_META_SEGS + 1);
size_t alloc_size = sizeof(struct nvme_dma_vec) * NVME_MAX_SEGS;
dev->dmavec_mempool = mempool_create_node(1,
@@ -3064,17 +3081,7 @@ static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
dev_to_node(dev->dev));
if (!dev->dmavec_mempool)
return -ENOMEM;
-
- dev->iod_meta_mempool = mempool_create_node(1,
- mempool_kmalloc, mempool_kfree,
- (void *)meta_size, GFP_KERNEL,
- dev_to_node(dev->dev));
- if (!dev->iod_meta_mempool)
- goto free;
return 0;
-free:
- mempool_destroy(dev->dmavec_mempool);
- return -ENOMEM;
}
static void nvme_free_tagset(struct nvme_dev *dev)
@@ -3524,7 +3531,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_free_queues(dev, 0);
out_release_iod_mempool:
mempool_destroy(dev->dmavec_mempool);
- mempool_destroy(dev->iod_meta_mempool);
out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl:
@@ -3588,7 +3594,6 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_dbbuf_dma_free(dev);
nvme_free_queues(dev, 0);
mempool_destroy(dev->dmavec_mempool);
- mempool_destroy(dev->iod_meta_mempool);
nvme_release_descriptor_pools(dev);
nvme_dev_unmap(dev);
nvme_uninit_ctrl(&dev->ctrl);
--
2.47.3
More information about the Linux-nvme
mailing list