[PATCH 1/2] nvme-pci: meta-transfer via sgl
Kanchan Joshi
joshi.k at samsung.com
Wed Oct 18 11:30:02 PDT 2023
Introduce the ability to transfer the metadata buffer using sgl.
Also add an nvme request flag 'NVME_REQ_FORCE_SGL' that mandates both
data and meta transfer via sgl.
This is a preparatory patch to enable unprivileged passthrough via SGL.
Suggested-by: Christoph Hellwig <hch at lst.de>
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
---
drivers/nvme/host/nvme.h | 6 ++++
drivers/nvme/host/pci.c | 63 ++++++++++++++++++++++++++++++++++++----
2 files changed, 64 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f35647c470af..58f8efe1ace9 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -184,6 +184,7 @@ enum {
NVME_REQ_CANCELLED = (1 << 0),
NVME_REQ_USERCMD = (1 << 1),
NVME_MPATH_IO_STATS = (1 << 2),
+ NVME_REQ_FORCE_SGL = (1 << 3),
};
static inline struct nvme_request *nvme_req(struct request *req)
@@ -1043,6 +1044,11 @@ static inline void nvme_start_request(struct request *rq)
blk_mq_start_request(rq);
}
+static inline bool nvme_ctrl_meta_sgl_supported(struct nvme_ctrl *ctrl)
+{
+ return ctrl->sgls & (1 << 19);
+}
+
static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
{
return ctrl->sgls & ((1 << 0) | (1 << 1));
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3f0c9ee09a12..1907b1c9919a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -123,6 +123,7 @@ struct nvme_dev {
struct device *dev;
struct dma_pool *prp_page_pool;
struct dma_pool *prp_small_pool;
+ struct dma_pool *meta_sgl_pool;
unsigned online_queues;
unsigned max_qid;
unsigned io_queues[HCTX_MAX_TYPES];
@@ -236,6 +237,8 @@ struct nvme_iod {
unsigned int dma_len; /* length of single DMA segment mapping */
dma_addr_t first_dma;
dma_addr_t meta_dma;
+ dma_addr_t meta_dma_sg;
+ struct nvme_sgl_desc *meta_sgl;
struct sg_table sgt;
union nvme_descriptor list[NVME_MAX_NR_ALLOCATIONS];
};
@@ -772,18 +775,23 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
blk_status_t ret = BLK_STS_RESOURCE;
int rc;
+ bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+
+ if (force_sgl && !nvme_ctrl_sgl_supported(&dev->ctrl))
+ return BLK_STS_IOERR;
if (blk_rq_nr_phys_segments(req) == 1) {
struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
struct bio_vec bv = req_bvec(req);
if (!is_pci_p2pdma_page(bv.bv_page)) {
- if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
+ if (!force_sgl &&
+ bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
return nvme_setup_prp_simple(dev, req,
&cmnd->rw, &bv);
- if (nvmeq->qid && sgl_threshold &&
- nvme_ctrl_sgl_supported(&dev->ctrl))
+ if (nvmeq->qid && nvme_ctrl_sgl_supported(&dev->ctrl)
+ && (sgl_threshold || force_sgl))
return nvme_setup_sgl_simple(dev, req,
&cmnd->rw, &bv);
}
@@ -806,7 +814,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out_free_sg;
}
- if (nvme_pci_use_sgls(dev, req, iod->sgt.nents))
+ if (force_sgl || nvme_pci_use_sgls(dev, req, iod->sgt.nents))
ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw);
else
ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
@@ -825,13 +833,44 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ bool force_sgl = nvme_req(req)->flags & NVME_REQ_FORCE_SGL;
+ blk_status_t ret;
iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
- cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+
+ if (!force_sgl) {
+ cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+ return BLK_STS_OK;
+ }
+
+ if (!nvme_ctrl_meta_sgl_supported(&dev->ctrl)) {
+ WARN_ONCE(1, "controller does not support meta sgl.");
+ ret = BLK_STS_IOERR;
+ goto out_unmap;
+ }
+
+ iod->meta_sgl = dma_pool_alloc(dev->meta_sgl_pool, GFP_KERNEL,
+ &iod->meta_dma_sg);
+ if (!iod->meta_sgl) {
+ ret = BLK_STS_IOERR;
+ goto out_unmap;
+ }
+
+ iod->meta_sgl->addr = cpu_to_le64(iod->meta_dma);
+ iod->meta_sgl->length = cpu_to_le32(rq_integrity_vec(req)->bv_len);
+ iod->meta_sgl->type = NVME_SGL_FMT_DATA_DESC << 4;
+ cmnd->rw.metadata = cpu_to_le64(iod->meta_dma_sg);
+ cmnd->rw.flags = NVME_CMD_SGL_METASEG;
+
return BLK_STS_OK;
+
+out_unmap:
+ dma_unmap_page(dev->dev, iod->meta_dma,
+ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ return ret;
}
static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req)
@@ -968,6 +1007,11 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
dma_unmap_page(dev->dev, iod->meta_dma,
rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+
+ if (nvme_req(req)->flags & NVME_REQ_FORCE_SGL)
+ dma_pool_free(dev->meta_sgl_pool,
+ (void *)iod->meta_sgl,
+ iod->meta_dma_sg);
}
if (blk_rq_nr_phys_segments(req))
@@ -2644,6 +2688,14 @@ static int nvme_setup_prp_pools(struct nvme_dev *dev)
dma_pool_destroy(dev->prp_page_pool);
return -ENOMEM;
}
+ /* for metadata sgl */
+ dev->meta_sgl_pool = dma_pool_create("meta sg 16", dev->dev, 16, 16, 0);
+ if (!dev->meta_sgl_pool) {
+ dma_pool_destroy(dev->prp_page_pool);
+ dma_pool_destroy(dev->prp_small_pool);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -2651,6 +2703,7 @@ static void nvme_release_prp_pools(struct nvme_dev *dev)
{
dma_pool_destroy(dev->prp_page_pool);
dma_pool_destroy(dev->prp_small_pool);
+ dma_pool_destroy(dev->meta_sgl_pool);
}
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
--
2.25.1
More information about the Linux-nvme
mailing list