[PATCH-part-2 9/9] nvme: force sgls on user passthrough if possible

Keith Busch kbusch at meta.com
Wed Sep 4 11:38:17 PDT 2024


From: Keith Busch <kbusch at kernel.org>

With capable hardware, this is one way to guard against short buffers.
The consequences of getting the interface wrong can cause corruption and
crash kernels, and utilizing the mptr sgl feature leverages the protocol
to catch such incorrect usage. See CVE-2023-6238.

To emphasize the danger with using this interface, the kernel will be
tainted if the user accesses this interface without hardware capable of
guaranteeing transfer lengths.

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 drivers/nvme/host/ioctl.c | 17 +++++++++++++++++
 drivers/nvme/host/nvme.h  |  2 ++
 drivers/nvme/host/pci.c   | 20 ++++++++++++++------
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index f1d58e70933f5..cf889a0e79338 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -116,12 +116,22 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
 		u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags)
 {
+	struct nvme_ctrl *ctrl = nvme_req(req)->ctrl;
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
 	struct bio *bio = NULL;
 	int ret;
 
+	if (bdev) {
+		if (nvme_ctrl_sgl_supported(ctrl)) {
+			nvme_req(req)->flags |= NVME_REQ_USE_SGLS;
+		} else {
+			dev_warn_once(ctrl->device, "using unchecked buffer\n");
+			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+		}
+	}
+
 	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
 		struct iov_iter iter;
 
@@ -146,6 +156,13 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	if (bdev) {
 		bio_set_dev(bio, bdev);
 		if (meta_buffer && meta_len) {
+			if (nvme_ctrl_meta_sgl_supported(ctrl)) {
+				nvme_req(req)->flags |= NVME_REQ_USE_META_SGLS;
+			} else {
+				dev_warn_once(ctrl->device,
+					      "using unchecked meta buffer\n");
+				add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+			}
 			ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
 						     meta_seed);
 			if (ret)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 699cc36e596fa..3c27486acecdc 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -197,6 +197,8 @@ enum {
 	NVME_REQ_USERCMD		= (1 << 1),
 	NVME_MPATH_IO_STATS		= (1 << 2),
 	NVME_MPATH_CNT_ACTIVE		= (1 << 3),
+	NVME_REQ_USE_SGLS		= (1 << 4),
+	NVME_REQ_USE_META_SGLS		= (1 << 5),
 };
 
 static inline struct nvme_request *nvme_req(struct request *req)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a0a10451d7da8..eb2ac47f2bd54 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -522,7 +522,8 @@ static inline bool nvme_pci_sgl_capable(struct nvme_dev *dev,
 
 static inline bool nvme_pci_metadata_use_sgls(struct request *req)
 {
-	return blk_rq_integrity_segments(req) > 1;
+	return blk_rq_integrity_segments(req) > 1 ||
+		nvme_req(req)->flags & NVME_REQ_USE_META_SGLS;
 }
 
 static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
@@ -536,13 +537,20 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req,
 		return false;
 	if (nvme_pci_metadata_use_sgls(req))
 		return true;
-	return avg_seg_size >= sgl_threshold;
+	if (avg_seg_size < sgl_threshold)
+		return nvme_req(req)->flags & NVME_REQ_USE_SGLS;
+	return true;
 }
 
-static inline bool nvme_pci_use_prps(struct bio_vec *bv)
+static inline bool nvme_pci_use_prps(struct request *req, struct bio_vec *bv)
 {
-	unsigned int off = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
-	return  off + bv->bv_len <= NVME_CTRL_PAGE_SIZE * 2;
+	unsigned int off;
+
+	if (nvme_pci_metadata_use_sgls(req))
+		return false;
+
+	off = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
+	return off + bv->bv_len <= NVME_CTRL_PAGE_SIZE * 2;
 }
 
 static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
@@ -835,7 +843,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req)
 		struct bio_vec bv = req_bvec(req);
 
 		if (!is_pci_p2pdma_page(bv.bv_page)) {
-			if (nvme_pci_use_prps(&bv))
+			if (nvme_pci_use_prps(req, &bv))
 				return nvme_setup_prp_simple(dev, req, &bv);
 			if (nvme_pci_sgl_capable(dev, req))
 				return nvme_setup_sgl_simple(dev, req, &bv);
-- 
2.43.5




More information about the Linux-nvme mailing list