[PATCH v4] nvme: fix corruption for passthrough meta/data

Kanchan Joshi joshi.k at samsung.com
Thu Oct 12 22:14:58 PDT 2023


User can specify a smaller meta buffer than what the device is
wired to update/access. Kernel makes a copy of the meta buffer into
which the device does DMA.
As a result, the device overwrites the unrelated kernel memory, causing
random kernel crashes.

Same issue is possible for extended-lba case also. When user specifies a
short unaligned buffer, the kernel makes a copy and uses that for DMA.

Detect these situations and prevent corruption for unprivileged user
passthrough. No change to status-quo for privileged/root user.

Fixes: 63263d60e0f9 ("nvme: Use metadata for passthrough commands")
Cc: stable at vger.kernel.org

Reported-by: Vincent Fu <vincent.fu at samsung.com>
Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
---
Changes since v3:
- Block only unprivileged user
- Harden the checks by disallowing everything for which data length
  (nlb) can not be determined
- Separate the bounce buffer checks to a different function
- Factor in CSIs beyond NVM and ZNS

Changes since v2:
- Handle extended-lba case: short unaligned buffer IO
- Reduce the scope of check to only well-known commands
- Do not check early. Move it deeper so that check gets executed less
  often
- Combine two patches into one.

Changes since v1:
- Revise the check to exclude PRACT=1 case

 drivers/nvme/host/ioctl.c | 116 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index d8ff796fd5f2..57160ca02e65 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -96,6 +96,76 @@ static void __user *nvme_to_user_ptr(uintptr_t ptrval)
 	return (void __user *)ptrval;
 }
 
+static inline bool nvme_nlb_in_cdw12(struct nvme_ns *ns, u8 opcode)
+{
+	u8 csi = ns->head->ids.csi;
+
+	if (csi != NVME_CSI_NVM && csi != NVME_CSI_ZNS)
+		return false;
+
+	switch (opcode) {
+	case nvme_cmd_read:
+	case nvme_cmd_write:
+	case nvme_cmd_compare:
+	case nvme_cmd_zone_append:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * NVMe has no separate field to encode the metadata length expected
+ * (except when using SGLs).
+ *
+ * Because of that we can't allow to transfer arbitrary metadata, as
+ * a metadata buffer that is shorted than what the device expects for
+ * the command will lead to arbitrary kernel (if bounce buffering) or
+ * userspace (if not) memory corruption.
+ *
+ * Check that external metadata is only specified for the few commands
+ * where we know the length based of other fields, and that it fits
+ * the actual data transfer from/to the device.
+ */
+static bool nvme_validate_metadata_len(struct request *req, unsigned meta_len)
+{
+	struct nvme_ns *ns = req->q->queuedata;
+	struct nvme_command *c = nvme_req(req)->cmd;
+	u32 len_by_nlb;
+
+	/* Do not guard admin */
+	if (capable(CAP_SYS_ADMIN))
+		return true;
+
+	/* Block commands that do not have nlb in cdw12 */
+	if (!nvme_nlb_in_cdw12(ns, c->common.opcode)) {
+		dev_err(ns->ctrl->device,
+			"unknown metadata command %c\n", c->common.opcode);
+		return false;
+	}
+
+	/* Skip when PI is inserted or stripped and not transferred */
+	if (ns->ms == ns->pi_size &&
+	    (c->rw.control & cpu_to_le16(NVME_RW_PRINFO_PRACT)))
+		return true;
+
+	if (ns->features & NVME_NS_EXT_LBAS) {
+		dev_err(ns->ctrl->device,
+			"requires extended LBAs for metadata\n");
+		return false;
+	}
+
+	len_by_nlb = (le16_to_cpu(c->rw.length) + 1) * ns->ms;
+	if (meta_len < len_by_nlb) {
+		dev_err(ns->ctrl->device,
+			"metadata length (%u instad of %u) is too small.\n",
+			meta_len, len_by_nlb);
+		return false;
+	}
+
+	return true;
+}
+
 static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
 		unsigned len, u32 seed)
 {
@@ -104,6 +174,9 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
 	void *buf;
 	struct bio *bio = req->bio;
 
+	if (!nvme_validate_metadata_len(req, len))
+		return ERR_PTR(-EINVAL);
+
 	buf = kmalloc(len, GFP_KERNEL);
 	if (!buf)
 		goto out;
@@ -134,6 +207,41 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf,
 	return ERR_PTR(ret);
 }
 
+static bool nvme_validate_buffer_len(struct nvme_ns *ns, struct nvme_command *c,
+				     unsigned meta_len, unsigned data_len)
+{
+	u32 mlen_by_nlb, dlen_by_nlb;
+
+	/* Do not guard admin */
+	if (capable(CAP_SYS_ADMIN))
+		return true;
+
+	/* Block commands that do not have nlb in cdw12 */
+	if (!nvme_nlb_in_cdw12(ns, c->common.opcode)) {
+		dev_err(ns->ctrl->device,
+			"unknown metadata command %c.\n", c->common.opcode);
+		return false;
+	}
+
+	/* When PI is inserted or stripped and not transferred.*/
+	if (ns->ms == ns->pi_size &&
+	    (c->rw.control & cpu_to_le16(NVME_RW_PRINFO_PRACT)))
+		mlen_by_nlb = 0;
+	else
+		mlen_by_nlb = (le16_to_cpu(c->rw.length) + 1) * ns->ms;
+
+	dlen_by_nlb = (le16_to_cpu(c->rw.length) + 1) << ns->lba_shift;
+
+	if (data_len < (dlen_by_nlb + mlen_by_nlb)) {
+		dev_err(ns->ctrl->device,
+			"buffer length (%u instad of %u) is too small.\n",
+			data_len, dlen_by_nlb + mlen_by_nlb);
+		return false;
+	}
+
+	return true;
+}
+
 static int nvme_finish_user_metadata(struct request *req, void __user *ubuf,
 		void *meta, unsigned len, int ret)
 {
@@ -202,6 +310,14 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		}
 		*metap = meta;
 	}
+	/* Guard for a short bounce buffer */
+	if (bio->bi_private) {
+		if (!nvme_validate_buffer_len(ns, nvme_req(req)->cmd,
+					      meta_len, bufflen)) {
+			ret = -EINVAL;
+			goto out_unmap;
+		}
+	}
 
 	return ret;
 
-- 
2.25.1




More information about the Linux-nvme mailing list