[PATCH blktests 2/2] nvme: add nvme metadata passthrough test

Shin'ichiro Kawasaki shinichiro.kawasaki at wdc.com
Sun Jun 15 19:07:16 PDT 2025


From: Keith Busch <kbusch at kernel.org>

Get more coverage on nvme metadata passthrough. Specifically in this
test, read-only metadata is targeted as this had been a gap in previous
test coverage.

Link: https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
Signed-off-by: Keith Busch <kbusch at kernel.org>
[Shin'ichiro: added test device requirement checks and .gitignore line]
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki at wdc.com>
---
 src/.gitignore              |   1 +
 src/Makefile                |   1 +
 src/nvme-passthrough-meta.c | 232 ++++++++++++++++++++++++++++++++++++
 tests/nvme/064              |  34 ++++++
 tests/nvme/064.out          |   2 +
 5 files changed, 270 insertions(+)
 create mode 100644 src/nvme-passthrough-meta.c
 create mode 100755 tests/nvme/064
 create mode 100644 tests/nvme/064.out

diff --git a/src/.gitignore b/src/.gitignore
index df7aff5..399a046 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -9,3 +9,4 @@
 /sg/syzkaller1
 /zbdioctl
 /miniublk
+/nvme-passthrough-meta
diff --git a/src/Makefile b/src/Makefile
index a94e5f2..f91ac62 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,6 +13,7 @@ C_TARGETS := \
 	loop_change_fd \
 	loop_get_status_null \
 	mount_clear_sock \
+	nvme-passthrough-meta \
 	nbdsetsize \
 	openclose \
 	sg/dxfer-from-dev \
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
new file mode 100644
index 0000000..29980a2
--- /dev/null
+++ b/src/nvme-passthrough-meta.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: GPL-3.0+
+// Copyright (C) 2025 Keith Busch
+
+/*
+ * Simple test exercising the user metadata interfaces used by nvme passthrough
+ * commands.
+ */
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#ifndef _LINUX_NVME_IOCTL_H
+#define _LINUX_NVME_IOCTL_H
+struct nvme_passthru_cmd {	/* argument of the two _IOWR ioctls below */
+	__u8    opcode;		/* main() uses 0x6 (id-ns), 1 (write), 2 (read) */
+	__u8    flags;
+	__u16   rsvd1;
+	__u32   nsid;		/* main() fills in the NVME_IOCTL_ID result */
+	__u32   cdw2;
+	__u32   cdw3;
+	__u64   metadata;	/* user address of the metadata buffer */
+	__u64   addr;		/* user address of the data buffer */
+	__u32   metadata_len;	/* length of the buffer at metadata */
+	__u32   data_len;	/* length of the buffer at addr, in bytes */
+	__u32   cdw10;
+	__u32   cdw11;
+	__u32   cdw12;		/* main() passes the block count minus one */
+	__u32   cdw13;
+	__u32   cdw14;
+	__u32   cdw15;
+	__u32   timeout_ms;
+	__u32   result;
+};
+
+#define NVME_IOCTL_ID		_IO('N', 0x40)	/* return value is used as the nsid */
+#define NVME_IOCTL_ADMIN_CMD    _IOWR('N', 0x41, struct nvme_passthru_cmd)
+#define NVME_IOCTL_IO_CMD       _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#endif /* _LINUX_NVME_IOCTL_H */
+
+struct nvme_lbaf {	/* element type of nvme_id_ns.lbaf[] */
+	__le16	ms;	/* metadata size per block; main() rejects 0 */
+	__u8	ds;	/* main() derives the block size as 1 << ds */
+	__u8	rp;	/* not read by this program */
+};
+
+struct nvme_id_ns {	/* data buffer of the opcode 0x6 admin command in main() */
+	__le64	nsze;
+	__le64	ncap;
+	__le64	nuse;
+	__u8	nsfeat;
+	__u8	nlbaf;
+	__u8	flbas;	/* main() builds the lbaf[] index from bits of this field */
+	__u8	mc;
+	__u8	dpc;
+	__u8	dps;
+	__u8	nmic;
+	__u8	rescap;
+	__u8	fpi;
+	__u8	dlfeat;
+	__le16	nawun;
+	__le16	nawupf;
+	__le16	nacwu;
+	__le16	nabsn;
+	__le16	nabo;
+	__le16	nabspf;
+	__le16	noiob;
+	__u8	nvmcap[16];
+	__le16	npwg;
+	__le16	npwa;
+	__le16	npdg;
+	__le16	npda;
+	__le16	nows;
+	__u8	rsvd74[18];
+	__le32	anagrpid;
+	__u8	rsvd96[3];
+	__u8	nsattr;
+	__le16	nvmsetid;
+	__le16	endgid;
+	__u8	nguid[16];
+	__u8	eui64[8];
+	struct nvme_lbaf lbaf[64];	/* format table; entry in use chosen via flbas */
+	__u8	vs[3712];	/* brings the member sizes to 4096 bytes in total */
+};
+
+#define BUFFER_SIZE (32768)	/* data bytes moved by each write/read */
+
+/* Report the failed call named by what with perror(); exit with its errno. */
+static void die(const char *what)
+{
+	int err = errno;
+
+	perror(what);
+	exit(err ? err : EIO);
+}
+
+/*
+ * Issue a passthrough ioctl that must succeed, exiting nonzero otherwise. A
+ * positive (NVMe status) result exits with EIO, not the raw status value.
+ */
+static void submit(int fd, unsigned long req, struct nvme_passthru_cmd *cmd,
+		   const char *what)
+{
+	int ret = ioctl(fd, req, cmd);
+
+	if (ret < 0)
+		die(what);
+	if (ret > 0) {
+		fprintf(stderr, "%s: NVMe status 0x%x\n", what, ret);
+		exit(EIO);
+	}
+}
+
+/*
+ * Write then read BUFFER_SIZE bytes of argv[1] with the metadata buffer page
+ * aligned, then at offset 4096 - 16, then read-only; only the read into the
+ * read-only buffer must fail. Returns 0 if every step behaved as expected.
+ */
+int main(int argc, char **argv)
+{
+	int fd, nsid, blocks, meta_buffer_size;
+	struct nvme_id_ns ns = { 0 };
+	struct nvme_passthru_cmd cmd;
+	struct nvme_lbaf lbaf;
+	void *buffer, *mptr;
+	__u64 block_size;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s /dev/nvmeXnY\n", argv[0]);
+		return EINVAL;
+	}
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		die("open");
+
+	nsid = ioctl(fd, NVME_IOCTL_ID);
+	if (nsid < 0)
+		die("namespace id");
+
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode		= 0x6,
+		.nsid		= nsid,
+		.addr		= (__u64)(uintptr_t)&ns,
+		.data_len       = sizeof(ns),
+	};
+	submit(fd, NVME_IOCTL_ADMIN_CMD, &cmd, "id-ns");
+
+	/* the format index is FLBAS bits 3:0 plus bits 6:5 as its high bits */
+	lbaf = ns.lbaf[(ns.flbas & 0xf) | ((ns.flbas & 0x60) >> 1)];
+
+	/* format not appropriate for this test */
+	if (lbaf.ms == 0) {
+		fprintf(stderr, "Device format does not have metadata\n");
+		return EINVAL;
+	}
+
+	/* a block above BUFFER_SIZE gives blocks == 0 and cdw12 wraps to ~0 */
+	block_size = lbaf.ds < 32 ? 1ULL << lbaf.ds : 0;
+	if (block_size == 0 || block_size > BUFFER_SIZE) {
+		fprintf(stderr, "Unsupported block size\n");
+		return EINVAL;
+	}
+	blocks = BUFFER_SIZE / block_size;
+	meta_buffer_size = blocks * lbaf.ms;
+
+	/* zeroed so that no uninitialized heap contents reach the device */
+	buffer = calloc(1, BUFFER_SIZE);
+	if (!buffer)
+		die("calloc");
+	mptr = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED)
+		die("mmap");
+
+	/* this should directly use the user space buffer */
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode		= 1,
+		.nsid		= nsid,
+		.addr		= (uintptr_t)buffer,
+		.metadata       = (uintptr_t)mptr,
+		.data_len       = BUFFER_SIZE,
+		.metadata_len   = meta_buffer_size,
+		.cdw12		= blocks - 1,
+	};
+	submit(fd, NVME_IOCTL_IO_CMD, &cmd, "nvme-write");
+
+	cmd.opcode = 2;
+	submit(fd, NVME_IOCTL_IO_CMD, &cmd, "nvme-read");
+
+	/*
+	 * this offset should either force a kernel copy if we don't have
+	 * contiguous pages, or test the device's metadata sgls
+	 */
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)mptr + 4096 - 16;
+	submit(fd, NVME_IOCTL_IO_CMD, &cmd, "nvme-write (offset)");
+
+	cmd.opcode = 2;
+	submit(fd, NVME_IOCTL_IO_CMD, &cmd, "nvme-read (offset)");
+
+	/*
+	 * This buffer is read-only, so should not be successful with commands
+	 * where it is the destination (reads)
+	 */
+	mptr = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED)
+		die("mmap");
+
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)mptr;
+	submit(fd, NVME_IOCTL_IO_CMD, &cmd, "nvme-write (prot_read)");
+
+	/* any failure is the expected result here, so it is not reported */
+	cmd.opcode = 2;
+	if (ioctl(fd, NVME_IOCTL_IO_CMD, &cmd) == 0) {
+		fprintf(stderr, "nvme-read (expect Failure): succeeded\n");
+		return EFAULT;
+	}
+
+	return 0;
+}
diff --git a/tests/nvme/064 b/tests/nvme/064
new file mode 100755
index 0000000..a10b6ee
--- /dev/null
+++ b/tests/nvme/064
@@ -0,0 +1,34 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Keith Busch <kbusch at kernel.org>
+#
+# Test out metadata through the passthrough interfaces. This test confirms the
+# fix in kernel commit 43a67dd812c5 ("block: flip iter directions in
+# blk_rq_integrity_map_user()"). It requires TEST_DEV to be a namespace
+# formatted with metadata and with extended LBA disabled. Such a namespace can
+# be prepared with QEMU NVMe emulation by passing "ms=8", "ms=16" or "ms=64"
+# to the -device option.
+
+. tests/nvme/rc
+
+requires() {	# delegates to _nvme_requires, defined outside this script
+	_nvme_requires
+}
+
+device_requires() {	# both helpers called here are defined outside this script
+	_test_dev_has_metadata	# NOTE(review): presumably checks TEST_DEV's format; confirm
+	_test_dev_disables_extended_lba
+}
+
+DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
+QUICK=1
+
+test_device() {	# runs src/nvme-passthrough-meta against TEST_DEV
+	echo "Running ${TEST_NAME}"
+
+	if ! src/nvme-passthrough-meta "${TEST_DEV}"; then	# nonzero exit status
+		echo "src/nvme-passthrough-meta failed"	# line is absent from 064.out
+	fi
+
+	echo "Test complete"
+}
diff --git a/tests/nvme/064.out b/tests/nvme/064.out
new file mode 100644
index 0000000..5b34d4e
--- /dev/null
+++ b/tests/nvme/064.out
@@ -0,0 +1,2 @@
+Running nvme/064
+Test complete
-- 
2.49.0




More information about the Linux-nvme mailing list