[PATCH blktests] block tests: nvme metadata passthrough

Keith Busch kbusch at meta.com
Thu Jun 5 17:30:15 PDT 2025


From: Keith Busch <kbusch at kernel.org>

Get more coverage on nvme metadata passthrough. Specifically in this
test, read-only metadata is targeted as this had been a gap in previous
test coveraged.

Link: https://lore.kernel.org/linux-block/20250603184752.1185676-1-csander@purestorage.com/
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 src/Makefile                |   1 +
 src/nvme-passthrough-meta.c | 219 ++++++++++++++++++++++++++++++++++++
 tests/nvme/064              |  22 ++++
 tests/nvme/064.out          |   2 +
 4 files changed, 244 insertions(+)
 create mode 100644 src/nvme-passthrough-meta.c
 create mode 100755 tests/nvme/064
 create mode 100644 tests/nvme/064.out

diff --git a/src/Makefile b/src/Makefile
index a94e5f2..f91ac62 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,6 +13,7 @@ C_TARGETS := \
 	loop_change_fd \
 	loop_get_status_null \
 	mount_clear_sock \
+	nvme-passthrough-meta \
 	nbdsetsize \
 	openclose \
 	sg/dxfer-from-dev \
diff --git a/src/nvme-passthrough-meta.c b/src/nvme-passthrough-meta.c
new file mode 100644
index 0000000..a8a5b1b
--- /dev/null
+++ b/src/nvme-passthrough-meta.c
@@ -0,0 +1,219 @@
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+
+#ifndef _LINUX_NVME_IOCTL_H
+#define _LINUX_NVME_IOCTL_H
+struct nvme_passthru_cmd {
+	__u8    opcode;
+	__u8    flags;
+	__u16   rsvd1;
+	__u32   nsid;
+	__u32   cdw2;
+	__u32   cdw3;
+	__u64   metadata;
+	__u64   addr;
+	__u32   metadata_len;
+	__u32   data_len;
+	__u32   cdw10;
+	__u32   cdw11;
+	__u32   cdw12;
+	__u32   cdw13;
+	__u32   cdw14;
+	__u32   cdw15;
+	__u32   timeout_ms;
+	__u32   result;
+};
+
+#define NVME_IOCTL_ID		_IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD    _IOWR('N', 0x41, struct nvme_passthru_cmd)
+#define NVME_IOCTL_IO_CMD       _IOWR('N', 0x43, struct nvme_passthru_cmd)
+#endif /* _UAPI_LINUX_NVME_IOCTL_H */
+
+struct nvme_lbaf {
+	__le16	ms;
+	__u8	ds;
+	__u8	rp;
+};
+
+struct nvme_id_ns {
+	__le64	nsze;
+	__le64	ncap;
+	__le64	nuse;
+	__u8	nsfeat;
+	__u8	nlbaf;
+	__u8	flbas;
+	__u8	mc;
+	__u8	dpc;
+	__u8	dps;
+	__u8	nmic;
+	__u8	rescap;
+	__u8	fpi;
+	__u8	dlfeat;
+	__le16	nawun;
+	__le16	nawupf;
+	__le16	nacwu;
+	__le16	nabsn;
+	__le16	nabo;
+	__le16	nabspf;
+	__le16	noiob;
+	__u8	nvmcap[16];
+	__le16	npwg;
+	__le16	npwa;
+	__le16	npdg;
+	__le16	npda;
+	__le16	nows;
+	__u8	rsvd74[18];
+	__le32	anagrpid;
+	__u8	rsvd96[3];
+	__u8	nsattr;
+	__le16	nvmsetid;
+	__le16	endgid;
+	__u8	nguid[16];
+	__u8	eui64[8];
+	struct nvme_lbaf lbaf[64];
+	__u8	vs[3712];
+};
+
+#define BUFFER_SIZE (32768)
+
+int main(int argc, char **argv)
+{
+	int ret, fd, nsid, blocks, meta_buffer_size;
+	void *buffer, *mptr = NULL, *meta = NULL;
+	struct nvme_passthru_cmd cmd;
+	struct nvme_lbaf lbaf;
+	struct nvme_id_ns ns;
+
+	__u64 block_size;
+	__u16 meta_size;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s /dev/nvmeXnY", argv[0]);
+		return EINVAL;
+	}
+
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	nsid = ioctl(fd, NVME_IOCTL_ID);
+	if (nsid < 0) {
+		perror("namespace id");
+		return errno;
+	}
+
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode	= 0x6,
+		.nsid	= nsid,
+		.addr	= (__u64)(uintptr_t)&ns,
+		.data_len       = 4096,
+	};
+
+	ret = ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
+	if (ret < 0) {
+		perror("id-ns");
+		return errno;
+	}
+
+	lbaf = ns.lbaf[ns.flbas & 0xf];
+	block_size = 1 << lbaf.ds;
+	meta_size = lbaf.ms;
+
+	/* format not appropriate for this test */
+	if (meta_size == 0)
+		return 0;
+
+	blocks = BUFFER_SIZE / block_size;
+	meta_buffer_size = blocks * meta_size;
+
+	buffer = malloc(BUFFER_SIZE);
+	mptr = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED) {
+		perror("mmap");
+		return errno;
+	}
+
+	/* this should directly use the user space buffer */
+	meta = mptr;
+	cmd = (struct nvme_passthru_cmd) {
+		.opcode		= 1,
+		.nsid		= 1,
+		.addr		= (uintptr_t)buffer,
+		.metadata       = (uintptr_t)meta,
+		.data_len       = BUFFER_SIZE,
+		.metadata_len   = meta_buffer_size,
+		.cdw12		= blocks - 1,
+	};
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-write");
+		return ret;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return ret;
+	}
+
+	/*
+	 * this offset should either force a kernel copy if we don't have
+	 * contiguous pages, or test the device's metadata sgls
+	 */
+	meta = mptr + 4096 - 16;
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)meta;
+
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-write");
+		return errno;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return errno;
+	}
+
+	/* This should not be mappable for write commands */
+	mptr = mmap(NULL, 8192, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (mptr == MAP_FAILED) {
+		perror("mmap");
+		return errno;
+	}
+
+	meta = mptr;
+
+	cmd.opcode = 1;
+	cmd.metadata = (uintptr_t)meta;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret == 0) {
+		perror("nvme-write (expect Failure)");
+		return EFAULT;
+	}
+
+	cmd.opcode = 2;
+	ret = ioctl(fd, NVME_IOCTL_IO_CMD, &cmd);
+	if (ret < 0) {
+		perror("nvme-read");
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/tests/nvme/064 b/tests/nvme/064
new file mode 100755
index 0000000..ed9c565
--- /dev/null
+++ b/tests/nvme/064
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2025 Keith Busch <kbusch at kernel.org>
+#
+# Test out metadata through the passthrough interfaces
+
+. tests/nvme/rc
+
+requires() {
+	_nvme_requires
+}
+
+DESCRIPTION="exercise the nvme metadata usage with passthrough commands"
+QUICK=1
+
+test() {
+	echo "Running ${TEST_NAME}"
+
+	src/nvme-passthrough-meta ${TEST_DEV}
+
+	echo "Test complete"
+}
diff --git a/tests/nvme/064.out b/tests/nvme/064.out
new file mode 100644
index 0000000..5b34d4e
--- /dev/null
+++ b/tests/nvme/064.out
@@ -0,0 +1,2 @@
+Running nvme/064
+Test complete
-- 
2.47.1




More information about the Linux-nvme mailing list