[PATCH 08/12] nvme: Add copy offloading support

Bart Van Assche bvanassche at acm.org
Fri Apr 24 15:41:57 PDT 2026


From: Nitesh Shetty <nj.shetty at samsung.com>

Add support for the NVMe Copy command. This command supports a single
destination range and up to 256 source ranges.

Add trace event support for nvme_copy_cmd.

Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
Signed-off-by: Javier González <javier.gonz at samsung.com>
Signed-off-by: Anuj Gupta <anuj20.g at samsung.com>
[ bvanassche: generalized Copy support from one to 256 source ranges; fixed
  an endianness issue in nvme_config_copy(); renamed rsvd91 into rsvd81 and
  verified the offset with pahole ]
Signed-off-by: Bart Van Assche <bvanassche at acm.org>
---
 drivers/nvme/host/constants.c |   1 +
 drivers/nvme/host/core.c      | 106 ++++++++++++++++++++++++++++++++++
 drivers/nvme/host/trace.c     |  19 ++++++
 include/linux/nvme.h          |  46 ++++++++++++++-
 4 files changed, 169 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
index dc90df9e13a2..b80c7c7fb629 100644
--- a/drivers/nvme/host/constants.c
+++ b/drivers/nvme/host/constants.c
@@ -19,6 +19,7 @@ static const char * const nvme_ops[] = {
 	[nvme_cmd_resv_report] = "Reservation Report",
 	[nvme_cmd_resv_acquire] = "Reservation Acquire",
 	[nvme_cmd_resv_release] = "Reservation Release",
+	[nvme_cmd_copy] = "Copy Offload",
 	[nvme_cmd_zone_mgmt_send] = "Zone Management Send",
 	[nvme_cmd_zone_mgmt_recv] = "Zone Management Receive",
 	[nvme_cmd_zone_append] = "Zone Append",
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1e33af94c24b..6f3c1fde112f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
 
 #include <linux/async.h>
 #include <linux/blkdev.h>
+#include <linux/blk-copy.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-integrity.h>
 #include <linux/compat.h>
@@ -821,6 +822,87 @@ static inline void nvme_setup_flush(struct nvme_ns *ns,
 	cmnd->common.nsid = cpu_to_le32(ns->head->ns_id);
 }
 
+/*
+ * Translate REQ_OP_COPY_SRC and REQ_OP_COPY_DST bios into an NVMe Copy command.
+ * The NVMe copy command supports multiple source LBA ranges, a single
+ * destination LBA range, and also supports copying across NVMe namespaces. This
+ * implementation supports all these features except copying across NVMe
+ * namespaces.
+ */
+static inline blk_status_t nvme_setup_copy_offload(struct nvme_ns *ns,
+						   struct request *req,
+						   struct nvme_command *cmnd)
+{
+	const u32 nr_range = blk_copy_bio_count(req, REQ_OP_COPY_SRC);
+	struct nvme_ns *src_ns, *dst_ns;
+	struct bio *src_bio = NULL, *dst_bio;
+	struct nvme_copy_range *range;
+	u16 control = 0;
+	u64 dlba;
+
+	dst_bio = blk_first_copy_bio(req, REQ_OP_COPY_DST);
+
+	if (WARN_ON_ONCE(!dst_bio))
+		return BLK_STS_IOERR;
+
+	/* TO DO: derive dst_ns from dst_bio. */
+	dst_ns = ns;
+	dlba = nvme_sect_to_lba(dst_ns->head, dst_bio->bi_iter.bi_sector);
+
+	if (req->cmd_flags & REQ_FUA)
+		control |= NVME_RW_FUA;
+
+	if (req->cmd_flags & REQ_FAILFAST_DEV)
+		control |= NVME_RW_LR;
+
+	*cmnd = (typeof(*cmnd)){
+		.copy = {
+			.opcode = nvme_cmd_copy,
+			.nsid = cpu_to_le32(dst_ns->head->ns_id),
+			.control = cpu_to_le16(control),
+			.sdlba = cpu_to_le64(dlba),
+			.desfmt_prinfor = 2, /* DESFMT=2 */
+			.nr_range = nr_range - 1, /* 0's based */
+		}
+	};
+
+	range = kmalloc_array(nr_range, sizeof(*range),
+			      GFP_ATOMIC | __GFP_ZERO | __GFP_NOWARN);
+	if (!range)
+		return BLK_STS_RESOURCE;
+
+	for (unsigned int i = 0; i < nr_range; i++) {
+		u64 slba;
+		u32 nslb;
+
+		if (!src_bio)
+			src_bio = blk_first_copy_bio(req, REQ_OP_COPY_SRC);
+		else
+			src_bio = blk_next_copy_bio(src_bio);
+		if (WARN_ON_ONCE(!src_bio))
+			goto free_range;
+		/* TO DO: derive src_ns from src_bio. */
+		src_ns = ns;
+		slba = nvme_sect_to_lba(src_ns->head,
+					src_bio->bi_iter.bi_sector);
+		nslb = src_bio->bi_iter.bi_size >> src_ns->head->lba_shift;
+		range[i].nsid = cpu_to_le32(src_ns->head->ns_id); /* requires DESFMT=2 */
+		range[i].slba = cpu_to_le64(slba);
+		range[i].nlb = cpu_to_le16(nslb - 1);
+	}
+
+	req->special_vec.bv_page = virt_to_page(range);
+	req->special_vec.bv_offset = offset_in_page(range);
+	req->special_vec.bv_len = sizeof(*range) * nr_range;
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return BLK_STS_OK;
+
+free_range:
+	kfree(range);
+	return BLK_STS_IOERR;
+}
+
 static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -1122,6 +1204,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
 	case REQ_OP_ZONE_APPEND:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append);
 		break;
+	case REQ_OP_COPY_DST:
+	case REQ_OP_COPY_SRC:
+		ret = nvme_setup_copy_offload(ns, req, cmd);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		return BLK_STS_IOERR;
@@ -1884,6 +1970,21 @@ static bool nvme_init_integrity(struct nvme_ns_head *head,
 	return true;
 }
 
+static void nvme_config_copy(struct nvme_ns *ns, struct nvme_id_ns *id,
+			     struct queue_limits *lim)
+{
+	struct nvme_ctrl *ctrl = ns->ctrl;
+
+	if (!(ctrl->oncs & NVME_CTRL_ONCS_COPY)) {
+		lim->max_copy_hw_sectors = 0;
+		return;
+	}
+	lim->max_copy_hw_sectors = nvme_lba_to_sect(ns->head,
+						    le16_to_cpu(id->mssrl));
+	lim->max_copy_src_segments = 256;
+	lim->max_copy_dst_segments = 1;
+}
+
 static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 {
 	return uuid_equal(&a->uuid, &b->uuid) &&
@@ -2416,6 +2517,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
 	if (!nvme_update_disk_info(ns, id, nvm, &lim))
 		capacity = 0;
 
+	nvme_config_copy(ns, id, &lim);
 	if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
 	    ns->head->ids.csi == NVME_CSI_ZNS)
 		nvme_update_zone_info(ns, &lim, &zi);
@@ -2542,6 +2644,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 		lim.physical_block_size = ns_lim->physical_block_size;
 		lim.io_min = ns_lim->io_min;
 		lim.io_opt = ns_lim->io_opt;
+		lim.max_copy_hw_sectors = UINT_MAX;
+		lim.max_copy_src_segments = U16_MAX;
+		lim.max_copy_dst_segments = U16_MAX;
 		queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
 					ns->head->disk->disk_name);
 		if (unsupported)
@@ -5368,6 +5473,7 @@ static inline void _nvme_check_size(void)
 	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_copy_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index ad25ad1e4041..7096ade7740c 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -153,6 +153,23 @@ static const char *nvme_trace_read_write(struct trace_seq *p, u8 *cdw10)
 	return ret;
 }
 
+static const char *nvme_trace_copy(struct trace_seq *p, u8 *cdw10)
+{
+	const char *ret = trace_seq_buffer_ptr(p);
+	u64 sdlba = get_unaligned_le64(cdw10);
+	u8 nr_range = get_unaligned_le16(cdw10 + 8);
+	u16 control = get_unaligned_le16(cdw10 + 10);
+	u32 dsmgmt = get_unaligned_le32(cdw10 + 12);
+	u32 reftag = get_unaligned_le32(cdw10 + 16);
+
+	trace_seq_printf(p,
+		"sdlba=%llu, nr_range=%u, ctrl=0x%x, dsmgmt=%u, reftag=%u",
+		sdlba, nr_range, control, dsmgmt, reftag);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+
 static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
 {
 	const char *ret = trace_seq_buffer_ptr(p);
@@ -386,6 +403,8 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
 		return nvme_trace_resv_rel(p, cdw10);
 	case nvme_cmd_resv_report:
 		return nvme_trace_resv_report(p, cdw10);
+	case nvme_cmd_copy:
+		return nvme_trace_copy(p, cdw10);
 	default:
 		return nvme_trace_common(p, cdw10);
 	}
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 041f30931a90..ead8e5128e3b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -376,7 +376,7 @@ struct nvme_id_ctrl {
 	__u8			nvscc;
 	__u8			nwpc;
 	__le16			acwu;
-	__u8			rsvd534[2];
+	__le16			ocfs;
 	__le32			sgls;
 	__le32			mnan;
 	__u8			rsvd544[224];
@@ -404,6 +404,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_ONCS_RESERVATIONS		= 1 << 5,
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
+	NVME_CTRL_ONCS_COPY			= 1 << 8,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 	NVME_CTRL_OACS_NS_MNGT_SUPP		= 1 << 3,
@@ -458,7 +459,10 @@ struct nvme_id_ns {
 	__le16			npdg;
 	__le16			npda;
 	__le16			nows;
-	__u8			rsvd74[18];
+	__le16			mssrl;
+	__le32			mcl;
+	__u8			msrc;
+	__u8			rsvd81[11];
 	__le32			anagrpid;
 	__u8			rsvd96[3];
 	__u8			nsattr;
@@ -967,6 +971,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_acquire	= 0x11,
 	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
+	nvme_cmd_copy		= 0x19,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
 	nvme_cmd_zone_append	= 0x7d,
@@ -991,7 +996,8 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_release),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
-		nvme_opcode_name(nvme_cmd_zone_append))
+		nvme_opcode_name(nvme_cmd_zone_append),		\
+		nvme_opcode_name(nvme_cmd_copy))
 
 
 
@@ -1169,6 +1175,39 @@ struct nvme_dsm_range {
 	__le64			slba;
 };
 
+struct nvme_copy_command {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			sdlba;
+	__u8			nr_range;
+	__u8			desfmt_prinfor;
+	__le16			control;
+	__le16			rsvd13;
+	__le16			dspec;
+	__le32			ilbrt;
+	__le16			lbat;
+	__le16			lbatm;
+};
+
+struct nvme_copy_range {
+	__le32			nsid;	/* DESFMT=2 only */
+	__le32			rsvd1;
+	__le64			slba;
+	__le16			nlb;
+	__le16			rsvd18;
+	__le32			rsvd20;
+	__le32			eilbrt;
+	__le16			elbat;
+	__le16			elbatm;
+};
+
+static_assert(sizeof(struct nvme_copy_range) == 32);
+
 struct nvme_write_zeroes_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -2001,6 +2040,7 @@ struct nvme_command {
 		struct nvme_download_firmware dlfw;
 		struct nvme_format_cmd format;
 		struct nvme_dsm_cmd dsm;
+		struct nvme_copy_command copy;
 		struct nvme_write_zeroes_cmd write_zeroes;
 		struct nvme_zone_mgmt_send_cmd zms;
 		struct nvme_zone_mgmt_recv_cmd zmr;



More information about the Linux-nvme mailing list