[PATCH 3/5] nvme: add support for copy offload

Keith Busch kbusch at meta.com
Wed May 21 15:31:05 PDT 2025


From: Keith Busch <kbusch at kernel.org>

Register the nvme namespace copy capablities with the request_queue
limits and implement support for the REQ_OP_COPY operation.

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 drivers/nvme/host/core.c | 61 ++++++++++++++++++++++++++++++++++++++++
 include/linux/nvme.h     | 42 ++++++++++++++++++++++++++-
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f69a232a000ac..3134fe85b1abc 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -888,6 +888,52 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 	return BLK_STS_OK;
 }
 
+static inline blk_status_t nvme_setup_copy(struct nvme_ns *ns,
+		struct request *req, struct nvme_command *cmnd)
+{
+	struct nvme_copy_range *range;
+	struct req_iterator iter;
+	struct bio_vec bvec;
+	u16 control = 0;
+	int i = 0;
+
+	static const size_t alloc_size = sizeof(*range) * NVME_COPY_MAX_RANGES;
+
+	if (WARN_ON_ONCE(blk_rq_nr_phys_segments(req) >= NVME_COPY_MAX_RANGES))
+		return BLK_STS_IOERR;
+
+	range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
+	if (!range)
+		return BLK_STS_RESOURCE;
+
+	if (req->cmd_flags & REQ_FUA)
+	        control |= NVME_RW_FUA;
+	if (req->cmd_flags & REQ_FAILFAST_DEV)
+	        control |= NVME_RW_LR;
+
+	rq_for_each_copy_bvec(bvec, req, iter) {
+		u64 slba = nvme_sect_to_lba(ns->head, bvec.bv_sector);
+		u64 nlb = nvme_sect_to_lba(ns->head, bvec.bv_sectors) - 1;
+
+		range[i].slba = cpu_to_le64(slba);
+		range[i].nlb = cpu_to_le16(nlb);
+	        i++;
+	}
+
+	memset(cmnd, 0, sizeof(*cmnd));
+	cmnd->copy.opcode = nvme_cmd_copy;
+	cmnd->copy.nsid = cpu_to_le32(ns->head->ns_id);
+	cmnd->copy.nr_range = i - 1;
+	cmnd->copy.sdlba = cpu_to_le64(nvme_sect_to_lba(ns->head,
+						blk_rq_pos(req)));
+	cmnd->copy.control = cpu_to_le16(control);
+
+	bvec_set_virt(&req->special_vec, range, alloc_size);
+	req->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+	return BLK_STS_OK;
+}
+
 static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd)
 {
 	cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag);
@@ -1106,6 +1152,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
 	case REQ_OP_DISCARD:
 		ret = nvme_setup_discard(ns, req, cmd);
 		break;
+	case REQ_OP_COPY:
+		ret = nvme_setup_copy(ns, req, cmd);
+		break;
 	case REQ_OP_READ:
 		ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
 		break;
@@ -2119,6 +2168,15 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
 		lim->max_write_zeroes_sectors = UINT_MAX;
 	else
 		lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
+
+	if (ns->ctrl->oncs & NVME_CTRL_ONCS_NVMCPYS && id->mssrl && id->mcl) {
+		u32 mcss = bs * le16_to_cpu(id->mssrl) >> SECTOR_SHIFT;
+		u32 mcs = bs * le32_to_cpu(id->mcl) >> SECTOR_SHIFT;
+
+		lim->max_copy_segment_sectors = mcss;
+		lim->max_copy_sectors = mcs;
+		lim->max_copy_segments = id->msrc + 1;
+	}
 	return valid;
 }
 
@@ -2526,6 +2584,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
 			nvme_init_integrity(ns->head, &lim, info);
 		lim.max_write_streams = ns_lim->max_write_streams;
 		lim.write_stream_granularity = ns_lim->write_stream_granularity;
+		lim.max_copy_segment_sectors = ns_lim->max_copy_segment_sectors;
+		lim.max_copy_sectors = ns_lim->max_copy_sectors;
+		lim.max_copy_segments = ns_lim->max_copy_segments;
 		ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
 
 		set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 51308f65b72fd..14f46ad1330b6 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -404,6 +404,7 @@ enum {
 	NVME_CTRL_ONCS_WRITE_ZEROES		= 1 << 3,
 	NVME_CTRL_ONCS_RESERVATIONS		= 1 << 5,
 	NVME_CTRL_ONCS_TIMESTAMP		= 1 << 6,
+	NVME_CTRL_ONCS_NVMCPYS                  = 1 << 8,
 	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 	NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
 	NVME_CTRL_OACS_NS_MNGT_SUPP		= 1 << 3,
@@ -458,7 +459,10 @@ struct nvme_id_ns {
 	__le16			npdg;
 	__le16			npda;
 	__le16			nows;
-	__u8			rsvd74[18];
+	__le16			mssrl;
+	__le32			mcl;
+	__u8			msrc;
+	__u8			rsvd81[11];
 	__le32			anagrpid;
 	__u8			rsvd96[3];
 	__u8			nsattr;
@@ -956,6 +960,7 @@ enum nvme_opcode {
 	nvme_cmd_resv_acquire	= 0x11,
 	nvme_cmd_io_mgmt_recv	= 0x12,
 	nvme_cmd_resv_release	= 0x15,
+	nvme_cmd_copy		= 0x19,
 	nvme_cmd_zone_mgmt_send	= 0x79,
 	nvme_cmd_zone_mgmt_recv	= 0x7a,
 	nvme_cmd_zone_append	= 0x7d,
@@ -978,6 +983,7 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_resv_acquire),	\
 		nvme_opcode_name(nvme_cmd_io_mgmt_recv),	\
 		nvme_opcode_name(nvme_cmd_resv_release),	\
+		nvme_opcode_name(nvme_cmd_copy),		\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_send),	\
 		nvme_opcode_name(nvme_cmd_zone_mgmt_recv),	\
 		nvme_opcode_name(nvme_cmd_zone_append))
@@ -1158,6 +1164,39 @@ struct nvme_dsm_range {
 	__le64			slba;
 };
 
+struct nvme_copy_cmd {
+	__u8			opcode;
+	__u8			flags;
+	__u16			command_id;
+	__le32			nsid;
+	__u64			rsvd2;
+	__le64			metadata;
+	union nvme_data_ptr	dptr;
+	__le64			sdlba;
+	__u8			nr_range;
+	__u8			format;
+	__le16			control;
+	__le16			cev;
+	__le16			dspec;
+	__le32			lbtl;
+	__le16			lbat;
+	__le16			lbatm;
+};
+
+#define NVME_COPY_MAX_RANGES   128
+struct nvme_copy_range {
+	__le32			spars;
+	__u32			rsvd4;
+	__le64			slba;
+	__le16			nlb;
+	__le16			cetype;
+	__le16			cev;
+	__le16			sopt;
+	__le32			elbt;
+	__le16			elbat;
+	__le16			elbatm;
+};
+
 struct nvme_write_zeroes_cmd {
 	__u8			opcode;
 	__u8			flags;
@@ -1985,6 +2024,7 @@ struct nvme_command {
 		struct nvme_download_firmware dlfw;
 		struct nvme_format_cmd format;
 		struct nvme_dsm_cmd dsm;
+		struct nvme_copy_cmd copy;
 		struct nvme_write_zeroes_cmd write_zeroes;
 		struct nvme_zone_mgmt_send_cmd zms;
 		struct nvme_zone_mgmt_recv_cmd zmr;
-- 
2.47.1




More information about the Linux-nvme mailing list