[PATCH 3/5] nvme: add support for copy offload
Caleb Sander Mateos
csander at purestorage.com
Wed May 21 17:47:36 PDT 2025
On Wed, May 21, 2025 at 3:31 PM Keith Busch <kbusch at meta.com> wrote:
>
> From: Keith Busch <kbusch at kernel.org>
>
> Register the nvme namespace copy capablities with the request_queue
nit: "capabilities"
> limits and implement support for the REQ_OP_COPY operation.
>
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
> drivers/nvme/host/core.c | 61 ++++++++++++++++++++++++++++++++++++++++
> include/linux/nvme.h | 42 ++++++++++++++++++++++++++-
> 2 files changed, 102 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f69a232a000ac..3134fe85b1abc 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -888,6 +888,52 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
> return BLK_STS_OK;
> }
>
> +static inline blk_status_t nvme_setup_copy(struct nvme_ns *ns,
> + struct request *req, struct nvme_command *cmnd)
> +{
> + struct nvme_copy_range *range;
> + struct req_iterator iter;
> + struct bio_vec bvec;
> + u16 control = 0;
> + int i = 0;
Make this unsigned to avoid sign extension when used as an index?
> +
> + static const size_t alloc_size = sizeof(*range) * NVME_COPY_MAX_RANGES;
> +
> + if (WARN_ON_ONCE(blk_rq_nr_phys_segments(req) >= NVME_COPY_MAX_RANGES))
Should this be > instead of >=? The range array is allocated with room
for exactly NVME_COPY_MAX_RANGES entries.
> + return BLK_STS_IOERR;
> +
> + range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
> + if (!range)
> + return BLK_STS_RESOURCE;
> +
> + if (req->cmd_flags & REQ_FUA)
> + control |= NVME_RW_FUA;
> + if (req->cmd_flags & REQ_FAILFAST_DEV)
> + control |= NVME_RW_LR;
> +
> + rq_for_each_copy_bvec(bvec, req, iter) {
> + u64 slba = nvme_sect_to_lba(ns->head, bvec.bv_sector);
> + u64 nlb = nvme_sect_to_lba(ns->head, bvec.bv_sectors) - 1;
> +
> + range[i].slba = cpu_to_le64(slba);
> + range[i].nlb = cpu_to_le16(nlb);
> + i++;
> + }
> +
> + memset(cmnd, 0, sizeof(*cmnd));
> + cmnd->copy.opcode = nvme_cmd_copy;
> + cmnd->copy.nsid = cpu_to_le32(ns->head->ns_id);
> + cmnd->copy.nr_range = i - 1;
> + cmnd->copy.sdlba = cpu_to_le64(nvme_sect_to_lba(ns->head,
> + blk_rq_pos(req)));
> + cmnd->copy.control = cpu_to_le16(control);
> +
> + bvec_set_virt(&req->special_vec, range, alloc_size);
Should the length passed here be sizeof(*range) * i rather than
alloc_size? Otherwise the length exceeds the amount of data used by the
Copy command, which not all controllers support (see bit LLDTS of SGLS
in the Identify Controller data structure). We have seen the same
behavior with Dataset Management (always specifying 4 KB of data), which
also passes the maximum size of the allocation to bvec_set_virt().
> + req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> + return BLK_STS_OK;
> +}
> +
> static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd)
> {
> cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag);
> @@ -1106,6 +1152,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
> case REQ_OP_DISCARD:
> ret = nvme_setup_discard(ns, req, cmd);
> break;
> + case REQ_OP_COPY:
> + ret = nvme_setup_copy(ns, req, cmd);
> + break;
> case REQ_OP_READ:
> ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
> break;
> @@ -2119,6 +2168,15 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
> lim->max_write_zeroes_sectors = UINT_MAX;
> else
> lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
> +
> + if (ns->ctrl->oncs & NVME_CTRL_ONCS_NVMCPYS && id->mssrl && id->mcl) {
Are the checks of MSSRL and MCL necessary? The spec says controllers
that support Copy are not allowed to set them to 0.
Best,
Caleb
> + u32 mcss = bs * le16_to_cpu(id->mssrl) >> SECTOR_SHIFT;
> + u32 mcs = bs * le32_to_cpu(id->mcl) >> SECTOR_SHIFT;
> +
> + lim->max_copy_segment_sectors = mcss;
> + lim->max_copy_sectors = mcs;
> + lim->max_copy_segments = id->msrc + 1;
> + }
> return valid;
> }
>
> @@ -2526,6 +2584,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
> nvme_init_integrity(ns->head, &lim, info);
> lim.max_write_streams = ns_lim->max_write_streams;
> lim.write_stream_granularity = ns_lim->write_stream_granularity;
> + lim.max_copy_segment_sectors = ns_lim->max_copy_segment_sectors;
> + lim.max_copy_sectors = ns_lim->max_copy_sectors;
> + lim.max_copy_segments = ns_lim->max_copy_segments;
> ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
>
> set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 51308f65b72fd..14f46ad1330b6 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -404,6 +404,7 @@ enum {
> NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3,
> NVME_CTRL_ONCS_RESERVATIONS = 1 << 5,
> NVME_CTRL_ONCS_TIMESTAMP = 1 << 6,
> + NVME_CTRL_ONCS_NVMCPYS = 1 << 8,
> NVME_CTRL_VWC_PRESENT = 1 << 0,
> NVME_CTRL_OACS_SEC_SUPP = 1 << 0,
> NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3,
> @@ -458,7 +459,10 @@ struct nvme_id_ns {
> __le16 npdg;
> __le16 npda;
> __le16 nows;
> - __u8 rsvd74[18];
> + __le16 mssrl;
> + __le32 mcl;
> + __u8 msrc;
> + __u8 rsvd81[11];
> __le32 anagrpid;
> __u8 rsvd96[3];
> __u8 nsattr;
> @@ -956,6 +960,7 @@ enum nvme_opcode {
> nvme_cmd_resv_acquire = 0x11,
> nvme_cmd_io_mgmt_recv = 0x12,
> nvme_cmd_resv_release = 0x15,
> + nvme_cmd_copy = 0x19,
> nvme_cmd_zone_mgmt_send = 0x79,
> nvme_cmd_zone_mgmt_recv = 0x7a,
> nvme_cmd_zone_append = 0x7d,
> @@ -978,6 +983,7 @@ enum nvme_opcode {
> nvme_opcode_name(nvme_cmd_resv_acquire), \
> nvme_opcode_name(nvme_cmd_io_mgmt_recv), \
> nvme_opcode_name(nvme_cmd_resv_release), \
> + nvme_opcode_name(nvme_cmd_copy), \
> nvme_opcode_name(nvme_cmd_zone_mgmt_send), \
> nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \
> nvme_opcode_name(nvme_cmd_zone_append))
> @@ -1158,6 +1164,39 @@ struct nvme_dsm_range {
> __le64 slba;
> };
>
> +struct nvme_copy_cmd {
> + __u8 opcode;
> + __u8 flags;
> + __u16 command_id;
> + __le32 nsid;
> + __u64 rsvd2;
> + __le64 metadata;
> + union nvme_data_ptr dptr;
> + __le64 sdlba;
> + __u8 nr_range;
> + __u8 format;
> + __le16 control;
> + __le16 cev;
> + __le16 dspec;
> + __le32 lbtl;
> + __le16 lbat;
> + __le16 lbatm;
> +};
> +
> +#define NVME_COPY_MAX_RANGES 128
> +struct nvme_copy_range {
> + __le32 spars;
> + __u32 rsvd4;
> + __le64 slba;
> + __le16 nlb;
> + __le16 cetype;
> + __le16 cev;
> + __le16 sopt;
> + __le32 elbt;
> + __le16 elbat;
> + __le16 elbatm;
> +};
> +
> struct nvme_write_zeroes_cmd {
> __u8 opcode;
> __u8 flags;
> @@ -1985,6 +2024,7 @@ struct nvme_command {
> struct nvme_download_firmware dlfw;
> struct nvme_format_cmd format;
> struct nvme_dsm_cmd dsm;
> + struct nvme_copy_cmd copy;
> struct nvme_write_zeroes_cmd write_zeroes;
> struct nvme_zone_mgmt_send_cmd zms;
> struct nvme_zone_mgmt_recv_cmd zmr;
> --
> 2.47.1
>
>
More information about the Linux-nvme
mailing list