[PATCH 3/5] nvme: add support for copy offload

Caleb Sander Mateos csander at purestorage.com
Wed May 21 17:47:36 PDT 2025


On Wed, May 21, 2025 at 3:31 PM Keith Busch <kbusch at meta.com> wrote:
>
> From: Keith Busch <kbusch at kernel.org>
>
> Register the nvme namespace copy capablities with the request_queue

nit: "capabilities"

> limits and implement support for the REQ_OP_COPY operation.
>
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
>  drivers/nvme/host/core.c | 61 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/nvme.h     | 42 ++++++++++++++++++++++++++-
>  2 files changed, 102 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f69a232a000ac..3134fe85b1abc 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -888,6 +888,52 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
>         return BLK_STS_OK;
>  }
>
> +static inline blk_status_t nvme_setup_copy(struct nvme_ns *ns,
> +               struct request *req, struct nvme_command *cmnd)
> +{
> +       struct nvme_copy_range *range;
> +       struct req_iterator iter;
> +       struct bio_vec bvec;
> +       u16 control = 0;
> +       int i = 0;

Make this unsigned to avoid sign extension when used as an index?

> +
> +       static const size_t alloc_size = sizeof(*range) * NVME_COPY_MAX_RANGES;
> +
> +       if (WARN_ON_ONCE(blk_rq_nr_phys_segments(req) >= NVME_COPY_MAX_RANGES))

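Should this be > instead of >=? The range array is allocated with room
for NVME_COPY_MAX_RANGES entries, so a request with exactly that many
segments would still fit.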

> +               return BLK_STS_IOERR;
> +
> +       range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN);
> +       if (!range)
> +               return BLK_STS_RESOURCE;
> +
> +       if (req->cmd_flags & REQ_FUA)
> +               control |= NVME_RW_FUA;
> +       if (req->cmd_flags & REQ_FAILFAST_DEV)
> +               control |= NVME_RW_LR;
> +
> +       rq_for_each_copy_bvec(bvec, req, iter) {
> +               u64 slba = nvme_sect_to_lba(ns->head, bvec.bv_sector);
> +               u64 nlb = nvme_sect_to_lba(ns->head, bvec.bv_sectors) - 1;
> +
> +               range[i].slba = cpu_to_le64(slba);
> +               range[i].nlb = cpu_to_le16(nlb);
> +               i++;
> +       }
> +
> +       memset(cmnd, 0, sizeof(*cmnd));
> +       cmnd->copy.opcode = nvme_cmd_copy;
> +       cmnd->copy.nsid = cpu_to_le32(ns->head->ns_id);
> +       cmnd->copy.nr_range = i - 1;
> +       cmnd->copy.sdlba = cpu_to_le64(nvme_sect_to_lba(ns->head,
> +                                               blk_rq_pos(req)));
> +       cmnd->copy.control = cpu_to_le16(control);
> +
> +       bvec_set_virt(&req->special_vec, range, alloc_size);

Should the length passed here be sizeof(*range) * i rather than
alloc_size? Otherwise the payload length exceeds the amount of data
actually used by the Copy command, which not all controllers support
(see bit LLDTS of SGLS in the Identify Controller data structure). We
have seen the same behavior with Dataset Management, which also passes
the maximum size of the allocation to bvec_set_virt() and therefore
always specifies 4 KB of data.

> +       req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> +
> +       return BLK_STS_OK;
> +}
> +
>  static void nvme_set_app_tag(struct request *req, struct nvme_command *cmnd)
>  {
>         cmnd->rw.lbat = cpu_to_le16(bio_integrity(req->bio)->app_tag);
> @@ -1106,6 +1152,9 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req)
>         case REQ_OP_DISCARD:
>                 ret = nvme_setup_discard(ns, req, cmd);
>                 break;
> +       case REQ_OP_COPY:
> +               ret = nvme_setup_copy(ns, req, cmd);
> +               break;
>         case REQ_OP_READ:
>                 ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read);
>                 break;
> @@ -2119,6 +2168,15 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
>                 lim->max_write_zeroes_sectors = UINT_MAX;
>         else
>                 lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors;
> +
> +       if (ns->ctrl->oncs & NVME_CTRL_ONCS_NVMCPYS && id->mssrl && id->mcl) {

Are the checks of MSSRL and MCL necessary? The spec says controllers
that support Copy are not allowed to set them to 0.

Best,
Caleb

> +               u32 mcss = bs * le16_to_cpu(id->mssrl) >> SECTOR_SHIFT;
> +               u32 mcs = bs * le32_to_cpu(id->mcl) >> SECTOR_SHIFT;
> +
> +               lim->max_copy_segment_sectors = mcss;
> +               lim->max_copy_sectors = mcs;
> +               lim->max_copy_segments = id->msrc + 1;
> +       }
>         return valid;
>  }
>
> @@ -2526,6 +2584,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
>                         nvme_init_integrity(ns->head, &lim, info);
>                 lim.max_write_streams = ns_lim->max_write_streams;
>                 lim.write_stream_granularity = ns_lim->write_stream_granularity;
> +               lim.max_copy_segment_sectors = ns_lim->max_copy_segment_sectors;
> +               lim.max_copy_sectors = ns_lim->max_copy_sectors;
> +               lim.max_copy_segments = ns_lim->max_copy_segments;
>                 ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
>
>                 set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk));
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 51308f65b72fd..14f46ad1330b6 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -404,6 +404,7 @@ enum {
>         NVME_CTRL_ONCS_WRITE_ZEROES             = 1 << 3,
>         NVME_CTRL_ONCS_RESERVATIONS             = 1 << 5,
>         NVME_CTRL_ONCS_TIMESTAMP                = 1 << 6,
> +       NVME_CTRL_ONCS_NVMCPYS                  = 1 << 8,
>         NVME_CTRL_VWC_PRESENT                   = 1 << 0,
>         NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
>         NVME_CTRL_OACS_NS_MNGT_SUPP             = 1 << 3,
> @@ -458,7 +459,10 @@ struct nvme_id_ns {
>         __le16                  npdg;
>         __le16                  npda;
>         __le16                  nows;
> -       __u8                    rsvd74[18];
> +       __le16                  mssrl;
> +       __le32                  mcl;
> +       __u8                    msrc;
> +       __u8                    rsvd81[11];
>         __le32                  anagrpid;
>         __u8                    rsvd96[3];
>         __u8                    nsattr;
> @@ -956,6 +960,7 @@ enum nvme_opcode {
>         nvme_cmd_resv_acquire   = 0x11,
>         nvme_cmd_io_mgmt_recv   = 0x12,
>         nvme_cmd_resv_release   = 0x15,
> +       nvme_cmd_copy           = 0x19,
>         nvme_cmd_zone_mgmt_send = 0x79,
>         nvme_cmd_zone_mgmt_recv = 0x7a,
>         nvme_cmd_zone_append    = 0x7d,
> @@ -978,6 +983,7 @@ enum nvme_opcode {
>                 nvme_opcode_name(nvme_cmd_resv_acquire),        \
>                 nvme_opcode_name(nvme_cmd_io_mgmt_recv),        \
>                 nvme_opcode_name(nvme_cmd_resv_release),        \
> +               nvme_opcode_name(nvme_cmd_copy),                \
>                 nvme_opcode_name(nvme_cmd_zone_mgmt_send),      \
>                 nvme_opcode_name(nvme_cmd_zone_mgmt_recv),      \
>                 nvme_opcode_name(nvme_cmd_zone_append))
> @@ -1158,6 +1164,39 @@ struct nvme_dsm_range {
>         __le64                  slba;
>  };
>
> +struct nvme_copy_cmd {
> +       __u8                    opcode;
> +       __u8                    flags;
> +       __u16                   command_id;
> +       __le32                  nsid;
> +       __u64                   rsvd2;
> +       __le64                  metadata;
> +       union nvme_data_ptr     dptr;
> +       __le64                  sdlba;
> +       __u8                    nr_range;
> +       __u8                    format;
> +       __le16                  control;
> +       __le16                  cev;
> +       __le16                  dspec;
> +       __le32                  lbtl;
> +       __le16                  lbat;
> +       __le16                  lbatm;
> +};
> +
> +#define NVME_COPY_MAX_RANGES   128
> +struct nvme_copy_range {
> +       __le32                  spars;
> +       __u32                   rsvd4;
> +       __le64                  slba;
> +       __le16                  nlb;
> +       __le16                  cetype;
> +       __le16                  cev;
> +       __le16                  sopt;
> +       __le32                  elbt;
> +       __le16                  elbat;
> +       __le16                  elbatm;
> +};
> +
>  struct nvme_write_zeroes_cmd {
>         __u8                    opcode;
>         __u8                    flags;
> @@ -1985,6 +2024,7 @@ struct nvme_command {
>                 struct nvme_download_firmware dlfw;
>                 struct nvme_format_cmd format;
>                 struct nvme_dsm_cmd dsm;
> +               struct nvme_copy_cmd copy;
>                 struct nvme_write_zeroes_cmd write_zeroes;
>                 struct nvme_zone_mgmt_send_cmd zms;
>                 struct nvme_zone_mgmt_recv_cmd zmr;
> --
> 2.47.1
>
>



More information about the Linux-nvme mailing list