[PATCH 4/5] block: add support for vectored copies
Hannes Reinecke
hare at suse.de
Thu May 22 06:58:18 PDT 2025
On 5/22/25 00:31, Keith Busch wrote:
> From: Keith Busch <kbusch at kernel.org>
>
> Copy offload can be used to defrad or garbage collect data spread across
Defrag?
> the disk. Most storage protocols provide a way to specifiy multiple
> sources in a single copy commnd, so introduce kernel and user space
> interfaces to accomplish that.
>
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
> block/blk-lib.c | 50 ++++++++++++++++++++++++----------
> block/ioctl.c | 59 +++++++++++++++++++++++++++++++++++++++++
> include/linux/blkdev.h | 2 ++
> include/uapi/linux/fs.h | 14 ++++++++++
> 4 files changed, 111 insertions(+), 14 deletions(-)
>
Any specific reason why this is a different patch, and not folded into
patch 2? It really feels odd to continuously updating interfaces which
have been added with the same patchset...
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index a538acbaa2cd7..7513b876a5399 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector,
> }
>
> static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
> - sector_t src_sector, sector_t nr_sects, gfp_t gfp)
> + struct bio_vec *bv, int nr_vecs, gfp_t gfp)
> {
> + unsigned size = 0;
> struct bio *bio;
> - int ret;
> -
> - struct bio_vec bv = {
> - .bv_sector = src_sector,
> - .bv_sectors = nr_sects,
> - };
> + int ret, i;
>
> - bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp);
> - bio_add_copy_src(bio, &bv);
> + bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp);
> + for (i = 0; i < nr_vecs; i++) {
> + size += bv[i].bv_sectors << SECTOR_SHIFT;
> + bio_add_copy_src(bio, &bv[i]);
> + }
> bio->bi_iter.bi_sector = dst_sector;
> - bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
> + bio->bi_iter.bi_size = size;
>
> ret = submit_bio_wait(bio);
> bio_put(bio);
> return ret;
> +}
> +
> +/**
> + * blkdev_copy_range - copy range of sectors to a destination
> + * @dst_sector: start sector of the destination to copy to
> + * @bv: vector of source sectors
> + * @nr_vecs: number of source sector vectors
> + * @gfp: allocation flags to use
> + */
> +int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
> + struct bio_vec *bv, int nr_vecs, gfp_t gfp)
> +{
> + int ret, i;
>
> + if (bdev_copy_sectors(bdev))
> + return blkdev_copy_offload(bdev, dst_sector, bv, nr_vecs, gfp);
> +
> + for (i = 0, ret = 0; i < nr_vecs && !ret; i++)
> + ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector,
> + bv[i].bv_sectors, gfp);
> + return ret;
> }
> +EXPORT_SYMBOL_GPL(blkdev_copy_range);
>
> /**
> * blkdev_copy - copy source sectors to a destination on the same block device
> @@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
> int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
> sector_t src_sector, sector_t nr_sects, gfp_t gfp)
> {
> - if (bdev_copy_sectors(bdev))
> - return blkdev_copy_offload(bdev, dst_sector, src_sector,
> - nr_sects, gfp);
> - return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp);
> + struct bio_vec bv = {
> + .bv_sector = src_sector,
> + .bv_sectors = nr_sects,
> + };
> +
> + return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp);
> }
> EXPORT_SYMBOL_GPL(blkdev_copy);
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 6f03c65867348..4b5095be19e1a 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
> return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
> }
>
> +static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode,
> + void __user *argp)
> +{
> + sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
> + struct bio_vec *bv, fast_bv[UIO_FASTIOV];
> + struct copy_range cr;
> + int i, nr, ret;
> + __u64 dst;
> +
> + if (!(mode & BLK_OPEN_WRITE))
> + return -EBADF;
> + if (copy_from_user(&cr, argp, sizeof(cr)))
> + return -EFAULT;
> + if (!(IS_ALIGNED(cr.dst_sector, align)))
> + return -EINVAL;
> +
> + nr = cr.nr_ranges;
> + if (nr <= UIO_FASTIOV) {
> + bv = fast_bv;
> + } else {
> + bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL);
> + if (!bv)
> + return -ENOMEM;
> + }
> +
> + dst = cr.dst_sector;
> + for (i = 0; i < nr; i++) {
> + struct copy_source csrc;
> + __u64 nr_sects, src;
> +
> + if (copy_from_user(&csrc,
> + (void __user *)(cr.sources + i * sizeof(csrc)),
> + sizeof(csrc))) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + nr_sects = csrc.nr_sectors;
> + src = csrc.src_sector;
> + if (!(IS_ALIGNED(src | nr_sects, align)) ||
> + (src < dst && src + nr_sects > dst) ||
> + (dst < src && dst + nr_sects > src)) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + bv[i].bv_sectors = nr_sects;
> + bv[i].bv_sector = src;
> + }
> +
> + ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL);
> +out:
> + if (bv != fast_bv)
> + kfree(bv);
> + return ret;
> +}
> +
> static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
> unsigned long arg)
> {
> @@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
> return blk_ioctl_secure_erase(bdev, mode, argp);
> case BLKCPY:
> return blk_ioctl_copy(bdev, mode, argp);
> + case BLKCPY_VEC:
> + return blk_ioctl_copy_vec(bdev, mode, argp);
> case BLKZEROOUT:
> return blk_ioctl_zeroout(bdev, mode, arg);
> case BLKGETDISKSEQ:
And that makes it even worse; introducing two ioctls which basically do
the same thing (or where one is actually a special case of the other)
is probably not what we should be doing.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
More information about the Linux-nvme
mailing list