[PATCH 4/5] block: add support for vectored copies

Keith Busch kbusch at meta.com
Wed May 21 15:31:06 PDT 2025


From: Keith Busch <kbusch at kernel.org>

Copy offload can be used to defrag or garbage collect data spread across
the disk. Most storage protocols provide a way to specify multiple
sources in a single copy command, so introduce kernel and user space
interfaces to accomplish that.

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 block/blk-lib.c         | 50 ++++++++++++++++++++++++----------
 block/ioctl.c           | 59 +++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h  |  2 ++
 include/uapi/linux/fs.h | 14 ++++++++++
 4 files changed, 111 insertions(+), 14 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index a538acbaa2cd7..7513b876a5399 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector,
 }
 
 static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
-		sector_t src_sector, sector_t nr_sects, gfp_t gfp)
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp)
 {
+	unsigned size = 0;	/* NOTE(review): sum of source bytes; may wrap */
 	struct bio *bio;
-	int ret;
-
-	struct bio_vec bv = {
-		.bv_sector = src_sector,
-		.bv_sectors = nr_sects,
-	};
+	int ret, i;
 
-	bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp);
-	bio_add_copy_src(bio, &bv);
+	bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp);
+	for (i = 0; i < nr_vecs; i++) {
+		size += bv[i].bv_sectors << SECTOR_SHIFT; /* sectors -> bytes */
+		bio_add_copy_src(bio, &bv[i]);
+	}
 	bio->bi_iter.bi_sector = dst_sector;
-	bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
+	bio->bi_iter.bi_size = size;	/* length of all sources combined */
 
 	ret = submit_bio_wait(bio);
 	bio_put(bio);
 	return ret;
+}
+
+/**
+ * blkdev_copy_range - copy range of sectors to a destination
+ * @bdev:	block device to copy within
+ * @dst_sector:	start sector of the destination to copy to
+ * @bv:		source sector ranges, laid out back to back from @dst_sector
+ * @nr_vecs:	number of source sector vectors
+ * @gfp:	allocation flags to use
+ */
+int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp)
+{
+	int ret = 0, i;
 
+	if (bdev_copy_sectors(bdev))
+		return blkdev_copy_offload(bdev, dst_sector, bv, nr_vecs, gfp);
+	for (i = 0; i < nr_vecs && !ret; dst_sector += bv[i++].bv_sectors)
+		ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector,
+				bv[i].bv_sectors, gfp);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(blkdev_copy_range);
 
 /**
  * blkdev_copy - copy source sectors to a destination on the same block device
@@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
 int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
 		sector_t src_sector, sector_t nr_sects, gfp_t gfp)
 {
-	if (bdev_copy_sectors(bdev))
-		return blkdev_copy_offload(bdev, dst_sector, src_sector,
-					nr_sects, gfp);
-	return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp);
+	struct bio_vec bv = {	/* the single source range as a 1-entry vector */
+		.bv_sector = src_sector,
+		.bv_sectors = nr_sects,
+	};
+
+	return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp);
 }
 EXPORT_SYMBOL_GPL(blkdev_copy);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6f03c65867348..4b5095be19e1a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
 	return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
 }
 
+/* BLKCPY_VEC: copy the caller's source ranges to cr.dst_sector, in order */
+static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode,
+		void __user *argp)
+{
+	sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
+	struct bio_vec *bv, fast_bv[UIO_FASTIOV];
+	__u64 dst, end, src, len;
+	struct copy_source csrc;
+	struct copy_range cr;
+	int i, nr, ret;
+
+	if (!(mode & BLK_OPEN_WRITE))
+		return -EBADF;
+	if (copy_from_user(&cr, argp, sizeof(cr)))
+		return -EFAULT;
+	if (!(IS_ALIGNED(cr.dst_sector, align)))
+		return -EINVAL;
+
+	nr = cr.nr_ranges;
+	bv = fast_bv;
+	if (nr > UIO_FASTIOV)
+		bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL);
+	if (!bv)
+		return -ENOMEM;
+
+	dst = end = cr.dst_sector;
+	for (i = 0; i < nr; i++) {
+		ret = -EFAULT;
+		if (copy_from_user(&csrc, (void __user *)(uintptr_t)
+				(cr.sources + i * sizeof(csrc)), sizeof(csrc)))
+			goto out;
+		ret = -EINVAL;
+		src = csrc.src_sector;
+		len = csrc.nr_sectors;
+		if (!IS_ALIGNED(src | len, align) || end + len < end)
+			goto out;
+		end += len;
+		bv[i].bv_sectors = len;
+		bv[i].bv_sector = src;
+	}
+	/* the destination is [dst, end); no source range may intersect it */
+	for (i = 0; i < nr; i++) {
+		src = bv[i].bv_sector;
+		len = bv[i].bv_sectors;
+		if (len && src < end && dst < src + len) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL);
+out:
+	if (bv != fast_bv)
+		kfree(bv);
+	return ret;
+}
+
 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
 		unsigned long arg)
 {
@@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
 		return blk_ioctl_secure_erase(bdev, mode, argp);
 	case BLKCPY:
 		return blk_ioctl_copy(bdev, mode, argp);
+	case BLKCPY_VEC:
+		return blk_ioctl_copy_vec(bdev, mode, argp);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
 	case BLKGETDISKSEQ:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e39ba0e91d43e..a77f2298754b5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1182,6 +1182,8 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp);
 int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
 		sector_t src_sector, sector_t nr_sects, gfp_t gfp);
+int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp);
 
 #define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 534f157ce22e9..aed965f74ea2c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -218,6 +218,20 @@ struct fsxattr {
 /* [0] = destination lba, [1] = source lba, [2] = number of sectors */
 #define BLKCPY _IOWR(0x12,142,__u64[3])
 
+struct copy_source {
+	__u64 src_sector;	/* first sector of this source range */
+	__u64 nr_sectors;	/* number of sectors in this source range */
+};
+
+struct copy_range {
+	__u64	dst_sector;	/* first sector of the destination */
+	__u16	nr_ranges;	/* number of struct copy_source entries */
+	__u8	rsvd[6];	/* fills out 8 bytes; not checked by the handler */
+	__u64	sources; /* user space pointer to struct copy_source[] */
+};
+#define BLKCPY_VEC _IOWR(0x12,143,struct copy_range)
+
+
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
-- 
2.47.1




More information about the Linux-nvme mailing list