[PATCH 4/5] block: add support for vectored copies
Keith Busch
kbusch at meta.com
Wed May 21 15:31:06 PDT 2025
From: Keith Busch <kbusch at kernel.org>
Copy offload can be used to defrag or garbage collect data spread across
the disk. Most storage protocols provide a way to specify multiple
sources in a single copy command, so introduce kernel and user space
interfaces to accomplish that.
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
block/blk-lib.c | 50 ++++++++++++++++++++++++----------
block/ioctl.c | 59 +++++++++++++++++++++++++++++++++++++++++
include/linux/blkdev.h | 2 ++
include/uapi/linux/fs.h | 14 ++++++++++
4 files changed, 111 insertions(+), 14 deletions(-)
diff --git a/block/blk-lib.c b/block/blk-lib.c
index a538acbaa2cd7..7513b876a5399 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -424,26 +424,46 @@ static int __blkdev_copy(struct block_device *bdev, sector_t dst_sector,
}
+/*
+ * Build a single REQ_OP_COPY bio that carries the @nr_vecs source ranges in
+ * @bv, aim it at @dst_sector and wait for it.  Returns -EINVAL or -ENOMEM if
+ * the bio cannot be set up, otherwise the result of submit_bio_wait().
+ */
static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
- sector_t src_sector, sector_t nr_sects, gfp_t gfp)
+ struct bio_vec *bv, int nr_vecs, gfp_t gfp)
{
+ unsigned size = 0;
struct bio *bio;
- int ret;
-
- struct bio_vec bv = {
- .bv_sector = src_sector,
- .bv_sectors = nr_sects,
- };
+ int ret, i;
- bio = bio_alloc(bdev, 1, REQ_OP_COPY, gfp);
- bio_add_copy_src(bio, &bv);
+ /*
+ * The range count can originate from user space (BLKCPY_VEC).  A bio
+ * cannot hold more than BIO_MAX_VECS vectors, so refuse such requests
+ * up front instead of letting the allocation fail.
+ */
+ if (nr_vecs < 0 || nr_vecs > BIO_MAX_VECS)
+ return -EINVAL;
+
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_COPY, gfp);
+ if (!bio)
+ return -ENOMEM;
+
+ /* Attach every source range and total up the bytes to be copied. */
+ for (i = 0; i < nr_vecs; i++) {
+ size += bv[i].bv_sectors << SECTOR_SHIFT;
+ bio_add_copy_src(bio, &bv[i]);
+ }
bio->bi_iter.bi_sector = dst_sector;
- bio->bi_iter.bi_size = nr_sects << SECTOR_SHIFT;
+ bio->bi_iter.bi_size = size;
ret = submit_bio_wait(bio);
bio_put(bio);
return ret;
+}
+
+/**
+ * blkdev_copy_range - copy ranges of sectors to a destination
+ * @bdev: block device holding both the sources and the destination
+ * @dst_sector: start sector of the destination to copy to
+ * @bv: vector of source sectors
+ * @nr_vecs: number of source sector vectors
+ * @gfp: allocation flags to use
+ *
+ * The source ranges are laid out back to back starting at @dst_sector, in
+ * the order they appear in @bv.  A single offloaded copy is issued when
+ * bdev_copy_sectors() is non-zero; otherwise the ranges are copied one at a
+ * time.
+ *
+ * Return: 0 on success, or the error of the first range that failed.
+ */
+int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
+		struct bio_vec *bv, int nr_vecs, gfp_t gfp)
+{
+	int ret = 0, i;
+
+	if (bdev_copy_sectors(bdev))
+		return blkdev_copy_offload(bdev, dst_sector, bv, nr_vecs, gfp);
+
+	for (i = 0; i < nr_vecs; i++) {
+		ret = __blkdev_copy(bdev, dst_sector, bv[i].bv_sector,
+				bv[i].bv_sectors, gfp);
+		if (ret)
+			break;
+		/* The next range lands right after the one just written. */
+		dst_sector += bv[i].bv_sectors;
+	}
+	return ret;
}
+EXPORT_SYMBOL_GPL(blkdev_copy_range);
/**
* blkdev_copy - copy source sectors to a destination on the same block device
@@ -455,9 +475,11 @@ static int blkdev_copy_offload(struct block_device *bdev, sector_t dst_sector,
int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
sector_t src_sector, sector_t nr_sects, gfp_t gfp)
{
- if (bdev_copy_sectors(bdev))
- return blkdev_copy_offload(bdev, dst_sector, src_sector,
- nr_sects, gfp);
- return __blkdev_copy(bdev, dst_sector, src_sector, nr_sects, gfp);
+ /* A single-range copy is the one-element case of blkdev_copy_range(). */
+ struct bio_vec bv = {
+ .bv_sector = src_sector,
+ .bv_sectors = nr_sects,
+ };
+
+ return blkdev_copy_range(bdev, dst_sector, &bv, 1, gfp);
}
EXPORT_SYMBOL_GPL(blkdev_copy);
diff --git a/block/ioctl.c b/block/ioctl.c
index 6f03c65867348..4b5095be19e1a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -241,6 +241,63 @@ static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
}
+/*
+ * BLKCPY_VEC: copy the source ranges described by a user supplied
+ * struct copy_range to cr.dst_sector on the same device.
+ *
+ * Returns -EBADF if the device is not open for writing, -EFAULT if the
+ * argument or its source array cannot be read, -ENOMEM if the range vector
+ * cannot be allocated, and -EINVAL for misaligned, wrapping or overlapping
+ * ranges.  Otherwise returns the result of blkdev_copy_range().
+ */
+static int blk_ioctl_copy_vec(struct block_device *bdev, blk_mode_t mode,
+		void __user *argp)
+{
+	sector_t align = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
+	struct bio_vec *bv, fast_bv[UIO_FASTIOV];
+	struct copy_range cr;
+	__u64 dst, end, total = 0;
+	int i, nr, ret;
+
+	if (!(mode & BLK_OPEN_WRITE))
+		return -EBADF;
+	if (copy_from_user(&cr, argp, sizeof(cr)))
+		return -EFAULT;
+	if (!(IS_ALIGNED(cr.dst_sector, align)))
+		return -EINVAL;
+
+	/* Small requests use the on-stack vector; larger ones allocate. */
+	nr = cr.nr_ranges;
+	if (nr <= UIO_FASTIOV) {
+		bv = fast_bv;
+	} else {
+		bv = kmalloc_array(nr, sizeof(*bv), GFP_KERNEL);
+		if (!bv)
+			return -ENOMEM;
+	}
+
+	/* Pass 1: fetch and validate each source, summing the total length. */
+	dst = cr.dst_sector;
+	for (i = 0; i < nr; i++) {
+		struct copy_source csrc;
+
+		if (copy_from_user(&csrc,
+				(void __user *)(cr.sources + i * sizeof(csrc)),
+				sizeof(csrc))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		/* Reject misaligned ranges and arithmetic that would wrap. */
+		if (!(IS_ALIGNED(csrc.src_sector | csrc.nr_sectors, align)) ||
+		    csrc.src_sector + csrc.nr_sectors < csrc.src_sector ||
+		    total + csrc.nr_sectors < total) {
+			ret = -EINVAL;
+			goto out;
+		}
+		total += csrc.nr_sectors;
+
+		bv[i].bv_sectors = csrc.nr_sectors;
+		bv[i].bv_sector = csrc.src_sector;
+	}
+
+	end = dst + total;
+	if (end < dst) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Pass 2: the destination spans [dst, end), i.e. the sum of all
+	 * ranges, so each non-empty source must stay clear of that whole
+	 * span and not merely of a window of its own length.
+	 */
+	for (i = 0; i < nr; i++) {
+		__u64 src = bv[i].bv_sector;
+		__u64 nr_sects = bv[i].bv_sectors;
+
+		if (nr_sects && src < end && dst < src + nr_sects) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	ret = blkdev_copy_range(bdev, dst, bv, nr, GFP_KERNEL);
+out:
+	if (bv != fast_bv)
+		kfree(bv);
+	return ret;
+}
+
static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
unsigned long arg)
{
@@ -605,6 +662,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
return blk_ioctl_secure_erase(bdev, mode, argp);
case BLKCPY:
return blk_ioctl_copy(bdev, mode, argp);
+ case BLKCPY_VEC:
+ return blk_ioctl_copy_vec(bdev, mode, argp);
case BLKZEROOUT:
return blk_ioctl_zeroout(bdev, mode, arg);
case BLKGETDISKSEQ:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e39ba0e91d43e..a77f2298754b5 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1182,6 +1182,8 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp);
int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
sector_t src_sector, sector_t nr_sects, gfp_t gfp);
+int blkdev_copy_range(struct block_device *bdev, sector_t dst_sector,
+ struct bio_vec *bv, int nr_vecs, gfp_t gfp);
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 534f157ce22e9..aed965f74ea2c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -218,6 +218,20 @@ struct fsxattr {
/* [0] = destination lba, [1] = source lba, [2] = number of sectors */
#define BLKCPY _IOWR(0x12,142,__u64[3])
+/* One source extent for BLKCPY_VEC, in units of (1 << SECTOR_SHIFT) bytes. */
+struct copy_source {
+ __u64 src_sector; /* first sector to copy from */
+ __u64 nr_sectors; /* number of sectors to copy */
+};
+
+/*
+ * Argument of BLKCPY_VEC.  dst_sector and every src_sector/nr_sectors must
+ * be a multiple of the device's logical block size.
+ */
+struct copy_range {
+ __u64 dst_sector; /* first sector to copy to */
+ __u16 nr_ranges; /* number of entries in the sources array */
+ __u8 rsvd[6]; /* reserved; not examined by the ioctl handler */
+ __u64 sources; /* user space pointer to struct copy_source[] */
+};
+#define BLKCPY_VEC _IOWR(0x12,143,struct copy_range)
+
+
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
--
2.47.1
More information about the Linux-nvme
mailing list