[PATCH 1/5] block: new sector copy api

Keith Busch kbusch at meta.com
Wed May 21 15:31:03 PDT 2025


From: Keith Busch <kbusch at kernel.org>

Provide a basic block-level API to copy a range of a block device's
sectors to a new destination on the same device. This just reads the
source data into host memory, then writes it back out to the device at
the requested destination.

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 block/blk-lib.c         | 62 +++++++++++++++++++++++++++++++++++++++++
 block/ioctl.c           | 30 ++++++++++++++++++++
 include/linux/blkdev.h  |  2 ++
 include/uapi/linux/fs.h |  3 ++
 4 files changed, 97 insertions(+)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 4c9f20a689f7b..a819ded0ed3a9 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -368,3 +368,85 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 	return ret;
 }
 EXPORT_SYMBOL(blkdev_issue_secure_erase);
+
+/**
+ * blkdev_copy - copy source sectors to a destination on the same block device
+ * @bdev:	block device to copy within
+ * @dst_sector:	start sector of the destination to copy to
+ * @src_sector:	start sector of the source to copy from
+ * @nr_sects:	number of sectors to copy
+ * @gfp:	allocation flags to use
+ *
+ * Reads the source range into a host memory bounce buffer and writes it
+ * back out at the destination, one chunk at a time. If a read or write
+ * fails part way through, chunks already written stay written.
+ *
+ * Return: 0 on success, -EINVAL if the byte length of the range does not
+ * fit in an unsigned int, -ENOMEM on allocation failure, or the error of
+ * the first failed read or write.
+ */
+int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
+		sector_t src_sector, sector_t nr_sects, gfp_t gfp)
+{
+	unsigned int nr_vecs, len, size;
+	struct bio *bio;
+	int ret = 0;
+	void *buf;
+
+	if (!nr_sects)
+		return 0;
+	if (nr_sects > UINT_MAX >> SECTOR_SHIFT)
+		return -EINVAL;
+
+	/* Range-checked above, so the byte length fits in unsigned int. */
+	len = nr_sects << SECTOR_SHIFT;
+	nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+	size = min(len, nr_vecs * PAGE_SIZE);
+
+	buf = kvmalloc(size, gfp);
+	if (!buf)
+		return -ENOMEM;
+
+	nr_vecs = bio_add_max_vecs(buf, size);
+	bio = bio_alloc(bdev, nr_vecs, 0, gfp);
+	if (!bio) {
+		/* bio_alloc() may return NULL, depending on @gfp */
+		ret = -ENOMEM;
+		goto out_free_buf;
+	}
+
+	if (is_vmalloc_addr(buf))
+		bio_add_vmalloc(bio, buf, size);
+	else
+		bio_add_virt_nofail(bio, buf, size);
+
+	while (len) {
+		size = min(len, size);
+
+		bio_reset(bio, bdev, REQ_OP_READ);
+		bio->bi_iter.bi_sector = src_sector;
+		bio->bi_iter.bi_size = size;
+
+		ret = submit_bio_wait(bio);
+		if (ret)
+			break;
+
+		bio_reset(bio, bdev, REQ_OP_WRITE);
+		bio->bi_iter.bi_sector = dst_sector;
+		bio->bi_iter.bi_size = size;
+
+		ret = submit_bio_wait(bio);
+		if (ret)
+			break;
+
+		src_sector += size >> SECTOR_SHIFT;
+		dst_sector += size >> SECTOR_SHIFT;
+		len -= size;
+	}
+
+	bio_put(bio);
+out_free_buf:
+	kvfree(buf);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkdev_copy);
diff --git a/block/ioctl.c b/block/ioctl.c
index e472cc1030c60..6f03c65867348 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -212,6 +212,48 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
 	return err;
 }
 
+/*
+ * BLKCPY: copy a range of sectors to another location on the same device.
+ *
+ * @argp points to three u64 values: destination start sector, source start
+ * sector and number of sectors. All three must be aligned to the logical
+ * block size, both ranges must lie within the device, and the ranges must
+ * not overlap unless they are identical.
+ *
+ * NOTE(review): the page cache is not flushed or invalidated for either
+ * range here - confirm whether that is intended.
+ */
+static int blk_ioctl_copy(struct block_device *bdev, blk_mode_t mode,
+		void __user *argp)
+{
+	unsigned int lbs = bdev_logical_block_size(bdev) >> SECTOR_SHIFT;
+	uint64_t dst, src, dst_end, src_end, nr, range[3];
+	sector_t capacity = bdev_nr_sectors(bdev);
+
+	if (!(mode & BLK_OPEN_WRITE))
+		return -EBADF;
+	if (copy_from_user(range, argp, sizeof(range)))
+		return -EFAULT;
+
+	dst = range[0];
+	src = range[1];
+	nr = range[2];
+
+	if (!nr)
+		return -EINVAL;
+	if (!(IS_ALIGNED(dst | src | nr, lbs)))
+		return -EINVAL;
+	if (check_add_overflow(src, nr - 1, &src_end) ||
+	    check_add_overflow(dst, nr - 1, &dst_end))
+		return -EINVAL;
+	/* Fail an out-of-range destination before any data is copied. */
+	if (src_end >= capacity || dst_end >= capacity)
+		return -EINVAL;
+	/* Overlapping ranges are rejected unless src == dst. */
+	if (src < dst && src_end >= dst)
+		return -EINVAL;
+	if (dst < src && dst_end >= src)
+		return -EINVAL;
+
+	return blkdev_copy(bdev, dst, src, nr, GFP_KERNEL);
+}
 
 static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
 		unsigned long arg)
@@ -575,6 +603,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode,
 		return blk_ioctl_discard(bdev, mode, arg);
 	case BLKSECDISCARD:
 		return blk_ioctl_secure_erase(bdev, mode, argp);
+	case BLKCPY:
+		return blk_ioctl_copy(bdev, mode, argp);
 	case BLKZEROOUT:
 		return blk_ioctl_zeroout(bdev, mode, arg);
 	case BLKGETDISKSEQ:
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 332b56f323d92..b7d71b126ec9b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1176,6 +1176,8 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop);
 int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp);
+int blkdev_copy(struct block_device *bdev, sector_t dst_sector,
+		sector_t src_sector, sector_t nr_sects, gfp_t gfp);
 
 #define BLKDEV_ZERO_NOUNMAP	(1 << 0)  /* do not free blocks */
 #define BLKDEV_ZERO_NOFALLBACK	(1 << 1)  /* don't write explicit zeroes */
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index e762e1af650c4..534f157ce22e9 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -215,6 +215,9 @@ struct fsxattr {
 /* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
 /* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
 
+/* [0] = destination sector, [1] = source sector, [2] = number of sectors */
+#define BLKCPY _IOWR(0x12,142,__u64[3])
+
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
-- 
2.47.1




More information about the Linux-nvme mailing list