[PATCH 4/7] block: Introduce a new ioctl for simple copy
Darrick J. Wong
djwong at kernel.org
Wed Aug 18 09:17:50 PDT 2021
On Wed, Aug 18, 2021 at 09:07:54PM +0530, Nitesh Shetty wrote:
> On Wed, Aug 18, 2021 at 5:06 AM Darrick J. Wong <djwong at kernel.org> wrote:
> >
> > On Tue, Aug 17, 2021 at 03:44:20PM +0530, SelvaKumar S wrote:
> > > From: Nitesh Shetty <nj.shetty at samsung.com>
> > >
> > > > Add new BLKCOPY ioctl that offloads copying of one or more source ranges
> > > to a destination in the device. COPY ioctl accepts a 'copy_range'
> > > structure that contains destination (in sectors), no of sources and
> > > pointer to the array of source ranges. Each source range is represented by
> > > 'range_entry' that contains start and length of source ranges (in sectors)
> > >
> > > MAX_COPY_NR_RANGE, limits the number of entries for the IOCTL and
> > > MAX_COPY_TOTAL_LENGTH limits the total copy length, IOCTL can handle.
> > >
> > > Example code, to issue BLKCOPY:
> > > /* Sample example to copy three source-ranges [0, 8] [16, 8] [32,8] to
> > > * [64,24], on the same device */
> > >
> > > int main(void)
> > > {
> > > int ret, fd;
> > > struct range_entry source_range[] = {{.src = 0, .len = 8},
> > > {.src = 16, .len = 8}, {.src = 32, .len = 8},};
> > > struct copy_range cr;
> > >
> > > cr.dest = 64;
> > > cr.nr_range = 3;
> > > cr.range_list = (__u64)&source_range;
> > >
> > > fd = open("/dev/nvme0n1", O_RDWR);
> > > if (fd < 0) return 1;
> > >
> > > ret = ioctl(fd, BLKCOPY, &cr);
> > > if (ret < 0) printf("copy failure\n");
> > >
> > > close(fd);
> > >
> > > return ret;
> > > }
> > >
> > > Signed-off-by: Nitesh Shetty <nj.shetty at samsung.com>
> > > Signed-off-by: SelvaKumar S <selvakuma.s1 at samsung.com>
> > > Signed-off-by: Kanchan Joshi <joshi.k at samsung.com>
> > > ---
> > > block/ioctl.c | 33 +++++++++++++++++++++++++++++++++
> > > include/uapi/linux/fs.h | 8 ++++++++
> > > 2 files changed, 41 insertions(+)
> > >
> > > diff --git a/block/ioctl.c b/block/ioctl.c
> > > index eb0491e90b9a..2af56d01e9fe 100644
> > > --- a/block/ioctl.c
> > > +++ b/block/ioctl.c
> > > @@ -143,6 +143,37 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
> > > GFP_KERNEL, flags);
> > > }
> > >
> > > +static int blk_ioctl_copy(struct block_device *bdev, fmode_t mode,
> > > + unsigned long arg)
> > > +{
> > > + struct copy_range crange;
> > > + struct range_entry *rlist;
> > > + int ret;
> > > +
> > > + if (!(mode & FMODE_WRITE))
> > > + return -EBADF;
> > > +
> > > + if (copy_from_user(&crange, (void __user *)arg, sizeof(crange)))
> > > + return -EFAULT;
> > > +
> > > + rlist = kmalloc_array(crange.nr_range, sizeof(*rlist),
> > > + GFP_KERNEL);
> > > + if (!rlist)
> > > + return -ENOMEM;
> > > +
> > > + if (copy_from_user(rlist, (void __user *)crange.range_list,
> > > + sizeof(*rlist) * crange.nr_range)) {
> > > + ret = -EFAULT;
> > > + goto out;
> > > + }
> > > +
> > > + ret = blkdev_issue_copy(bdev, crange.nr_range, rlist, bdev, crange.dest,
> > > + GFP_KERNEL, 0);
> > > +out:
> > > + kfree(rlist);
> > > + return ret;
> > > +}
> > > +
> > > static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
> > > unsigned long arg)
> > > {
> > > @@ -468,6 +499,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
> > > case BLKSECDISCARD:
> > > return blk_ioctl_discard(bdev, mode, arg,
> > > BLKDEV_DISCARD_SECURE);
> > > + case BLKCOPY:
> > > + return blk_ioctl_copy(bdev, mode, arg);
> > > case BLKZEROOUT:
> > > return blk_ioctl_zeroout(bdev, mode, arg);
> > > case BLKGETDISKSEQ:
> > > diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> > > index 7a97b588d892..4183688ff398 100644
> > > --- a/include/uapi/linux/fs.h
> > > +++ b/include/uapi/linux/fs.h
> > > @@ -76,6 +76,13 @@ struct range_entry {
> > > __u64 len;
> > > };
> > >
> > > +struct copy_range {
> > > + __u64 dest;
> > > + __u64 nr_range;
> >
> > If the maximum number of elements in the range list is 1<<12, there's no
> > need for this to be larger than a u16, right?
> >
> > > + __u64 range_list;
> >
> > Pointers embedded in a structure are /not/ a good idea, because this
> > will create a lot of compatibility headaches for 32-bit binaries running
> > on 64-bit kernels. Please just make the size of this header structure
> > a multiple of 8 bytes and put the range_entry list immediately after it.
> >
> > struct copy_range {
> > __s64 dest_offset;
> > __u32 nr_range_entries;
> > __u32 flags;
> > __u64 reserved[2];
> > };
> >
> > struct __user range_entry *re = ((struct range_entry *)(copyhead + 1));
> >
> > copy_from_user(&urk, re...);
> >
> > --D
> >
> Thanks, this is better. 'Reserved' field was there to be used for
> future extension of the interface.
> Now that you mentioned 'flags', it seems we can do away with
> 'reserved' fields altogether?
We still want the reserved-must-be-zero fields so that adding the first
field or two doesn't require changes to the pointer arithmetic.
Also, I suppose you could make the relationship between copy_range and
range_entry more explicit:
struct copy_range {
__s64 dest_offset;
__u32 nr_range_entries;
__u32 flags;
__u64 reserved[2];
/* must come last */
struct range_entry entries[];
};
struct __user range_entry *re = &copyhead->entries[0];
--D
>
> Regards,
> Nitesh Shetty
More information about the Linux-nvme
mailing list