[PATCH 10/12] block: add direct-io partial sector read support
Keith Busch
kbusch at fb.com
Thu Jun 30 13:42:10 PDT 2022
From: Keith Busch <kbusch at kernel.org>
Enable direct I/O to read partial sectors if the block device supports bit
buckets.
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
block/fops.c | 69 ++++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 56 insertions(+), 13 deletions(-)
diff --git a/block/fops.c b/block/fops.c
index f37af5924cef..5eee8cef7ce0 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -46,9 +46,10 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages,
- struct block_device *bdev, loff_t pos)
+ struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
{
struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+ u16 bucket_bytes = skip + trunc;
bool should_dirty = false;
struct bio bio;
ssize_t ret;
@@ -72,10 +73,19 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_ioprio = iocb->ki_ioprio;
+ if (bucket_bytes) {
+ bio_set_flag(&bio, BIO_BIT_BUCKET);
+ if (skip)
+ blk_add_bb_page(&bio, skip);
+ }
+
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
goto out;
- ret = bio.bi_iter.bi_size;
+
+ if (trunc)
+ blk_add_bb_page(&bio, trunc);
+ ret = bio.bi_iter.bi_size - bucket_bytes;
if (iov_iter_rw(iter) == WRITE)
task_io_account_write(ret);
@@ -157,13 +167,15 @@ static void blkdev_bio_end_io(struct bio *bio)
}
static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
- unsigned int nr_pages, struct block_device *bdev, loff_t pos)
+ unsigned int nr_pages, struct block_device *bdev, loff_t pos,
+ u16 skip, u16 trunc)
{
struct blk_plug plug;
struct blkdev_dio *dio;
struct bio *bio;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+ u16 bucket_bytes = skip + trunc;
int ret = 0;
if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@ -199,6 +211,14 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
+ if (bucket_bytes) {
+ bio_set_flag(bio, BIO_BIT_BUCKET);
+ if (skip) {
+ blk_add_bb_page(bio, skip);
+ skip = 0;
+ }
+ }
+
ret = bio_iov_iter_get_pages(bio, iter);
if (unlikely(ret)) {
bio->bi_status = BLK_STS_IOERR;
@@ -206,6 +226,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
break;
}
+ if (trunc && !iov_iter_count(iter)) {
+ blk_add_bb_page(bio, trunc);
+ trunc = 0;
+ }
+
if (is_read) {
if (dio->flags & DIO_SHOULD_DIRTY)
bio_set_pages_dirty(bio);
@@ -218,7 +243,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+ nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS, 0,
+ trunc);
if (!nr_pages) {
submit_bio(bio);
break;
@@ -244,7 +270,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status);
if (likely(!ret))
- ret = dio->size;
+ ret = dio->size - bucket_bytes;
bio_put(&dio->bio);
return ret;
@@ -277,10 +303,11 @@ static void blkdev_bio_end_io_async(struct bio *bio)
static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
struct iov_iter *iter, unsigned int nr_pages,
- struct block_device *bdev, loff_t pos)
+ struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
{
bool is_read = iov_iter_rw(iter) == READ;
unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+ u16 bucket_bytes = skip + trunc;
struct blkdev_dio *dio;
struct bio *bio;
int ret = 0;
@@ -296,6 +323,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
bio->bi_end_io = blkdev_bio_end_io_async;
bio->bi_ioprio = iocb->ki_ioprio;
+ if (bucket_bytes) {
+ bio_set_flag(bio, BIO_BIT_BUCKET);
+ if (skip)
+ blk_add_bb_page(bio, skip);
+ }
+
if (iov_iter_is_bvec(iter)) {
/*
* Users don't rely on the iterator being in any particular
@@ -311,7 +344,11 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
return ret;
}
}
- dio->size = bio->bi_iter.bi_size;
+
+ if (trunc)
+ blk_add_bb_page(bio, trunc);
+
+ dio->size = bio->bi_iter.bi_size - bucket_bytes;
if (is_read) {
if (iter_is_iovec(iter)) {
@@ -338,23 +375,29 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct block_device *bdev = iocb->ki_filp->private_data;
loff_t pos = iocb->ki_pos;
+ u16 skip = 0, trunc = 0;
unsigned int nr_pages;
if (!iov_iter_count(iter))
return 0;
- if (blkdev_dio_unaligned(bdev, pos, iter))
- return -EINVAL;
+ if (blkdev_dio_unaligned(bdev, pos, iter)) {
+ if (!blkdev_bit_bucket(bdev, pos, iov_iter_count(iter), iter,
+ &skip, &trunc))
+ return -EINVAL;
+ nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS + 1,
+ skip, trunc);
+ } else
+ nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
if (likely(nr_pages <= BIO_MAX_VECS)) {
if (is_sync_kiocb(iocb))
return __blkdev_direct_IO_simple(iocb, iter, nr_pages,
- bdev, pos);
+ bdev, pos, skip, trunc);
return __blkdev_direct_IO_async(iocb, iter, nr_pages, bdev,
- pos);
+ pos, skip, trunc);
}
return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages), bdev,
- pos);
+ pos, skip, trunc);
}
static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
--
2.30.2
More information about the Linux-nvme
mailing list