[PATCH 10/12] block: add direct-io partial sector read support

Keith Busch kbusch at fb.com
Thu Jun 30 13:42:10 PDT 2022


From: Keith Busch <kbusch at kernel.org>

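Enable direct I/O to read partial sectors if the block device supports bit
buckets. The unwanted leading (skip) and trailing (trunc) bytes of the
request are added to the bio with blk_add_bb_page() and are subtracted from
the byte count reported back to the caller.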

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 block/fops.c | 69 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 13 deletions(-)

diff --git a/block/fops.c b/block/fops.c
index f37af5924cef..5eee8cef7ce0 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -46,9 +46,10 @@ static unsigned int dio_bio_write_op(struct kiocb *iocb)
 
 static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		struct iov_iter *iter, unsigned int nr_pages,
-		struct block_device *bdev, loff_t pos)
+		struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
 {
 	struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+	u16 bucket_bytes = skip + trunc;
 	bool should_dirty = false;
 	struct bio bio;
 	ssize_t ret;
@@ -72,10 +73,19 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio.bi_ioprio = iocb->ki_ioprio;
 
+	if (bucket_bytes) {
+		bio_set_flag(&bio, BIO_BIT_BUCKET);
+		if (skip)
+			blk_add_bb_page(&bio, skip);
+	}
+
 	ret = bio_iov_iter_get_pages(&bio, iter);
 	if (unlikely(ret))
 		goto out;
-	ret = bio.bi_iter.bi_size;
+
+	if (trunc)
+		blk_add_bb_page(&bio, trunc);
+	ret = bio.bi_iter.bi_size - bucket_bytes;
 
 	if (iov_iter_rw(iter) == WRITE)
 		task_io_account_write(ret);
@@ -157,13 +167,15 @@ static void blkdev_bio_end_io(struct bio *bio)
 }
 
 static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
-		unsigned int nr_pages, struct block_device *bdev, loff_t pos)
+		unsigned int nr_pages, struct block_device *bdev, loff_t pos,
+		u16 skip, u16 trunc)
 {
 	struct blk_plug plug;
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	u16 bucket_bytes = skip + trunc;
 	int ret = 0;
 
 	if (iocb->ki_flags & IOCB_ALLOC_CACHE)
@@ -199,6 +211,14 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
 
+		if (bucket_bytes) {
+			bio_set_flag(bio, BIO_BIT_BUCKET);
+			if (skip) {
+				blk_add_bb_page(bio, skip);
+				skip = 0;
+			}
+		}
+
 		ret = bio_iov_iter_get_pages(bio, iter);
 		if (unlikely(ret)) {
 			bio->bi_status = BLK_STS_IOERR;
@@ -206,6 +226,11 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 			break;
 		}
 
+		if (trunc && !iov_iter_count(iter)) {
+			blk_add_bb_page(bio, trunc);
+			trunc = 0;
+		}
+
 		if (is_read) {
 			if (dio->flags & DIO_SHOULD_DIRTY)
 				bio_set_pages_dirty(bio);
@@ -218,7 +243,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		dio->size += bio->bi_iter.bi_size;
 		pos += bio->bi_iter.bi_size;
 
-		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+		nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS, 0,
+							 trunc);
 		if (!nr_pages) {
 			submit_bio(bio);
 			break;
@@ -244,7 +270,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	if (!ret)
 		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
-		ret = dio->size;
+		ret = dio->size - bucket_bytes;
 
 	bio_put(&dio->bio);
 	return ret;
@@ -277,10 +303,11 @@ static void blkdev_bio_end_io_async(struct bio *bio)
 
 static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 		struct iov_iter *iter, unsigned int nr_pages,
-		struct block_device *bdev, loff_t pos)
+		struct block_device *bdev, loff_t pos, u16 skip, u16 trunc)
 {
 	bool is_read = iov_iter_rw(iter) == READ;
 	unsigned int opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+	u16 bucket_bytes = skip + trunc;
 	struct blkdev_dio *dio;
 	struct bio *bio;
 	int ret = 0;
@@ -296,6 +323,12 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;
 
+	if (bucket_bytes) {
+		bio_set_flag(bio, BIO_BIT_BUCKET);
+		if (skip)
+			blk_add_bb_page(bio, skip);
+	}
+
 	if (iov_iter_is_bvec(iter)) {
 		/*
 		 * Users don't rely on the iterator being in any particular
@@ -311,7 +344,11 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 			return ret;
 		}
 	}
-	dio->size = bio->bi_iter.bi_size;
+
+	if (trunc)
+		blk_add_bb_page(bio, trunc);
+
+	dio->size = bio->bi_iter.bi_size - bucket_bytes;
 
 	if (is_read) {
 		if (iter_is_iovec(iter)) {
@@ -338,23 +375,29 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct block_device *bdev = iocb->ki_filp->private_data;
 	loff_t pos = iocb->ki_pos;
+	u16 skip = 0, trunc = 0;
 	unsigned int nr_pages;
 
 	if (!iov_iter_count(iter))
 		return 0;
-	if (blkdev_dio_unaligned(bdev, pos, iter))
-		return -EINVAL;
+	if (blkdev_dio_unaligned(bdev, pos, iter)) {
+		if (!blkdev_bit_bucket(bdev, pos, iov_iter_count(iter), iter,
+				       &skip, &trunc))
+			return -EINVAL;
+		nr_pages = bio_iov_vecs_to_alloc_partial(iter, BIO_MAX_VECS + 1,
+							 skip, trunc);
+	} else
+		nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
 
-	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
 	if (likely(nr_pages <= BIO_MAX_VECS)) {
 		if (is_sync_kiocb(iocb))
 			return __blkdev_direct_IO_simple(iocb, iter, nr_pages,
-							 bdev, pos);
+							 bdev, pos, skip, trunc);
 		return __blkdev_direct_IO_async(iocb, iter, nr_pages, bdev,
-						pos);
+						pos, skip, trunc);
 	}
 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages), bdev,
-				  pos);
+				  pos, skip, trunc);
 }
 
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-- 
2.30.2




More information about the Linux-nvme mailing list