[PATCH 10/21] block: Add fops atomic write support
John Garry
john.g.garry at oracle.com
Fri Sep 29 03:27:15 PDT 2023
Add support for atomic writes, as follows:
- Ensure that the IO follows all the atomic writes rules, like must be
naturally aligned
- Set REQ_ATOMIC
Signed-off-by: John Garry <john.g.garry at oracle.com>
---
block/fops.c | 42 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/block/fops.c b/block/fops.c
index acff3d5d22d4..516669ad69e5 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -41,6 +41,29 @@ static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
!bdev_iter_is_aligned(bdev, iter);
}
+static bool blkdev_atomic_write_valid(struct block_device *bdev, loff_t pos,
+ struct iov_iter *iter)
+{
+ unsigned int atomic_write_unit_min_bytes =
+ queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev));
+ unsigned int atomic_write_unit_max_bytes =
+ queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
+
+ if (!atomic_write_unit_min_bytes)
+ return false;
+ if (pos % atomic_write_unit_min_bytes)
+ return false;
+ if (iov_iter_count(iter) % atomic_write_unit_min_bytes)
+ return false;
+ if (!is_power_of_2(iov_iter_count(iter)))
+ return false;
+ if (iov_iter_count(iter) > atomic_write_unit_max_bytes)
+ return false;
+ if (pos % iov_iter_count(iter))
+ return false;
+ return true;
+}
+
#define DIO_INLINE_BIO_VECS 4
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
@@ -48,6 +71,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
{
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+ bool is_read = iov_iter_rw(iter) == READ;
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
struct bio bio;
@@ -56,6 +81,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
+ if (atomic_write && !blkdev_atomic_write_valid(bdev, pos, iter))
+ return -EINVAL;
+
if (nr_pages <= DIO_INLINE_BIO_VECS)
vecs = inline_vecs;
else {
@@ -65,7 +93,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
return -ENOMEM;
}
- if (iov_iter_rw(iter) == READ) {
+ if (is_read) {
bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
if (user_backed_iter(iter))
should_dirty = true;
@@ -74,6 +102,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_ioprio = iocb->ki_ioprio;
+ if (atomic_write)
+ bio.bi_opf |= REQ_ATOMIC;
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
@@ -167,10 +197,14 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blkdev_dio *dio;
struct bio *bio;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
loff_t pos = iocb->ki_pos;
int ret = 0;
+ if (atomic_write)
+ return -EINVAL;
+
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
@@ -305,6 +339,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
bool is_read = iov_iter_rw(iter) == READ;
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
struct blkdev_dio *dio;
struct bio *bio;
loff_t pos = iocb->ki_pos;
@@ -313,6 +348,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
+ if (atomic_write && !blkdev_atomic_write_valid(bdev, pos, iter))
+ return -EINVAL;
+
if (iocb->ki_flags & IOCB_ALLOC_CACHE)
opf |= REQ_ALLOC_CACHE;
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
@@ -347,6 +385,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
bio_set_pages_dirty(bio);
}
} else {
+ if (atomic_write)
+ bio->bi_opf |= REQ_ATOMIC;
task_io_account_write(bio->bi_iter.bi_size);
}
--
2.31.1
More information about the Linux-nvme
mailing list