[PATCH v2 11/16] block: Add fops atomic write support
John Garry
john.g.garry at oracle.com
Tue Dec 12 03:08:39 PST 2023
Add support for atomic writes, as follows:
- Ensure that the IO follows all the atomic write rules, such as being
naturally aligned
- Set REQ_ATOMIC in the bio's bi_opf for atomic writes
Signed-off-by: John Garry <john.g.garry at oracle.com>
---
block/fops.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
diff --git a/block/fops.c b/block/fops.c
index 0abaac705daf..ba6a2c5a74b1 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -41,6 +41,24 @@ static bool blkdev_dio_unaligned(struct block_device *bdev, loff_t pos,
!bdev_iter_is_aligned(bdev, iter);
}
+/* Check an atomic write's length and start offset against the queue limits. */
+static bool blkdev_atomic_write_valid(struct block_device *bdev, loff_t pos,
+ struct iov_iter *iter)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int unit_min = queue_atomic_write_unit_min_bytes(q);
+ unsigned int unit_max = queue_atomic_write_unit_max_bytes(q);
+ size_t len = iov_iter_count(iter);
+
+ /* Reject a length with bits set in (unit_min - 1) or not a power of two. */
+ if ((len & (unit_min - 1)) || !is_power_of_2(len))
+ return false;
+ /* Reject a start not aligned to the length, or a length above unit_max. */
+ if ((pos & (len - 1)) || len > unit_max)
+ return false;
+ return true;
+}
+
#define DIO_INLINE_BIO_VECS 4
static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
@@ -48,6 +66,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
{
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+ bool is_read = iov_iter_rw(iter) == READ;
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
loff_t pos = iocb->ki_pos;
bool should_dirty = false;
struct bio bio;
@@ -56,6 +76,9 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
+ if (atomic_write && !blkdev_atomic_write_valid(bdev, pos, iter))
+ return -EINVAL;
+
if (nr_pages <= DIO_INLINE_BIO_VECS)
vecs = inline_vecs;
else {
@@ -65,7 +88,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
return -ENOMEM;
}
- if (iov_iter_rw(iter) == READ) {
+ if (is_read) {
bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
if (user_backed_iter(iter))
should_dirty = true;
@@ -74,6 +97,8 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
}
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
bio.bi_ioprio = iocb->ki_ioprio;
+ if (atomic_write)
+ bio.bi_opf |= REQ_ATOMIC;
ret = bio_iov_iter_get_pages(&bio, iter);
if (unlikely(ret))
@@ -167,10 +192,14 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
struct blkdev_dio *dio;
struct bio *bio;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
loff_t pos = iocb->ki_pos;
int ret = 0;
+ if (atomic_write)
+ return -EINVAL;
+
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
@@ -305,6 +334,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
bool is_read = iov_iter_rw(iter) == READ;
blk_opf_t opf = is_read ? REQ_OP_READ : dio_bio_write_op(iocb);
+ bool atomic_write = (iocb->ki_flags & IOCB_ATOMIC) && !is_read;
struct blkdev_dio *dio;
struct bio *bio;
loff_t pos = iocb->ki_pos;
@@ -313,6 +343,9 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
if (blkdev_dio_unaligned(bdev, pos, iter))
return -EINVAL;
+ if (atomic_write && !blkdev_atomic_write_valid(bdev, pos, iter))
+ return -EINVAL;
+
if (iocb->ki_flags & IOCB_ALLOC_CACHE)
opf |= REQ_ALLOC_CACHE;
bio = bio_alloc_bioset(bdev, nr_pages, opf, GFP_KERNEL,
@@ -347,6 +380,8 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
bio_set_pages_dirty(bio);
}
} else {
+ if (atomic_write)
+ bio->bi_opf |= REQ_ATOMIC;
task_io_account_write(bio->bi_iter.bi_size);
}
@@ -605,6 +640,9 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (bdev_nowait(handle->bdev))
filp->f_mode |= FMODE_NOWAIT;
+ if (queue_atomic_write_unit_min_bytes(bdev_get_queue(handle->bdev)))
+ filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+
filp->f_mapping = handle->bdev->bd_inode->i_mapping;
filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
filp->private_data = handle;
--
2.35.3
More information about the Linux-nvme
mailing list