[PATCH v3 09/15] block: Add checks to merging of atomic writes
John Garry
john.g.garry at oracle.com
Wed Jan 24 03:38:35 PST 2024
For atomic writes we allow merging, but we must adhere to some additional
rules:
- Only allow merging of atomic writes with other atomic writes. This avoids
  the need for any checks that the resultant merge still adheres to the
  underlying device atomic write properties. It also avoids possible
  unnecessary overhead in the device from submitting the whole resultant
  merged IO with an atomic write command for SCSI.
- Ensure that the merged IO would not cross an atomic write boundary, if
  any.
We already ensure that we don't exceed the atomic writes size limit in
get_max_io_size().
Signed-off-by: John Garry <john.g.garry at oracle.com>
---
block/blk-merge.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 75 insertions(+)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 9d714e8f76b3..12a75a252ca2 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -18,6 +18,42 @@
#include "blk-rq-qos.h"
#include "blk-throttle.h"
+static bool rq_straddles_atomic_write_boundary(struct request *rq,
+ unsigned int front,
+ unsigned int back)
+{ /* true if rq, grown by front bytes before and back bytes after, would cross an atomic write boundary */
+ unsigned int boundary = queue_atomic_write_boundary_bytes(rq->q);
+ unsigned int mask, imask;
+ loff_t start, end;
+
+ if (!boundary) /* zero boundary: nothing to cross */
+ return false;
+
+ start = rq->__sector << SECTOR_SHIFT; /* request start as a byte offset */
+ end = start + rq->__data_len; /* exclusive end of the request */
+
+ start -= front; /* extend by the bytes that would be merged in front */
+ end += back; /* ... and by the bytes that would be merged behind */
+
+ /* Merged span is longer than one boundary, so it must be crossing one */
+ if (end - start > boundary)
+ return true;
+
+ mask = boundary - 1; /* NOTE(review): bit-mask use assumes boundary is a power of two - confirm */
+
+ /* Span fits in one boundary and start or (exclusive) end is aligned, so cannot be crossing */
+ if (!(start & mask) || !(end & mask))
+ return false;
+
+ imask = ~mask;
+
+ /* Bits above the boundary offset differ, so start and end sit in different windows */
+ if ((start & imask) != (end & imask))
+ return true;
+
+ return false;
+}
+
static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
@@ -659,6 +695,13 @@ int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
return 0;
}
+ if (req->cmd_flags & REQ_ATOMIC) {
+ if (rq_straddles_atomic_write_boundary(req,
+ 0, bio->bi_iter.bi_size)) {
+ return 0;
+ }
+ }
+
return ll_new_hw_segment(req, bio, nr_segs);
}
@@ -678,6 +721,13 @@ static int ll_front_merge_fn(struct request *req, struct bio *bio,
return 0;
}
+ if (req->cmd_flags & REQ_ATOMIC) {
+ if (rq_straddles_atomic_write_boundary(req,
+ bio->bi_iter.bi_size, 0)) {
+ return 0;
+ }
+ }
+
return ll_new_hw_segment(req, bio, nr_segs);
}
@@ -714,6 +764,13 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
blk_rq_get_max_sectors(req, blk_rq_pos(req)))
return 0;
+ if (req->cmd_flags & REQ_ATOMIC) {
+ if (rq_straddles_atomic_write_boundary(req,
+ 0, blk_rq_bytes(next))) {
+ return 0;
+ }
+ }
+
total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
if (total_phys_segments > blk_rq_get_max_segments(req))
return 0;
@@ -809,6 +866,18 @@ static enum elv_merge blk_try_req_merge(struct request *req,
return ELEVATOR_NO_MERGE;
}
+static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
+ struct bio *bio)
+{ /* true only if rq and bio agree on REQ_ATOMIC (both set or both clear) */
+ return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
+}
+
+static bool blk_atomic_write_mergeable_rqs(struct request *rq,
+ struct request *next)
+{ /* true only if both requests agree on REQ_ATOMIC (both set or both clear) */
+ return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
+}
+
/*
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
@@ -828,6 +897,9 @@ static struct request *attempt_merge(struct request_queue *q,
if (req->ioprio != next->ioprio)
return NULL;
+ if (!blk_atomic_write_mergeable_rqs(req, next))
+ return NULL;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -955,6 +1027,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (rq->ioprio != bio_prio(bio))
return false;
+ if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
+ return false;
+
return true;
}
--
2.31.1
More information about the Linux-nvme
mailing list