[PATCH RFC 4/5] block: move bio validation into __bio_split_to_limits
Keith Busch
kbusch at meta.com
Tue May 19 10:23:25 PDT 2026
From: Keith Busch <kbusch at kernel.org>
The bio checks in submit_bio_noacct() compare queue limits to determine
whether operations like discard, write zeroes, zone append, and atomic
writes are supported and valid. These checks run before
bio_queue_enter(), so they race against any driver that updates queue
limits inside a freeze window.
Move all limit-dependent operation validation from submit_bio_noacct()
into __bio_split_to_limits(), which runs after the queue usage reference
has been acquired. This ensures that all checks are properly serialized
against limit updates. The read-only warning, bio_check_ro(), is moved
there as well.
The non-limit checks (crypto, fault injection, partition remap, and
flush flag handling) remain in submit_bio_noacct() as they do not
depend on queue limits.
Signed-off-by: Keith Busch <kbusch at kernel.org>
---
block/blk-core.c | 118 -----------------------------------------------
block/blk.h | 75 ++++++++++++++++++++++++++++--
2 files changed, 72 insertions(+), 121 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index c200d0fc44fe7..8360c2b5efee5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -519,25 +519,6 @@ static int __init fail_make_request_debugfs(void)
late_initcall(fail_make_request_debugfs);
#endif /* CONFIG_FAIL_MAKE_REQUEST */
-static inline void bio_check_ro(struct bio *bio)
-{
- if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
- if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
- return;
-
- if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
- return;
-
- bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
-
- /*
- * Use ioctl to set underlying disk of raid/dm to read-only
- * will trigger this.
- */
- pr_warn("Trying to write to read-only block-device %pg\n",
- bio->bi_bdev);
- }
-}
int should_fail_bio(struct bio *bio)
{
@@ -566,39 +547,6 @@ static int blk_partition_remap(struct bio *bio)
return 0;
}
-/*
- * Check write append to a zoned block device.
- */
-static inline blk_status_t blk_check_zone_append(struct request_queue *q,
- struct bio *bio)
-{
- int nr_sectors = bio_sectors(bio);
-
- /* Only applicable to zoned block devices */
- if (!bdev_is_zoned(bio->bi_bdev))
- return BLK_STS_NOTSUPP;
-
- /* The bio sector must point to the start of a sequential zone */
- if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
- return BLK_STS_INVAL;
-
- /*
- * Not allowed to cross zone boundaries. Otherwise, the BIO will be
- * split and could result in non-contiguous sectors being written in
- * different zones.
- */
- if (nr_sectors > q->limits.chunk_sectors)
- return BLK_STS_INVAL;
-
- /* Make sure the BIO is small enough and will not get split */
- if (nr_sectors > q->limits.max_zone_append_sectors)
- return BLK_STS_INVAL;
-
- bio->bi_opf |= REQ_NOMERGE;
-
- return BLK_STS_OK;
-}
-
static void __submit_bio(struct bio *bio)
{
/* If plug is not used, add new plug here to cache nsecs time. */
@@ -731,18 +679,6 @@ void submit_bio_noacct_nocheck(struct bio *bio, bool split)
}
}
-static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
- struct bio *bio)
-{
- if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
- return BLK_STS_INVAL;
-
- if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
- return BLK_STS_INVAL;
-
- return BLK_STS_OK;
-}
-
/**
* submit_bio_noacct - re-submit a bio to the block device layer for I/O
* @bio: The bio describing the location in memory and on the device.
@@ -755,7 +691,6 @@ static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
void submit_bio_noacct(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
- struct request_queue *q = bdev_get_queue(bdev);
blk_status_t status = BLK_STS_IOERR;
might_sleep();
@@ -776,7 +711,6 @@ void submit_bio_noacct(struct bio *bio)
if (should_fail_bio(bio))
goto end_io;
- bio_check_ro(bio);
if (!bio_flagged(bio, BIO_REMAPPED)) {
if (bdev_is_partition(bdev) &&
unlikely(blk_partition_remap(bio)))
@@ -800,58 +734,6 @@ void submit_bio_noacct(struct bio *bio)
}
}
- switch (bio_op(bio)) {
- case REQ_OP_READ:
- break;
- case REQ_OP_WRITE:
- if (bio->bi_opf & REQ_ATOMIC) {
- status = blk_validate_atomic_write_op_size(q, bio);
- if (status != BLK_STS_OK)
- goto end_io;
- }
- break;
- case REQ_OP_FLUSH:
- /*
- * REQ_OP_FLUSH can't be submitted through bios, it is only
- * synthetized in struct request by the flush state machine.
- */
- goto not_supported;
- case REQ_OP_DISCARD:
- if (!bdev_max_discard_sectors(bdev))
- goto not_supported;
- break;
- case REQ_OP_SECURE_ERASE:
- if (!bdev_max_secure_erase_sectors(bdev))
- goto not_supported;
- break;
- case REQ_OP_ZONE_APPEND:
- status = blk_check_zone_append(q, bio);
- if (status != BLK_STS_OK)
- goto end_io;
- break;
- case REQ_OP_WRITE_ZEROES:
- if (!q->limits.max_write_zeroes_sectors)
- goto not_supported;
- break;
- case REQ_OP_ZONE_RESET:
- case REQ_OP_ZONE_OPEN:
- case REQ_OP_ZONE_CLOSE:
- case REQ_OP_ZONE_FINISH:
- case REQ_OP_ZONE_RESET_ALL:
- if (!bdev_is_zoned(bio->bi_bdev))
- goto not_supported;
- break;
- case REQ_OP_DRV_IN:
- case REQ_OP_DRV_OUT:
- /*
- * Driver private operations are only used with passthrough
- * requests.
- */
- fallthrough;
- default:
- goto not_supported;
- }
-
if (blk_throtl_bio(bio))
return;
submit_bio_noacct_nocheck(bio, false);
diff --git a/block/blk.h b/block/blk.h
index e70acb2d358e3..d3b897e9b5ee9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -407,6 +407,22 @@ static inline bool bio_may_need_split(struct bio *bio,
return bv->bv_len + bv->bv_offset > lim->max_fast_segment_size;
}
+static inline void bio_check_ro(struct bio *bio)
+{
+ if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
+ if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
+ return;
+
+ if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
+ return;
+
+ bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
+
+ pr_warn("Trying to write to read-only block-device %pg\n",
+ bio->bi_bdev);
+ }
+}
+
/**
* __bio_split_to_limits - split a bio to fit the queue limits
* @bio: bio to be split
@@ -423,6 +439,8 @@ static inline bool bio_may_need_split(struct bio *bio,
static inline struct bio *__bio_split_to_limits(struct bio *bio,
const struct queue_limits *lim, unsigned int *nr_segs)
{
+ bio_check_ro(bio);
+
if (unlikely(bio_end_sector(bio) > bdev_nr_sectors(bio->bi_bdev) +
bio->bi_bdev->bd_start_sect)) {
pr_info_ratelimited("%s: attempt to access beyond end of device\n"
@@ -435,24 +453,75 @@ static inline struct bio *__bio_split_to_limits(struct bio *bio,
}
switch (bio_op(bio)) {
- case REQ_OP_READ:
case REQ_OP_WRITE:
+ if (bio->bi_opf & REQ_ATOMIC) {
+ if (bio->bi_iter.bi_size > lim->atomic_write_unit_max ||
+ bio->bi_iter.bi_size % lim->atomic_write_unit_min)
+ goto invalid;
+ }
+ fallthrough;
+ case REQ_OP_READ:
if (bio_may_need_split(bio, lim))
return bio_split_rw(bio, lim, nr_segs);
*nr_segs = 1;
return bio;
case REQ_OP_ZONE_APPEND:
+ /* Only applicable to zoned block devices */
+ if (!(lim->features & BLK_FEAT_ZONED))
+ goto not_supported;
+
+ /* The bio sector must point to the start of a sequential zone */
+ if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
+ goto invalid;
+
+ /*
+ * Not allowed to cross zone boundaries. Otherwise, the BIO
+ * will be split and could result in non-contiguous sectors
+ * being written in different zones.
+ */
+ if (bio_sectors(bio) > lim->chunk_sectors)
+ goto invalid;
+
+ /* Make sure the BIO is small enough and will not get split */
+ if (bio_sectors(bio) > lim->max_zone_append_sectors)
+ goto invalid;
+
+ bio->bi_opf |= REQ_NOMERGE;
return bio_split_zone_append(bio, lim, nr_segs);
case REQ_OP_DISCARD:
+ if (!lim->max_discard_sectors)
+ goto not_supported;
+ return bio_split_discard(bio, lim, nr_segs);
case REQ_OP_SECURE_ERASE:
+ if (!lim->max_secure_erase_sectors)
+ goto not_supported;
return bio_split_discard(bio, lim, nr_segs);
case REQ_OP_WRITE_ZEROES:
+ if (!lim->max_write_zeroes_sectors)
+ goto not_supported;
return bio_split_write_zeroes(bio, lim, nr_segs);
- default:
- /* other operations can't be split */
+ case REQ_OP_ZONE_RESET:
+ case REQ_OP_ZONE_OPEN:
+ case REQ_OP_ZONE_CLOSE:
+ case REQ_OP_ZONE_FINISH:
+ case REQ_OP_ZONE_RESET_ALL:
+ if (!(lim->features & BLK_FEAT_ZONED))
+ goto not_supported;
*nr_segs = 0;
return bio;
+ default:
+ WARN_ON_ONCE(1);
+ goto not_supported;
}
+
+invalid:
+ bio->bi_status = BLK_STS_INVAL;
+ bio_endio(bio);
+ return NULL;
+not_supported:
+ bio->bi_status = BLK_STS_NOTSUPP;
+ bio_endio(bio);
+ return NULL;
ioerr:
bio_io_error(bio);
return NULL;
--
2.53.0-Meta
More information about the Linux-nvme
mailing list