[PATCH RFC 4/5] block: move bio validation into __bio_split_to_limits

Keith Busch kbusch at meta.com
Tue May 19 10:23:25 PDT 2026


From: Keith Busch <kbusch at kernel.org>

The bio checks in submit_bio_noacct() compare queue limits to determine
whether operations like discard, write zeroes, zone append, and atomic
writes are supported and valid. These checks run before
bio_queue_enter(), so they race against any driver that updates queue
limits inside a freeze window.

Move all limit-dependent operation validation from submit_bio_noacct()
into __bio_split_to_limits(), which runs after the queue usage reference
has been acquired. This ensures that all checks are properly serialized
against limit updates.

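The non-limit checks (crypto, fault injection, partition remap, and
flush flag handling) remain in submit_bio_noacct() as they do not
depend on queue limits. The read-only warning in bio_check_ro() is
also moved into __bio_split_to_limits() alongside the limit checks.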

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 block/blk-core.c | 118 -----------------------------------------------
 block/blk.h      |  75 ++++++++++++++++++++++++++++--
 2 files changed, 72 insertions(+), 121 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index c200d0fc44fe7..8360c2b5efee5 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -519,25 +519,6 @@ static int __init fail_make_request_debugfs(void)
 late_initcall(fail_make_request_debugfs);
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
 
-static inline void bio_check_ro(struct bio *bio)
-{
-	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
-		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
-			return;
-
-		if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
-			return;
-
-		bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
-
-		/*
-		 * Use ioctl to set underlying disk of raid/dm to read-only
-		 * will trigger this.
-		 */
-		pr_warn("Trying to write to read-only block-device %pg\n",
-			bio->bi_bdev);
-	}
-}
 
 int should_fail_bio(struct bio *bio)
 {
@@ -566,39 +547,6 @@ static int blk_partition_remap(struct bio *bio)
 	return 0;
 }
 
-/*
- * Check write append to a zoned block device.
- */
-static inline blk_status_t blk_check_zone_append(struct request_queue *q,
-						 struct bio *bio)
-{
-	int nr_sectors = bio_sectors(bio);
-
-	/* Only applicable to zoned block devices */
-	if (!bdev_is_zoned(bio->bi_bdev))
-		return BLK_STS_NOTSUPP;
-
-	/* The bio sector must point to the start of a sequential zone */
-	if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
-		return BLK_STS_INVAL;
-
-	/*
-	 * Not allowed to cross zone boundaries. Otherwise, the BIO will be
-	 * split and could result in non-contiguous sectors being written in
-	 * different zones.
-	 */
-	if (nr_sectors > q->limits.chunk_sectors)
-		return BLK_STS_INVAL;
-
-	/* Make sure the BIO is small enough and will not get split */
-	if (nr_sectors > q->limits.max_zone_append_sectors)
-		return BLK_STS_INVAL;
-
-	bio->bi_opf |= REQ_NOMERGE;
-
-	return BLK_STS_OK;
-}
-
 static void __submit_bio(struct bio *bio)
 {
 	/* If plug is not used, add new plug here to cache nsecs time. */
@@ -731,18 +679,6 @@ void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 	}
 }
 
-static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
-						 struct bio *bio)
-{
-	if (bio->bi_iter.bi_size > queue_atomic_write_unit_max_bytes(q))
-		return BLK_STS_INVAL;
-
-	if (bio->bi_iter.bi_size % queue_atomic_write_unit_min_bytes(q))
-		return BLK_STS_INVAL;
-
-	return BLK_STS_OK;
-}
-
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -755,7 +691,6 @@ static blk_status_t blk_validate_atomic_write_op_size(struct request_queue *q,
 void submit_bio_noacct(struct bio *bio)
 {
 	struct block_device *bdev = bio->bi_bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
 	blk_status_t status = BLK_STS_IOERR;
 
 	might_sleep();
@@ -776,7 +711,6 @@ void submit_bio_noacct(struct bio *bio)
 
 	if (should_fail_bio(bio))
 		goto end_io;
-	bio_check_ro(bio);
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (bdev_is_partition(bdev) &&
 		    unlikely(blk_partition_remap(bio)))
@@ -800,58 +734,6 @@ void submit_bio_noacct(struct bio *bio)
 		}
 	}
 
-	switch (bio_op(bio)) {
-	case REQ_OP_READ:
-		break;
-	case REQ_OP_WRITE:
-		if (bio->bi_opf & REQ_ATOMIC) {
-			status = blk_validate_atomic_write_op_size(q, bio);
-			if (status != BLK_STS_OK)
-				goto end_io;
-		}
-		break;
-	case REQ_OP_FLUSH:
-		/*
-		 * REQ_OP_FLUSH can't be submitted through bios, it is only
-		 * synthetized in struct request by the flush state machine.
-		 */
-		goto not_supported;
-	case REQ_OP_DISCARD:
-		if (!bdev_max_discard_sectors(bdev))
-			goto not_supported;
-		break;
-	case REQ_OP_SECURE_ERASE:
-		if (!bdev_max_secure_erase_sectors(bdev))
-			goto not_supported;
-		break;
-	case REQ_OP_ZONE_APPEND:
-		status = blk_check_zone_append(q, bio);
-		if (status != BLK_STS_OK)
-			goto end_io;
-		break;
-	case REQ_OP_WRITE_ZEROES:
-		if (!q->limits.max_write_zeroes_sectors)
-			goto not_supported;
-		break;
-	case REQ_OP_ZONE_RESET:
-	case REQ_OP_ZONE_OPEN:
-	case REQ_OP_ZONE_CLOSE:
-	case REQ_OP_ZONE_FINISH:
-	case REQ_OP_ZONE_RESET_ALL:
-		if (!bdev_is_zoned(bio->bi_bdev))
-			goto not_supported;
-		break;
-	case REQ_OP_DRV_IN:
-	case REQ_OP_DRV_OUT:
-		/*
-		 * Driver private operations are only used with passthrough
-		 * requests.
-		 */
-		fallthrough;
-	default:
-		goto not_supported;
-	}
-
 	if (blk_throtl_bio(bio))
 		return;
 	submit_bio_noacct_nocheck(bio, false);
diff --git a/block/blk.h b/block/blk.h
index e70acb2d358e3..d3b897e9b5ee9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -407,6 +407,22 @@ static inline bool bio_may_need_split(struct bio *bio,
 	return bv->bv_len + bv->bv_offset > lim->max_fast_segment_size;
 }
 
+static inline void bio_check_ro(struct bio *bio)
+{
+	if (op_is_write(bio_op(bio)) && bdev_read_only(bio->bi_bdev)) {
+		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
+			return;
+
+		if (bdev_test_flag(bio->bi_bdev, BD_RO_WARNED))
+			return;
+
+		bdev_set_flag(bio->bi_bdev, BD_RO_WARNED);
+
+		pr_warn("Trying to write to read-only block-device %pg\n",
+			bio->bi_bdev);
+	}
+}
+
 /**
  * __bio_split_to_limits - split a bio to fit the queue limits
  * @bio:     bio to be split
@@ -423,6 +439,8 @@ static inline bool bio_may_need_split(struct bio *bio,
 static inline struct bio *__bio_split_to_limits(struct bio *bio,
 		const struct queue_limits *lim, unsigned int *nr_segs)
 {
+	bio_check_ro(bio);
+
 	if (unlikely(bio_end_sector(bio) > bdev_nr_sectors(bio->bi_bdev) +
 					   bio->bi_bdev->bd_start_sect)) {
 		pr_info_ratelimited("%s: attempt to access beyond end of device\n"
@@ -435,24 +453,75 @@ static inline struct bio *__bio_split_to_limits(struct bio *bio,
 	}
 
 	switch (bio_op(bio)) {
-	case REQ_OP_READ:
 	case REQ_OP_WRITE:
+		if (bio->bi_opf & REQ_ATOMIC) {
+			if (bio->bi_iter.bi_size > lim->atomic_write_unit_max ||
+			    bio->bi_iter.bi_size % lim->atomic_write_unit_min)
+				goto invalid;
+		}
+		fallthrough;
+	case REQ_OP_READ:
 		if (bio_may_need_split(bio, lim))
 			return bio_split_rw(bio, lim, nr_segs);
 		*nr_segs = 1;
 		return bio;
 	case REQ_OP_ZONE_APPEND:
+		/* Only applicable to zoned block devices */
+		if (!(lim->features & BLK_FEAT_ZONED))
+			goto not_supported;
+
+		/* The bio sector must point to the start of a sequential zone */
+		if (!bdev_is_zone_start(bio->bi_bdev, bio->bi_iter.bi_sector))
+			goto invalid;
+
+		/*
+		 * Not allowed to cross zone boundaries. Otherwise, the BIO
+		 * will be split and could result in non-contiguous sectors
+		 * being written in different zones.
+		 */
+		if (bio_sectors(bio) > lim->chunk_sectors)
+			goto invalid;
+
+		/* Make sure the BIO is small enough and will not get split */
+		if (bio_sectors(bio) > lim->max_zone_append_sectors)
+			goto invalid;
+
+		bio->bi_opf |= REQ_NOMERGE;
 		return bio_split_zone_append(bio, lim, nr_segs);
 	case REQ_OP_DISCARD:
+		if (!lim->max_discard_sectors)
+			goto not_supported;
+		return bio_split_discard(bio, lim, nr_segs);
 	case REQ_OP_SECURE_ERASE:
+		if (!lim->max_secure_erase_sectors)
+			goto not_supported;
 		return bio_split_discard(bio, lim, nr_segs);
 	case REQ_OP_WRITE_ZEROES:
+		if (!lim->max_write_zeroes_sectors)
+			goto not_supported;
 		return bio_split_write_zeroes(bio, lim, nr_segs);
-	default:
-		/* other operations can't be split */
+	case REQ_OP_ZONE_RESET:
+	case REQ_OP_ZONE_OPEN:
+	case REQ_OP_ZONE_CLOSE:
+	case REQ_OP_ZONE_FINISH:
+	case REQ_OP_ZONE_RESET_ALL:
+		if (!(lim->features & BLK_FEAT_ZONED))
+			goto not_supported;
 		*nr_segs = 0;
 		return bio;
+	default:
+		WARN_ON_ONCE(1);
+		goto not_supported;
 	}
+
+invalid:
+	bio->bi_status = BLK_STS_INVAL;
+	bio_endio(bio);
+	return NULL;
+not_supported:
+	bio->bi_status = BLK_STS_NOTSUPP;
+	bio_endio(bio);
+	return NULL;
 ioerr:
 	bio_io_error(bio);
 	return NULL;
-- 
2.53.0-Meta




More information about the Linux-nvme mailing list