[PATCH] block: always allocate integrity buffer

Keith Busch kbusch at meta.com
Wed May 7 08:37:59 PDT 2025


From: Keith Busch <kbusch at kernel.org>

The integrity buffer is mandatory for nvme formats that have metadata,
whether or not you want it generated or verified. The block integrity
attributes read_verify and write_generate had been stopping the metadata
buffer from being allocated and attached to the bio entirely. We only
want to suppress the protection checks on the device and host, but we
still need the buffer.

Without the buffer, reads and writes will just get IO errors and this nvme warning:

 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210
 ...
 RIP: 0010:nvme_setup_rw+0x122/0x210
 ...
 Call Trace:
  <TASK>
  nvme_setup_cmd+0x1b4/0x280
  nvme_queue_rqs+0xc4/0x1f0 [nvme]
  blk_mq_dispatch_queue_requests+0x24a/0x430
  blk_mq_flush_plug_list+0x50/0x140
  __blk_flush_plug+0xc1/0x100
  __submit_bio+0x1c1/0x360
  ? submit_bio_noacct_nocheck+0x2d6/0x3c0
  submit_bio_noacct_nocheck+0x2d6/0x3c0
  ? submit_bio_noacct+0x47/0x4c0
  submit_bio_wait+0x48/0xa0
  __blkdev_direct_IO_simple+0xee/0x210
  ? current_time+0x1d/0x100
  ? current_time+0x1d/0x100
  ? __bio_clone+0xb0/0xb0
  blkdev_read_iter+0xbb/0x140
  vfs_read+0x239/0x310
  ksys_read+0x58/0xc0
  do_syscall_64+0x6c/0x180
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

Signed-off-by: Keith Busch <kbusch at kernel.org>
---
 block/bio-integrity-auto.c | 31 +++++++++++++++++++------------
 block/t10-pi.c             |  5 +++++
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/block/bio-integrity-auto.c b/block/bio-integrity-auto.c
index e524c609be506..301406671f3b6 100644
--- a/block/bio-integrity-auto.c
+++ b/block/bio-integrity-auto.c
@@ -69,6 +69,16 @@ bool __bio_integrity_endio(struct bio *bio)
 	return true;
 }
 
+static inline void bio_set_bip_flags(struct blk_integrity *bi, u16 *bip_flags)
+{
+	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
+		*bip_flags |= BIP_IP_CHECKSUM;
+	if (bi->csum_type)
+		*bip_flags |= BIP_CHECK_GUARD;
+	if (bi->flags & BLK_INTEGRITY_REF_TAG)
+		*bip_flags |= BIP_CHECK_REFTAG;
+}
+
 /**
  * bio_integrity_prep - Prepare bio for integrity I/O
  * @bio:	bio to prepare
@@ -83,6 +93,7 @@ bool __bio_integrity_endio(struct bio *bio)
 bool bio_integrity_prep(struct bio *bio)
 {
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+	unsigned short bip_flags = BIP_BLOCK_INTEGRITY;
 	struct bio_integrity_data *bid;
 	gfp_t gfp = GFP_NOIO;
 	unsigned int len;
@@ -101,19 +112,22 @@ bool bio_integrity_prep(struct bio *bio)
 	switch (bio_op(bio)) {
 	case REQ_OP_READ:
 		if (bi->flags & BLK_INTEGRITY_NOVERIFY)
-			return true;
+			break;
+		bio_set_bip_flags(bi, &bip_flags);
 		break;
 	case REQ_OP_WRITE:
-		if (bi->flags & BLK_INTEGRITY_NOGENERATE)
-			return true;
-
 		/*
 		 * Zero the memory allocated to not leak uninitialized kernel
 		 * memory to disk for non-integrity metadata where nothing else
 		 * initializes the memory.
 		 */
+		if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
+			gfp |= __GFP_ZERO;
+			break;
+		}
 		if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
 			gfp |= __GFP_ZERO;
+		bio_set_bip_flags(bi, &bip_flags);
 		break;
 	default:
 		return true;
@@ -134,16 +148,9 @@ bool bio_integrity_prep(struct bio *bio)
 
 	bid->bio = bio;
 
-	bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
+	bid->bip.bip_flags = bip_flags;
 	bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
 
-	if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
-		bid->bip.bip_flags |= BIP_IP_CHECKSUM;
-	if (bi->csum_type)
-		bid->bip.bip_flags |= BIP_CHECK_GUARD;
-	if (bi->flags & BLK_INTEGRITY_REF_TAG)
-		bid->bip.bip_flags |= BIP_CHECK_REFTAG;
-
 	if (bio_integrity_add_page(bio, virt_to_page(buf), len,
 			offset_in_page(buf)) < len)
 		goto err_end_io;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 851db518ee5e8..41c863c927cf4 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -380,6 +380,9 @@ void blk_integrity_generate(struct bio *bio)
 	struct bvec_iter bviter;
 	struct bio_vec bv;
 
+	if (bi->flags & BLK_INTEGRITY_NOGENERATE)
+		return;
+
 	iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	iter.interval = 1 << bi->interval_exp;
 	iter.seed = bio->bi_iter.bi_sector;
@@ -412,6 +415,8 @@ void blk_integrity_verify_iter(struct bio *bio, struct bvec_iter *saved_iter)
 	struct bvec_iter bviter;
 	struct bio_vec bv;
 
+	if (bi->flags & BLK_INTEGRITY_NOVERIFY)
+		return;
 	/*
 	 * At the moment verify is called bi_iter has been advanced during split
 	 * and completion, so use the copy created during submission here.
-- 
2.47.1




More information about the Linux-nvme mailing list