[bug report] WARNING: CPU: 3 PID: 522 at block/genhd.c:144 bdev_count_inflight_rw+0x26e/0x410

Yu Kuai yukuai1 at huaweicloud.com
Thu Jun 19 23:47:10 PDT 2025


Hi,

On 2025/06/20 12:10, Calvin Owens wrote:
> I dumped all the similar WARNs I've seen here (blk-warn-%d.txt):
> 
>      https://github.com/jcalvinowens/lkml-debug-616/tree/master

These reports also contain both request-based and bio-based disks, so I
think the following concurrent scenario is possible:

While bdev_count_inflight() is iterating over all CPUs, an IO is issued
from a CPU that has already been traversed and is then completed on a
CPU that has not been traversed yet:

cpu0
                cpu1
                bdev_count_inflight
                 // for_each_possible_cpu
                 // cpu0 is 0
                 inflight += 0
// issue an io
blk_account_io_start
// cpu0 inflight++

                                cpu2
                                // the io is done
                                blk_account_io_done
                                // cpu2 inflight--
                 // cpu1 is 0
                 inflight += 0
                 // cpu2 is -1
                 inflight += -1
                 ...

In this case, the summed inflight count will be -1.
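
To make the window concrete, here is a minimal userspace C sketch that
replays the interleaving above. It is illustrative only (assumption: a
plain array stands in for the per-cpu counters; this is not the kernel
code in block/genhd.c):

#include <stdio.h>

#define NR_CPUS 3

int main(void)
{
        int inflight[NR_CPUS] = { 0, 0, 0 };
        long long sum = 0;

        /* the reader visits cpu0 first and sees 0 */
        sum += inflight[0];

        /* meanwhile an io is issued on cpu0 ... */
        inflight[0]++;          /* blk_account_io_start() */
        /* ... and completes on cpu2 */
        inflight[2]--;          /* blk_account_io_done() */

        /* the reader continues: cpu1 is 0, cpu2 is now -1 */
        sum += inflight[1];
        sum += inflight[2];

        /* prints "total inflight = -1" */
        printf("total inflight = %lld\n", sum);
        return 0;
}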

Yi and Calvin,

Can you please help test the following patch? It adds a WARN_ON_ONCE()
based on atomic operations. If the new warning is not reproduced while
the old warning still is, I think it can be confirmed that the above
analysis is correct, and I will send a revert of the WARN_ON_ONCE()
change in bdev_count_inflight().

Thanks,
Kuai

diff --git a/block/blk-core.c b/block/blk-core.c
index b862c66018f2..2b033caa74e8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1035,6 +1035,8 @@ unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
         part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
         part_stat_unlock();

+       atomic_inc(&bdev->inflight[op_is_write(op)]);
+
         return start_time;
  }
  EXPORT_SYMBOL(bdev_start_io_acct);
@@ -1065,6 +1067,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
         part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
         part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
         part_stat_unlock();
+
+       WARN_ON_ONCE(atomic_dec_return(&bdev->inflight[op_is_write(op)]) < 0);
  }
  EXPORT_SYMBOL(bdev_end_io_acct);

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 70d704615be5..ff15276d277f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -658,6 +658,8 @@ static void blk_account_io_merge_request(struct request *req)
                 part_stat_local_dec(req->part,
                                     in_flight[op_is_write(req_op(req))]);
                 part_stat_unlock();
+
+               WARN_ON_ONCE(atomic_dec_return(&req->part->inflight[op_is_write(req_op(req))]) < 0);
         }
  }

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4806b867e37d..94e728ff8bb6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1056,6 +1056,8 @@ static inline void blk_account_io_done(struct request *req, u64 now)
                 part_stat_local_dec(req->part,
                                     in_flight[op_is_write(req_op(req))]);
                 part_stat_unlock();
+
+               WARN_ON_ONCE(atomic_dec_return(&req->part->inflight[op_is_write(req_op(req))]) < 0);
         }
  }

@@ -1116,6 +1118,8 @@ static inline void blk_account_io_start(struct request *req)
         update_io_ticks(req->part, jiffies, false);
         part_stat_local_inc(req->part, in_flight[op_is_write(req_op(req))]);
         part_stat_unlock();
+
+       atomic_inc(&req->part->inflight[op_is_write(req_op(req))]);
  }

  static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3d1577f07c1c..a81110c07426 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -43,6 +43,7 @@ struct block_device {
         sector_t                bd_nr_sectors;
         struct gendisk *        bd_disk;
         struct request_queue *  bd_queue;
+       atomic_t                inflight[2];
         struct disk_stats __percpu *bd_stats;
         unsigned long           bd_stamp;
         atomic_t                __bd_flags;     // partition number + flags



