[PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device
Yu Kuai
yukuai1 at huaweicloud.com
Mon Feb 27 17:01:27 PST 2023
Hi,
friendly ping ...
Thanks,
Kuai
在 2023/02/23 17:12, Yu Kuai 写道:
> From: Yu Kuai <yukuai3 at huawei.com>
>
> While using iostat for raid, I occasionally observed very strange
> 'await' values, and it turns out this is because 'ios' and 'sectors' are
> counted in bdev_start_io_acct(), while 'nsecs' is counted in
> bdev_end_io_acct(). I'm not sure why they are counted like that,
> but I think this behaviour is obviously wrong because users will get
> wrong disk stats.
>
> Fix the problem by counting 'ios' and 'sectors' when the io is done, as
> rq-based devices do.
>
> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
> Signed-off-by: Yu Kuai <yukuai3 at huawei.com>
> ---
> block/blk-core.c | 16 ++++++----------
> drivers/md/dm.c | 6 +++---
> drivers/nvme/host/multipath.c | 8 ++++----
> include/linux/blkdev.h | 5 ++---
> 4 files changed, 15 insertions(+), 20 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 82b5b2c53f1e..fe1d320f5f07 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
> }
> }
>
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> - unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
> unsigned long start_time)
> {
> - const int sgrp = op_stat_group(op);
> -
> part_stat_lock();
> update_io_ticks(bdev, start_time, false);
> - part_stat_inc(bdev, ios[sgrp]);
> - part_stat_add(bdev, sectors[sgrp], sectors);
> part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
> part_stat_unlock();
>
> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
> */
> unsigned long bio_start_io_acct(struct bio *bio)
> {
> - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
> - bio_op(bio), jiffies);
> + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
> }
> EXPORT_SYMBOL_GPL(bio_start_io_acct);
>
> void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> - unsigned long start_time)
> + unsigned int sectors, unsigned long start_time)
> {
> const int sgrp = op_stat_group(op);
> unsigned long now = READ_ONCE(jiffies);
> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
>
> part_stat_lock();
> update_io_ticks(bdev, now, true);
> + part_stat_inc(bdev, ios[sgrp]);
> + part_stat_add(bdev, sectors[sgrp], sectors);
> part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
> part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
> part_stat_unlock();
> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
> void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
> struct block_device *orig_bdev)
> {
> - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
> + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
> }
> EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
>
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index eace45a18d45..f5cc330bb549 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
> sectors = io->sectors;
>
> if (!end)
> - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
> - start_time);
> + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
> else
> - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
> + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
> + start_time);
>
> if (static_branch_unlikely(&stats_enabled) &&
> unlikely(dm_stats_used(&md->stats))) {
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index fc39d01e7b63..9171452e2f6d 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
> return;
>
> nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
> - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
> - blk_rq_bytes(rq) >> SECTOR_SHIFT,
> - req_op(rq), jiffies);
> + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
> + jiffies);
> }
> EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
>
> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
> if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
> return;
> bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
> - nvme_req(rq)->start_time);
> + blk_rq_bytes(rq) >> SECTOR_SHIFT,
> + nvme_req(rq)->start_time);
> }
>
> void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index d1aee08f8c18..941304f17492 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
> wake_up_process(waiter);
> }
>
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> - unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
> unsigned long start_time);
> void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> - unsigned long start_time);
> + unsigned int sectors, unsigned long start_time);
>
> unsigned long bio_start_io_acct(struct bio *bio);
> void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
>
More information about the Linux-nvme
mailing list