[PATCH -next RFC] block: count 'ios' and 'sectors' when io is done for bio-based device

Yu Kuai yukuai1 at huaweicloud.com
Mon Feb 27 17:01:27 PST 2023


Hi,

friendly ping ...

Thanks,
Kuai

在 2023/02/23 17:12, Yu Kuai 写道:
> From: Yu Kuai <yukuai3 at huawei.com>
> 
> While using iostat on raid, I occasionally observed very strange 'await'
> values, and it turns out this is because 'ios' and 'sectors' are
> counted in bdev_start_io_acct(), while 'nsecs' is counted in
> bdev_end_io_acct(). I'm not sure why they are counted like that,
> but I think this behaviour is obviously wrong because users will get
> wrong disk stats.
> 
> Fix the problem by counting 'ios' and 'sectors' when io is done, like
> what rq-based devices do.
> 
> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
> Signed-off-by: Yu Kuai <yukuai3 at huawei.com>
> ---
>   block/blk-core.c              | 16 ++++++----------
>   drivers/md/dm.c               |  6 +++---
>   drivers/nvme/host/multipath.c |  8 ++++----
>   include/linux/blkdev.h        |  5 ++---
>   4 files changed, 15 insertions(+), 20 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 82b5b2c53f1e..fe1d320f5f07 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
>   	}
>   }
>   
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> -				 unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
>   				 unsigned long start_time)
>   {
> -	const int sgrp = op_stat_group(op);
> -
>   	part_stat_lock();
>   	update_io_ticks(bdev, start_time, false);
> -	part_stat_inc(bdev, ios[sgrp]);
> -	part_stat_add(bdev, sectors[sgrp], sectors);
>   	part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
>   	part_stat_unlock();
>   
> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
>    */
>   unsigned long bio_start_io_acct(struct bio *bio)
>   {
> -	return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
> -				  bio_op(bio), jiffies);
> +	return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
>   }
>   EXPORT_SYMBOL_GPL(bio_start_io_acct);
>   
>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> -		      unsigned long start_time)
> +		      unsigned int sectors, unsigned long start_time)
>   {
>   	const int sgrp = op_stat_group(op);
>   	unsigned long now = READ_ONCE(jiffies);
> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
>   
>   	part_stat_lock();
>   	update_io_ticks(bdev, now, true);
> +	part_stat_inc(bdev, ios[sgrp]);
> +	part_stat_add(bdev, sectors[sgrp], sectors);
>   	part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
>   	part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
>   	part_stat_unlock();
> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
>   			      struct block_device *orig_bdev)
>   {
> -	bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
> +	bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
>   }
>   EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
>   
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index eace45a18d45..f5cc330bb549 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
>   		sectors = io->sectors;
>   
>   	if (!end)
> -		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
> -				   start_time);
> +		bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
>   	else
> -		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
> +		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
> +				 start_time);
>   
>   	if (static_branch_unlikely(&stats_enabled) &&
>   	    unlikely(dm_stats_used(&md->stats))) {
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index fc39d01e7b63..9171452e2f6d 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
>   		return;
>   
>   	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
> -	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
> -					blk_rq_bytes(rq) >> SECTOR_SHIFT,
> -					req_op(rq), jiffies);
> +	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
> +						      jiffies);
>   }
>   EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
>   
> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
>   	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
>   		return;
>   	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
> -		nvme_req(rq)->start_time);
> +			 blk_rq_bytes(rq) >> SECTOR_SHIFT,
> +			 nvme_req(rq)->start_time);
>   }
>   
>   void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index d1aee08f8c18..941304f17492 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
>   		wake_up_process(waiter);
>   }
>   
> -unsigned long bdev_start_io_acct(struct block_device *bdev,
> -				 unsigned int sectors, enum req_op op,
> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
>   				 unsigned long start_time);
>   void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
> -		unsigned long start_time);
> +		      unsigned int sectors, unsigned long start_time);
>   
>   unsigned long bio_start_io_acct(struct bio *bio);
>   void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
> 




More information about the Linux-nvme mailing list