[dm-devel] [RFC PATCH 2/2] block: support to freeze bio based request queue
JeffleXu
jefflexu at linux.alibaba.com
Mon Apr 19 13:05:46 BST 2021
On 4/15/21 6:33 PM, Ming Lei wrote:
> For bio based request queue, the queue usage refcnt is only grabbed
> during submission, which isn't consistent with request base queue.
>
> Queue freezing has been used widely, and turns out it is very useful
> to quiesce queue activity.
>
> Support to freeze bio based request queue by the following approach:
>
> 1) grab two queue usage refcount for blk-mq before submitting blk-mq
> bio, one is for bio, another is for request;
Hi, I don't understand the rationale for grabbing two refcounts on the
@q_usage_count of the underlying blk-mq device, while the
@q_usage_count of the MD/DM device itself is left untouched.
In the following calling stack
```
queue_poll_store
blk_mq_freeze_queue(q)
```
Is the input @q here still the request queue of the MD/DM device?
>
> 2) add bio flag of BIO_QUEUE_REFFED for making sure that only one
> refcnt is grabbed for each bio, so we can put the refcnt when the
> bio is going away
>
> 3) nvme mpath is a bit special, because same bio is used for both
> mpath queue and underlying nvme queue. So we put the mpath queue's
> usage refcnt before completing the nvme request.
>
> Cc: Christoph Hellwig <hch at lst.de>
> Cc: Bart Van Assche <bvanassche at acm.org>
> Signed-off-by: Ming Lei <ming.lei at redhat.com>
> ---
> block/bio.c | 12 ++++++++++--
> block/blk-core.c | 23 +++++++++++++++++------
> drivers/nvme/host/core.c | 16 ++++++++++++++++
> drivers/nvme/host/multipath.c | 6 ++++++
> include/linux/blk-mq.h | 2 ++
> include/linux/blk_types.h | 1 +
> include/linux/blkdev.h | 7 ++++++-
> 7 files changed, 58 insertions(+), 9 deletions(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index 303298996afe..941a306e390b 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1365,14 +1365,18 @@ static inline bool bio_remaining_done(struct bio *bio)
> **/
> void bio_endio(struct bio *bio)
> {
> + struct block_device *bdev;
> + bool put_queue;
> again:
> + bdev = bio->bi_bdev;
> + put_queue = bio_flagged(bio, BIO_QUEUE_REFFED);
> if (!bio_remaining_done(bio))
> return;
> if (!bio_integrity_endio(bio))
> return;
>
> - if (bio->bi_bdev)
> - rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
> + if (bdev)
> + rq_qos_done_bio(bdev->bd_disk->queue, bio);
>
> /*
> * Need to have a real endio function for chained bios, otherwise
> @@ -1384,6 +1388,8 @@ void bio_endio(struct bio *bio)
> */
> if (bio->bi_end_io == bio_chain_endio) {
> bio = __bio_chain_endio(bio);
> + if (bdev && put_queue)
> + blk_queue_exit(bdev->bd_disk->queue);
> goto again;
> }
>
> @@ -1397,6 +1403,8 @@ void bio_endio(struct bio *bio)
> bio_uninit(bio);
> if (bio->bi_end_io)
> bio->bi_end_io(bio);
> + if (bdev && put_queue)
> + blk_queue_exit(bdev->bd_disk->queue);
> }
> EXPORT_SYMBOL(bio_endio);
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 09f774e7413d..f71e4b433030 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -431,12 +431,13 @@ EXPORT_SYMBOL(blk_cleanup_queue);
> int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
> {
> const bool pm = flags & BLK_MQ_REQ_PM;
> + const unsigned int nr = (flags & BLK_MQ_REQ_DOUBLE_REF) ? 2 : 1;
>
> while (true) {
> bool success = false;
>
> rcu_read_lock();
> - if (percpu_ref_tryget_live(&q->q_usage_counter)) {
> + if (percpu_ref_tryget_many_live(&q->q_usage_counter, nr)) {
> /*
> * The code that increments the pm_only counter is
> * responsible for ensuring that that counter is
> @@ -446,7 +447,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
> !blk_queue_pm_only(q)) {
> success = true;
> } else {
> - percpu_ref_put(&q->q_usage_counter);
> + percpu_ref_put_many(&q->q_usage_counter, nr);
> }
> }
> rcu_read_unlock();
> @@ -480,8 +481,18 @@ static inline int bio_queue_enter(struct bio *bio)
> struct request_queue *q = bio->bi_bdev->bd_disk->queue;
> bool nowait = bio->bi_opf & REQ_NOWAIT;
> int ret;
> + blk_mq_req_flags_t flags = nowait ? BLK_MQ_REQ_NOWAIT : 0;
> + bool reffed = bio_flagged(bio, BIO_QUEUE_REFFED);
>
> - ret = blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0);
> + if (!reffed)
> + bio_set_flag(bio, BIO_QUEUE_REFFED);
> +
> + /*
> + * Grab two queue references for blk-mq, one is for bio, and
> + * another is for blk-mq request.
> + */
> + ret = blk_queue_enter(q, q->mq_ops && !reffed ?
> + (flags | BLK_MQ_REQ_DOUBLE_REF) : flags);
> if (unlikely(ret)) {
> if (nowait && !blk_queue_dying(q))
> bio_wouldblock_error(bio);
> @@ -492,10 +503,11 @@ static inline int bio_queue_enter(struct bio *bio)
> return ret;
> }
>
> -void blk_queue_exit(struct request_queue *q)
> +void __blk_queue_exit(struct request_queue *q, unsigned int nr)
> {
> - percpu_ref_put(&q->q_usage_counter);
> + percpu_ref_put_many(&q->q_usage_counter, nr);
> }
> +EXPORT_SYMBOL_GPL(__blk_queue_exit);
>
> static void blk_queue_usage_counter_release(struct percpu_ref *ref)
> {
> @@ -920,7 +932,6 @@ static blk_qc_t __submit_bio(struct bio *bio)
> return blk_mq_submit_bio(bio);
> ret = disk->fops->submit_bio(bio);
> }
> - blk_queue_exit(disk->queue);
> return ret;
> }
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 34b8c78f88e0..791638a7164b 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -323,14 +323,30 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req)
> static inline void nvme_end_req(struct request *req)
> {
> blk_status_t status = nvme_error_status(nvme_req(req)->status);
> + const bool mpath = req->cmd_flags & REQ_NVME_MPATH;
> + unsigned int nr = 0;
> + struct bio *bio;
> + struct nvme_ns *ns;
>
> if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
> req_op(req) == REQ_OP_ZONE_APPEND)
> req->__sector = nvme_lba_to_sect(req->q->queuedata,
> le64_to_cpu(nvme_req(req)->result.u64));
>
> + if (mpath) {
> + ns = req->q->queuedata;
> + __rq_for_each_bio(bio, req)
> + nr++;
> + }
> nvme_trace_bio_complete(req);
> blk_mq_end_request(req, status);
> +
> + /*
> + * We changed multipath bio->bi_bdev, so have to drop the queue
> + * reference manually
> + */
> + if (mpath && nr)
> + __blk_queue_exit(ns->head->disk->queue, nr);
> }
>
> void nvme_complete_rq(struct request *req)
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index a1d476e1ac02..017487c835fb 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -312,6 +312,12 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
> srcu_idx = srcu_read_lock(&head->srcu);
> ns = nvme_find_path(head);
> if (likely(ns)) {
> + /*
> + * this bio's ownership is transferred to underlying queue, so
> + * clear the queue reffed flag and let underlying queue to put
> + * the multipath queue for us.
> + */
> + bio_clear_flag(bio, BIO_QUEUE_REFFED);
> bio_set_dev(bio, ns->disk->part0);
> bio->bi_opf |= REQ_NVME_MPATH;
> trace_block_bio_remap(bio, disk_devt(ns->head->disk),
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 2c473c9b8990..b96ac162e703 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -445,6 +445,8 @@ enum {
> BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1),
> /* set RQF_PM */
> BLK_MQ_REQ_PM = (__force blk_mq_req_flags_t)(1 << 2),
> + /* double queue reference */
> + BLK_MQ_REQ_DOUBLE_REF = (__force blk_mq_req_flags_t)(1 << 3),
> };
>
> struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
> index 57099b37ef3a..e7f7d67198cc 100644
> --- a/include/linux/blk_types.h
> +++ b/include/linux/blk_types.h
> @@ -305,6 +305,7 @@ enum {
> BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
> BIO_TRACKED, /* set if bio goes through the rq_qos path */
> BIO_REMAPPED,
> + BIO_QUEUE_REFFED, /* need to put queue refcnt */
> BIO_FLAG_LAST
> };
>
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 62944d06a80f..6ad09b2ff2d1 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -925,7 +925,7 @@ extern int get_sg_io_hdr(struct sg_io_hdr *hdr, const void __user *argp);
> extern int put_sg_io_hdr(const struct sg_io_hdr *hdr, void __user *argp);
>
> extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
> -extern void blk_queue_exit(struct request_queue *q);
> +extern void __blk_queue_exit(struct request_queue *q, unsigned int nr);
> extern void blk_sync_queue(struct request_queue *q);
> extern int blk_rq_map_user(struct request_queue *, struct request *,
> struct rq_map_data *, void __user *, unsigned long,
> @@ -947,6 +947,11 @@ blk_status_t errno_to_blk_status(int errno);
>
> int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
>
> +static inline void blk_queue_exit(struct request_queue *q)
> +{
> + __blk_queue_exit(q, 1);
> +}
> +
> static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
> {
> return bdev->bd_disk->queue; /* this is never NULL */
>
--
Thanks,
Jeffle
More information about the Linux-nvme
mailing list