[PATCH v2 1/2] blk-mq: add tagset quiesce interface
Chao Leng
lengchao at huawei.com
Thu Oct 13 19:09:47 PDT 2022
On 2022/10/13 18:28, Sagi Grimberg wrote:
>
>
> On 10/13/22 12:44, Chao Leng wrote:
>> Drivers that have shared tagsets may need to quiesce potentially a lot
>> of request queues that all share a single tagset (e.g. nvme). Add an interface
>> to quiesce all the queues on a given tagset. This interface is useful because
>> it can speed up the quiesce by doing it in parallel.
>>
>> For tagsets that have BLK_MQ_F_BLOCKING set, we issue call_srcu on each
>> queue's srcu in parallel, such that all of them wait for the same RCU
>> grace period with a per-queue heap-allocated rcu_synchronize. For tagsets
>> that don't have BLK_MQ_F_BLOCKING set, a single synchronize_rcu is
>> sufficient.
>>
>> Some queues never need to be quiesced (e.g. the nvme connect_q), so
>> introduce QUEUE_FLAG_NOQUIESCED and have the tagset quiesce interface
>> skip such queues.
>
> I wouldn't say it never quiesces nor ever will; we just don't happen to
> quiesce it today...
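For context, patch 2/2 is expected to set the flag on such a queue. A
minimal sketch of what that could look like (the connect_q hookup is
illustrative only, not part of this patch):

	/* Illustrative only: mark a queue to be skipped by tagset quiesce. */
	blk_queue_flag_set(QUEUE_FLAG_NOQUIESCED, ctrl->connect_q);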
>
>>
>> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
>> Signed-off-by: Chao Leng <lengchao at huawei.com>
>> ---
>> block/blk-mq.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> include/linux/blk-mq.h | 2 ++
>> include/linux/blkdev.h | 2 ++
>> 3 files changed, 79 insertions(+)
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 8070b6c10e8d..ebe25da08156 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -29,6 +29,7 @@
>> #include <linux/prefetch.h>
>> #include <linux/blk-crypto.h>
>> #include <linux/part_stat.h>
>> +#include <linux/rcupdate_wait.h>
>> #include <trace/events/block.h>
>> @@ -311,6 +312,80 @@ void blk_mq_unquiesce_queue(struct request_queue *q)
>> }
>> EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
>> +static void blk_mq_quiesce_blocking_tagset(struct blk_mq_tag_set *set)
>> +{
>> + int i = 0;
>> + int count = 0;
>> + struct request_queue *q;
>> + struct rcu_synchronize *rcu;
>> +
>> + list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> + if (blk_queue_noquiesced(q))
>> + continue;
>> +
>> + blk_mq_quiesce_queue_nowait(q);
>> + count++;
>> + }
>> +
>> + rcu = kvmalloc(count * sizeof(*rcu), GFP_KERNEL);
>> + if (rcu) {
>> + list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> + if (blk_queue_noquiesced(q))
>> + continue;
>> +
>> + init_rcu_head(&rcu[i].head);
>> + init_completion(&rcu[i].completion);
>> + call_srcu(q->srcu, &rcu[i].head, wakeme_after_rcu);
>> + i++;
>> + }
>> +
>> + for (i = 0; i < count; i++) {
>> + wait_for_completion(&rcu[i].completion);
>> + destroy_rcu_head(&rcu[i].head);
>> + }
>> + kvfree(rcu);
>> + } else {
>> + list_for_each_entry(q, &set->tag_list, tag_set_list)
>> + synchronize_srcu(q->srcu);
>> + }
>> +}
>> +
>> +static void blk_mq_quiesce_nonblocking_tagset(struct blk_mq_tag_set *set)
>> +{
>> + struct request_queue *q;
>> +
>> + list_for_each_entry(q, &set->tag_list, tag_set_list) {
>> + if (blk_queue_noquiesced(q))
>> + continue;
>> +
>> + blk_mq_quiesce_queue_nowait(q);
>> + }
>> + synchronize_rcu();
>> +}
>> +
>> +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set)
>> +{
>> + mutex_lock(&set->tag_list_lock);
>> + if (set->flags & BLK_MQ_F_BLOCKING)
>> + blk_mq_quiesce_blocking_tagset(set);
>> + else
>> + blk_mq_quiesce_nonblocking_tagset(set);
>> +
>> + mutex_unlock(&set->tag_list_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_mq_quiesce_tagset);
>> +
>> +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set)
>> +{
>> + struct request_queue *q;
>> +
>> + mutex_lock(&set->tag_list_lock);
>> + list_for_each_entry(q, &set->tag_list, tag_set_list)
>> + blk_mq_unquiesce_queue(q);
>> + mutex_unlock(&set->tag_list_lock);
>> +}
>> +EXPORT_SYMBOL_GPL(blk_mq_unquiesce_tagset);
>> +
>> void blk_mq_wake_waiters(struct request_queue *q)
>> {
>> struct blk_mq_hw_ctx *hctx;
>> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
>> index ba18e9bdb799..1df47606d0a7 100644
>> --- a/include/linux/blk-mq.h
>> +++ b/include/linux/blk-mq.h
>> @@ -877,6 +877,8 @@ void blk_mq_start_hw_queues(struct request_queue *q);
>> void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
>> void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
>> void blk_mq_quiesce_queue(struct request_queue *q);
>> +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set);
>> +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set);
>> void blk_mq_wait_quiesce_done(struct request_queue *q);
>> void blk_mq_unquiesce_queue(struct request_queue *q);
>> void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index 50e358a19d98..f15544299a67 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -579,6 +579,7 @@ struct request_queue {
>> #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
>> #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
>> #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
>> +#define QUEUE_FLAG_NOQUIESCED 31 /* queue is never quiesced */
>
> the comment is misleading. If this were truly a queue that never
> quiesces, then blk_mq_quiesce_queue() and friends would need to skip
> it as well.
>
> I'd call it self_quiesce or something that reflects that it is
> black-listed from tagset-wide quiesce.
Yes, you are right. I will modify the comment in patch V3.
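For reference, the intended driver-side usage is a single call per
tagset instead of looping over every namespace queue. A minimal sketch,
assuming the nvme wiring lands in patch 2/2 (the function bodies below
are illustrative, not taken from that patch):

	/*
	 * Illustrative only: quiesce/unquiesce every queue sharing the
	 * controller's tagset in one shot, in parallel.
	 */
	static void nvme_stop_queues(struct nvme_ctrl *ctrl)
	{
		blk_mq_quiesce_tagset(ctrl->tagset);
	}

	static void nvme_start_queues(struct nvme_ctrl *ctrl)
	{
		blk_mq_unquiesce_tagset(ctrl->tagset);
	}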