[PATCH v3 1/2] blk-mq: add async quiesce interface

Sun Jul 26 23:33:43 EDT 2020

On 2020/7/27 10:08, Ming Lei wrote:
>> It is at the end and contains exactly what is needed to synchronize. Not
> The sync is simply single global synchronize_rcu(), and why bother to add
> extra >=40bytes for each hctx.
> 
>> sure what you mean by reuse hctx->srcu?
> You already reuses hctx->srcu, but not see reason to add extra rcu_synchronize
> to each hctx for just simulating one single synchronize_rcu().

To synchronize the SRCUs together, the extra bytes are needed; separating
blocking and non-blocking queues into two hctx may not be a good choice.

There are two choices: add the struct rcu_synchronize to hctx or to srcu.
Adding rcu_synchronize to srcu has a weakness: the extra bytes are wasted
for users that do not need to batch-synchronize SRCU. Still, I think it's
better for SRCU to provide the batch synchronization interface.

We can add a check of ctrl->tagset->flags to provide the same interface for
both blocking and non-blocking queues. The code for TINY_SRCU:

---
  block/blk-mq.c           | 29 +++++++++++++++++++++++++++++
  drivers/nvme/host/core.c |  9 ++++++++-
  include/linux/blk-mq.h   |  2 ++
  include/linux/srcu.h     |  2 ++
  include/linux/srcutiny.h |  1 +
  kernel/rcu/srcutiny.c    | 16 ++++++++++++++++
  6 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e0d173beaa3..3117fc3082ff 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -235,6 +235,35 @@ void blk_mq_quiesce_queue(struct request_queue *q)
  }
  EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);

+void blk_mq_quiesce_queue_async(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	blk_mq_quiesce_queue_nowait(q);
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu_async(hctx->srcu);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async);
+
+void blk_mq_quiesce_queue_async_wait(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	if (q == NULL) {
+		synchronize_rcu();
+		return;
+	}
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (hctx->flags & BLK_MQ_F_BLOCKING)
+			synchronize_srcu_async_wait(hctx->srcu);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_async_wait);
+
  /*
   * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
   * @q: request queue.
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a3b1157561f5..f13aa447ab64 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4322,7 +4322,14 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)

  	down_read(&ctrl->namespaces_rwsem);
  	list_for_each_entry(ns, &ctrl->namespaces, list)
-		blk_mq_quiesce_queue(ns->queue);
+		blk_mq_quiesce_queue_async(ns->queue);
+
+	if (ctrl->tagset->flags & BLK_MQ_F_BLOCKING) {
+		list_for_each_entry(ns, &ctrl->namespaces, list)
+			blk_mq_quiesce_queue_async_wait(ns->queue);
+	} else {
+		blk_mq_quiesce_queue_async_wait(NULL);
+	}
  	up_read(&ctrl->namespaces_rwsem);
  }
  EXPORT_SYMBOL_GPL(nvme_stop_queues);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d6fcae17da5a..092470c63558 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -515,6 +515,8 @@ void blk_mq_start_hw_queues(struct request_queue *q);
  void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
  void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
  void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_quiesce_queue_async(struct request_queue *q);
+void blk_mq_quiesce_queue_async_wait(struct request_queue *q);
  void blk_mq_unquiesce_queue(struct request_queue *q);
  void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
  void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e432cc92c73d..7e006e51ccf9 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,6 +60,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp);
  int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
  void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
  void synchronize_srcu(struct srcu_struct *ssp);
+void synchronize_srcu_async(struct srcu_struct *ssp);
+void synchronize_srcu_async_wait(struct srcu_struct *ssp);

  #ifdef CONFIG_DEBUG_LOCK_ALLOC

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 5a5a1941ca15..3d7d871bef61 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -23,6 +23,7 @@ struct srcu_struct {
  	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
  	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
  	struct work_struct srcu_work;	/* For driving grace periods. */
+	struct rcu_synchronize rcu_sync;
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
  	struct lockdep_map dep_map;
  #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 6208c1dae5c9..6e1468175a45 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -190,6 +190,22 @@ void synchronize_srcu(struct srcu_struct *ssp)
  }
  EXPORT_SYMBOL_GPL(synchronize_srcu);

+void synchronize_srcu_async(struct srcu_struct *ssp)
+{
+	init_rcu_head(&ssp->rcu_sync.head);
+	init_completion(&ssp->rcu_sync.completion);
+	call_srcu(ssp, &ssp->rcu_sync.head, wakeme_after_rcu_batch);
+
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_async);
+
+void synchronize_srcu_async_wait(struct srcu_struct *ssp)
+{
+	wait_for_completion(&ssp->rcu_sync.completion);
+	destroy_rcu_head(&ssp->rcu_sync.head);
+}
+EXPORT_SYMBOL_GPL(synchronize_srcu_async_wait);
+
  /* Lockdep diagnostics.  */
  void __init rcu_scheduler_starting(void)
  {
-- 
2.16.4