[PATCH 3/8] Revert "blk-mq: Fix races between iterating over requests and freeing requests"
Ming Lei
ming.lei at redhat.com
Sun Apr 25 09:57:48 BST 2021
This reverts commit 5ba3f5a6ca7ee2dffcae7fab25a1a1053e3264cb.
Signed-off-by: Ming Lei <ming.lei at redhat.com>
---
block/blk-core.c | 34 +------------------------------
block/blk-mq-tag.c | 51 ++++++----------------------------------------
block/blk-mq-tag.h | 4 +---
block/blk-mq.c | 21 ++++---------------
block/blk-mq.h | 1 -
block/blk.h | 2 --
block/elevator.c | 1 -
7 files changed, 12 insertions(+), 102 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index ca7f833e25a8..9bcdae93f6d4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -279,36 +279,6 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
}
EXPORT_SYMBOL(blk_dump_rq_flags);
-/**
- * blk_mq_wait_for_tag_iter - wait for preexisting tag iteration functions to finish
- * @set: Tag set to wait on.
- *
- * Waits for preexisting calls of blk_mq_all_tag_iter(),
- * blk_mq_tagset_busy_iter() and also for their atomic variants to finish
- * accessing hctx->tags->rqs[]. New readers may start while this function is
- * in progress or after this function has returned. Use this function to make
- * sure that hctx->tags->rqs[] changes have become globally visible.
- *
- * Waits for preexisting blk_mq_queue_tag_busy_iter(q, fn, priv) calls to
- * finish accessing requests associated with other request queues than 'q'.
- */
-void blk_mq_wait_for_tag_iter(struct blk_mq_tag_set *set)
-{
- struct blk_mq_tags *tags;
- int i;
-
- if (set->tags) {
- for (i = 0; i < set->nr_hw_queues; i++) {
- tags = set->tags[i];
- if (!tags)
- continue;
- down_write(&tags->iter_rwsem);
- up_write(&tags->iter_rwsem);
- }
- }
- synchronize_rcu();
-}
-
/**
* blk_sync_queue - cancel any pending callbacks on a queue
* @q: the queue
@@ -442,10 +412,8 @@ void blk_cleanup_queue(struct request_queue *q)
* it is safe to free requests now.
*/
mutex_lock(&q->sysfs_lock);
- if (q->elevator) {
- blk_mq_wait_for_tag_iter(q->tag_set);
+ if (q->elevator)
blk_mq_sched_free_requests(q);
- }
mutex_unlock(&q->sysfs_lock);
percpu_ref_exit(&q->q_usage_counter);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 39d5c9190a6b..d8eaa38a1bd1 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -209,24 +209,14 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!reserved)
bitnr += tags->nr_reserved_tags;
- rcu_read_lock();
- /*
- * The request 'rq' points at is protected by an RCU read lock until
- * its queue pointer has been verified and by q_usage_count while the
- * callback function is being invoked. See also the
- * percpu_ref_tryget() and blk_queue_exit() calls in
- * blk_mq_queue_tag_busy_iter().
- */
- rq = rcu_dereference(tags->rqs[bitnr]);
+ rq = tags->rqs[bitnr];
+
/*
* We can hit rq == NULL here, because the tagging functions
* test and set the bit before assigning ->rqs[].
*/
- if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx) {
- rcu_read_unlock();
+ if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
return iter_data->fn(hctx, rq, iter_data->data, reserved);
- }
- rcu_read_unlock();
return true;
}
@@ -264,17 +254,11 @@ struct bt_tags_iter_data {
unsigned int flags;
};
-/* Include reserved tags. */
#define BT_TAG_ITER_RESERVED (1 << 0)
-/* Only include started requests. */
#define BT_TAG_ITER_STARTED (1 << 1)
-/* Iterate over tags->static_rqs[] instead of tags->rqs[]. */
#define BT_TAG_ITER_STATIC_RQS (1 << 2)
-/* The callback function may sleep. */
-#define BT_TAG_ITER_MAY_SLEEP (1 << 3)
-static bool __bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr,
- void *data)
+static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
struct bt_tags_iter_data *iter_data = data;
struct blk_mq_tags *tags = iter_data->tags;
@@ -291,8 +275,7 @@ static bool __bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr,
if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
rq = tags->static_rqs[bitnr];
else
- rq = rcu_dereference_check(tags->rqs[bitnr],
- lockdep_is_held(&tags->iter_rwsem));
+ rq = tags->rqs[bitnr];
if (!rq)
return true;
if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
@@ -301,25 +284,6 @@ static bool __bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr,
return iter_data->fn(rq, iter_data->data, reserved);
}
-static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
-{
- struct bt_tags_iter_data *iter_data = data;
- struct blk_mq_tags *tags = iter_data->tags;
- bool res;
-
- if (iter_data->flags & BT_TAG_ITER_MAY_SLEEP) {
- down_read(&tags->iter_rwsem);
- res = __bt_tags_iter(bitmap, bitnr, data);
- up_read(&tags->iter_rwsem);
- } else {
- rcu_read_lock();
- res = __bt_tags_iter(bitmap, bitnr, data);
- rcu_read_unlock();
- }
-
- return res;
-}
-
/**
* bt_tags_for_each - iterate over the requests in a tag map
* @tags: Tag map to iterate over.
@@ -393,12 +357,10 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
{
int i;
- might_sleep();
-
for (i = 0; i < tagset->nr_hw_queues; i++) {
if (tagset->tags && tagset->tags[i])
__blk_mq_all_tag_iter(tagset->tags[i], fn, priv,
- BT_TAG_ITER_STARTED | BT_TAG_ITER_MAY_SLEEP);
+ BT_TAG_ITER_STARTED);
}
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
@@ -582,7 +544,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
tags->nr_tags = total_tags;
tags->nr_reserved_tags = reserved_tags;
- init_rwsem(&tags->iter_rwsem);
if (blk_mq_is_sbitmap_shared(flags))
return tags;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index d1d73d7cc7df..0290c308ece9 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -17,11 +17,9 @@ struct blk_mq_tags {
struct sbitmap_queue __bitmap_tags;
struct sbitmap_queue __breserved_tags;
- struct request __rcu **rqs;
+ struct request **rqs;
struct request **static_rqs;
struct list_head page_list;
-
- struct rw_semaphore iter_rwsem;
};
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8b59f6b4ec8e..79c01b1f885c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -496,10 +496,8 @@ static void __blk_mq_free_request(struct request *rq)
blk_crypto_free_request(rq);
blk_pm_mark_last_busy(rq);
rq->mq_hctx = NULL;
- if (rq->tag != BLK_MQ_NO_TAG) {
+ if (rq->tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->tags, ctx, rq->tag);
- rcu_assign_pointer(hctx->tags->rqs[rq->tag], NULL);
- }
if (sched_tag != BLK_MQ_NO_TAG)
blk_mq_put_tag(hctx->sched_tags, ctx, sched_tag);
blk_mq_sched_restart(hctx);
@@ -841,20 +839,9 @@ EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
- struct request *rq;
-
if (tag < tags->nr_tags) {
- /*
- * Freeing tags happens with the request queue frozen so the
- * rcu dereference below is protected by the request queue
- * usage count. We can only verify that usage count after
- * having read the request pointer.
- */
- rq = rcu_dereference_check(tags->rqs[tag], true);
- WARN_ON_ONCE(IS_ENABLED(CONFIG_PROVE_RCU) && rq &&
- percpu_ref_is_zero(&rq->q->q_usage_counter));
- prefetch(rq);
- return rq;
+ prefetch(tags->rqs[tag]);
+ return tags->rqs[tag];
}
return NULL;
@@ -1125,7 +1112,7 @@ static bool blk_mq_get_driver_tag(struct request *rq)
rq->rq_flags |= RQF_MQ_INFLIGHT;
__blk_mq_inc_active_requests(hctx);
}
- rcu_assign_pointer(hctx->tags->rqs[rq->tag], rq);
+ hctx->tags->rqs[rq->tag] = rq;
return true;
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 9ccb1818303b..3616453ca28c 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -226,7 +226,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
blk_mq_put_tag(hctx->tags, rq->mq_ctx, rq->tag);
- rcu_assign_pointer(hctx->tags->rqs[rq->tag], NULL);
rq->tag = BLK_MQ_NO_TAG;
if (rq->rq_flags & RQF_MQ_INFLIGHT) {
diff --git a/block/blk.h b/block/blk.h
index d88b0823738c..529233957207 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -185,8 +185,6 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
void blk_account_io_start(struct request *req);
void blk_account_io_done(struct request *req, u64 now);
-void blk_mq_wait_for_tag_iter(struct blk_mq_tag_set *set);
-
/*
* Internal elevator interface
*/
diff --git a/block/elevator.c b/block/elevator.c
index aae9cff6d5ae..7c486ce858e0 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -201,7 +201,6 @@ static void elevator_exit(struct request_queue *q, struct elevator_queue *e)
{
lockdep_assert_held(&q->sysfs_lock);
- blk_mq_wait_for_tag_iter(q->tag_set);
blk_mq_sched_free_requests(q);
__elevator_exit(q, e);
}
--
2.29.2
More information about the Linux-nvme
mailing list