[PATCH v8 11/12] blk-mq: prevent offlining hk CPUs with associated online isolated CPUs
Daniel Wagner
wagi at kernel.org
Fri Sep 5 07:59:57 PDT 2025
When isolcpus=io_queue is enabled and the last housekeeping CPU
for a given hctx goes offline, no CPU would be left to handle I/O.
To prevent I/O stalls, disallow offlining housekeeping CPUs that are
still serving isolated CPUs: the blk-mq CPU hotplug offline callback
now rejects such a request with -EINVAL.
Reviewed-by: Aaron Tomlin <atomlin at atomlin.com>
Reviewed-by: Hannes Reinecke <hare at suse.de>
Signed-off-by: Daniel Wagner <wagi at kernel.org>
---
block/blk-mq.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ba3a4b77f5786e5372adce53e4fff5aa2ace24aa..d48be77919e671a81077f7042103699a80959664 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3683,6 +3683,43 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx)
return data.has_rq;
}
+/*
+ * Return false if @this_cpu is the last online housekeeping CPU of @hctx
+ * while an isolated CPU mapped to @hctx is still online; true otherwise.
+ */
+static bool blk_mq_hctx_can_offline_hk_cpu(struct blk_mq_hw_ctx *hctx,
+					   unsigned int this_cpu)
+{
+	const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_IO_QUEUE);
+	struct gendisk *disk = hctx->queue->disk;
+	int isol_cpu = -1;
+
+	/* Only housekeeping CPUs are held back; isolated CPUs may always go. */
+	if (!cpumask_test_cpu(this_cpu, hk_mask))
+		return true;
+
+	for (int i = 0; i < hctx->nr_ctx; i++) {
+		unsigned int cpu = hctx->ctxs[i]->cpu;
+
+		if (cpu == this_cpu || cpu_is_offline(cpu))
+			continue;
+
+		/* Any other online housekeeping CPU keeps the hctx usable. */
+		if (cpumask_test_cpu(cpu, hk_mask))
+			return true;
+
+		/* Keep scanning: a housekeeping CPU may follow in ctxs[]. */
+		if (isol_cpu < 0)
+			isol_cpu = cpu;
+	}
+
+	/* NOTE(review): queue->disk is presumably NULL without a gendisk. */
+	if (isol_cpu >= 0)
+		pr_warn("%s: trying to offline hctx%d but there is still an online isolcpu CPU %d mapped to it\n",
+			disk ? disk->disk_name : "(no disk)", hctx->queue_num, isol_cpu);
+	return isol_cpu < 0;
+}
+
static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx,
unsigned int this_cpu)
{
@@ -3714,6 +3751,11 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node)
struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node,
struct blk_mq_hw_ctx, cpuhp_online);
+ if (housekeeping_enabled(HK_TYPE_IO_QUEUE)) {
+ if (!blk_mq_hctx_can_offline_hk_cpu(hctx, cpu))
+ return -EINVAL;
+ }
+
if (blk_mq_hctx_has_online_cpu(hctx, cpu))
return 0;
--
2.51.0
More information about the Linux-nvme
mailing list