[PATCH 2/7] nvme-tcp: distribute queue affinity
Hannes Reinecke
hare at kernel.org
Wed Jun 26 05:13:42 PDT 2024
Introduce a per-cpu counter to distribute queues evenly across all
cpus in a blk-mq hwctx cpu set. The current algorithm produces
identical cpu affinity maps for all controllers, piling work onto
the same cpu for every queue with the same qid.
Signed-off-by: Hannes Reinecke <hare at kernel.org>
---
drivers/nvme/host/tcp.c | 31 +++++++++++++++++++++++++------
1 file changed, 25 insertions(+), 6 deletions(-)
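
For illustration, here is a minimal user-space sketch of the selection
policy this patch implements: every cpu carries a counter of queues
assigned to it, and a new queue picks the cpu in its hwctx map with the
lowest count, then bumps that counter. The names cpu_queue_count and
pick_io_cpu, the eight-cpu map, and the use of plain ints instead of
atomics are assumptions of this sketch, not part of the patch.

/*
 * Illustrative user-space model of the per-cpu queue counter
 * (not kernel code; all names below are made up for this sketch).
 */
#include <limits.h>
#include <stdio.h>

#define NR_CPUS		8
#define CPU_UNBOUND	(-1)

static int cpu_queue_count[NR_CPUS];	/* stands in for nvme_tcp_cpu_queues[] */

/* Pick the least-loaded cpu whose blk-mq mapping matches @qid. */
static int pick_io_cpu(const int *mq_map, int qid)
{
	int cpu, min_queues = INT_MAX, io_cpu = CPU_UNBOUND;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (mq_map[cpu] != qid)
			continue;
		if (cpu_queue_count[cpu] < min_queues) {
			min_queues = cpu_queue_count[cpu];
			io_cpu = cpu;
		}
	}
	if (io_cpu != CPU_UNBOUND)
		cpu_queue_count[io_cpu]++;
	return io_cpu;
}

int main(void)
{
	/* two cpus mapped to each of four hw queues (qid 1..4) */
	int mq_map[NR_CPUS] = { 1, 1, 2, 2, 3, 3, 4, 4 };
	int ctrl, qid;

	for (ctrl = 0; ctrl < 2; ctrl++)
		for (qid = 1; qid <= 4; qid++)
			printf("ctrl %d queue %d -> cpu %d\n",
			       ctrl, qid, pick_io_cpu(mq_map, qid));
	return 0;
}

With two controllers sharing the same mq_map, the second controller's
queues land on the previously idle sibling cpus instead of repeating
the first controller's assignment.
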
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 78fbce13a9e6..faab55ff86fe 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -26,6 +26,8 @@
 
 struct nvme_tcp_queue;
 
+static atomic_t nvme_tcp_cpu_queues[NR_CPUS];
+
 /* Define the socket priority to use for connections were it is desirable
  * that the NIC consider performing optimized packet processing or filtering.
  * A non-zero value being sufficient to indicate general consideration of any
@@ -1569,16 +1571,26 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
 	if (wq_unbound)
 		queue->io_cpu = WORK_CPU_UNBOUND;
 	else {
-		int i;
+		int i, min_queues = WORK_CPU_UNBOUND, io_cpu = WORK_CPU_UNBOUND;
 
 		if (WARN_ON(!mq_map))
 			return;
-		for_each_cpu(i, cpu_online_mask) {
-			if (mq_map[i] == qid) {
-				queue->io_cpu = i;
-				break;
+		for_each_online_cpu(i) {
+			int num_queues;
+
+			if (mq_map[i] != qid)
+				continue;
+
+			num_queues = atomic_read(&nvme_tcp_cpu_queues[i]);
+			if (num_queues < min_queues) {
+				min_queues = num_queues;
+				io_cpu = i;
 			}
 		}
+		if (io_cpu != WORK_CPU_UNBOUND) {
+			queue->io_cpu = io_cpu;
+			atomic_inc(&nvme_tcp_cpu_queues[io_cpu]);
+		}
 		dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n",
 			qid, queue->io_cpu);
 	}
@@ -1834,6 +1846,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
 	nvme_tcp_restore_sock_ops(queue);
 	cancel_work_sync(&queue->io_work);
+	if (queue->io_cpu != WORK_CPU_UNBOUND) {
+		atomic_dec(&nvme_tcp_cpu_queues[queue->io_cpu]);
+		queue->io_cpu = WORK_CPU_UNBOUND;
+	}
 }
 
 static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
@@ -2845,7 +2861,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
-	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, i;
 
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
@@ -2863,6 +2879,9 @@ static int __init nvme_tcp_init_module(void)
 	if (!nvme_tcp_wq)
 		return -ENOMEM;
 
+	for_each_possible_cpu(i)
+		atomic_set(&nvme_tcp_cpu_queues[i], 0);
+
 	nvmf_register_transport(&nvme_tcp_transport);
 	return 0;
 }
--
2.35.3