[PATCHv2] nvme-tcp: align I/O cpu with blk-mq mapping
Hannes Reinecke
hare at kernel.org
Wed Jun 19 07:55:53 PDT 2024
Select the first CPU from a given blk-mq hctx mapping on which to
queue the TCP workqueue item (see the sketch below).
This avoids thread bouncing during I/O on machines with
an uneven CPU topology.
On an AMD EPYC system the performance increases from
Seq 4k write: 13.8 MB/s
Random 4k write: 11.3 MB/s
Seq 4k read: 13.9 MB/s
Random 4k read: 10.9 MB/s
to
Seq 4k write: 19.1 MB/s
Random 4k write: 18.1 MB/s
Seq 4k read: 15.8 MB/s
Random 4k read: 15.8 MB/s
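The gist of the lookup, as a minimal sketch (the helper name and signature
are illustrative only, not part of the patch; it needs <linux/blk-mq.h>,
<linux/cpumask.h> and <linux/workqueue.h>): walk the online CPUs and pick
the first one that the blk-mq map of the matching hctx type assigns to
this queue's hctx index.

	/*
	 * Illustration only: return the first online CPU that the given
	 * blk-mq map assigns to hctx index 'qid', or WORK_CPU_UNBOUND if
	 * none of the mapped CPUs is currently online.
	 */
	static int first_online_cpu_for_hctx(struct blk_mq_tag_set *set,
					     enum hctx_type type, int qid)
	{
		unsigned int *mq_map = set->map[type].mq_map;
		int cpu;

		for_each_cpu(cpu, cpu_online_mask)
			if (mq_map[cpu] == qid)
				return cpu;
		return WORK_CPU_UNBOUND;
	}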
Signed-off-by: Hannes Reinecke <hare at suse.de>
---
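For reference, queue->io_cpu is the CPU on which the per-queue io_work is
kicked, roughly along these lines (paraphrasing the existing
nvme_tcp_queue_request()/nvme_tcp_data_ready() paths, not part of this
diff):

	/* existing nvme-tcp pattern: run the socket work on io_cpu */
	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);

so deriving io_cpu from the hctx mapping keeps the socket work on a CPU
that actually submits I/O for that queue.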
drivers/nvme/host/tcp.c | 43 +++++++++++++++++++++++++++++------------
1 file changed, 31 insertions(+), 12 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3be67c98c906..78fbce13a9e6 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1550,20 +1550,38 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
{
struct nvme_tcp_ctrl *ctrl = queue->ctrl;
- int qid = nvme_tcp_queue_id(queue);
+ struct blk_mq_tag_set *set = &ctrl->tag_set;
+ int qid = nvme_tcp_queue_id(queue) - 1;
+ unsigned int *mq_map;
int n = 0;
- if (nvme_tcp_default_queue(queue))
- n = qid - 1;
- else if (nvme_tcp_read_queue(queue))
- n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
- else if (nvme_tcp_poll_queue(queue))
+ if (nvme_tcp_default_queue(queue)) {
+ mq_map = set->map[HCTX_TYPE_DEFAULT].mq_map;
+ n = qid;
+ } else if (nvme_tcp_read_queue(queue)) {
+ mq_map = set->map[HCTX_TYPE_READ].mq_map;
+ n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT];
+ } else if (nvme_tcp_poll_queue(queue)) {
+ mq_map = set->map[HCTX_TYPE_POLL].mq_map;
n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
- ctrl->io_queues[HCTX_TYPE_READ] - 1;
+ ctrl->io_queues[HCTX_TYPE_READ];
+ }
if (wq_unbound)
queue->io_cpu = WORK_CPU_UNBOUND;
- else
- queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+ else {
+ int i;
+
+ if (WARN_ON(!mq_map))
+ return;
+ for_each_cpu(i, cpu_online_mask) {
+ if (mq_map[i] == qid) {
+ queue->io_cpu = i;
+ break;
+ }
+ }
+ dev_dbg(ctrl->ctrl.device, "queue %d: using cpu %d\n",
+ qid, queue->io_cpu);
+ }
}
static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
@@ -1704,7 +1722,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
queue->sock->sk->sk_allocation = GFP_ATOMIC;
queue->sock->sk->sk_use_task_frag = false;
- nvme_tcp_set_queue_io_cpu(queue);
+ queue->io_cpu = WORK_CPU_UNBOUND;
queue->request = NULL;
queue->data_remaining = 0;
queue->ddgst_remaining = 0;
@@ -1858,9 +1876,10 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
nvme_tcp_init_recv_ctx(queue);
nvme_tcp_setup_sock_ops(queue);
- if (idx)
+ if (idx) {
+ nvme_tcp_set_queue_io_cpu(queue);
ret = nvmf_connect_io_queue(nctrl, idx);
- else
+ } else
ret = nvmf_connect_admin_queue(nctrl);
if (!ret) {
--
2.35.3