[PATCH 6/7] nvme-tcp: SOCK_NOSPACE handling
Hannes Reinecke
hare@kernel.org
Wed Jun 26 05:13:46 PDT 2024
When there is no write space left on the socket we shouldn't try to
push more data onto it; the transfer will stall anyway and only lead
to higher CPU utilisation. So check sock_wspace() before queueing new
requests and let the socket's write_space() handler restart
submission. If queue_rq() finds the socket full, return
BLK_STS_DEV_RESOURCE and stop the hardware queue; write_space() will
restart it once write space becomes available again.
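
In outline, the handshake between the submission path and the socket
callback looks like this (an annotated sketch of the hunks below;
sk_callback_lock and the sk_stream_is_writeable() check are omitted
for brevity):

  /* Submission side: only kick io_work if the socket can take data. */
  static inline void nvme_tcp_queue_work(struct nvme_tcp_queue *queue)
  {
          /* Ask TCP to invoke ->write_space() once space frees up... */
          set_bit(SOCK_NOSPACE, &queue->sock->flags);
          /*
           * ...but only stall if the send buffer is full right now.
           * Setting the bit before testing avoids a missed wakeup.
           */
          if (!sock_wspace(queue->sock->sk))
                  return;
          clear_bit(SOCK_NOSPACE, &queue->sock->flags);
          queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
  }

  /* Socket side: TCP calls this when write space becomes available. */
  static void nvme_tcp_write_space(struct sock *sk)
  {
          struct nvme_tcp_queue *queue = sk->sk_user_data;

          clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
          if (sock_wspace(sk))
                  queue_work_on(queue->io_cpu, nvme_tcp_wq,
                                &queue->io_work);
          /* Restart the hardware queue stopped by queue_rq(). */
          if (queue->hctx)
                  blk_mq_start_hw_queue(queue->hctx);
  }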
Signed-off-by: Hannes Reinecke <hare@kernel.org>
---
drivers/nvme/host/tcp.c | 30 ++++++++++++++++++++++++++----
1 file changed, 26 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 599d4ebf888f..d78cca2f05d4 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -147,6 +147,7 @@ enum nvme_tcp_recv_state {
struct nvme_tcp_ctrl;
struct nvme_tcp_queue {
struct socket *sock;
+ struct blk_mq_hw_ctx *hctx;
struct work_struct io_work;
int io_cpu;
@@ -381,6 +382,15 @@ static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
nvme_tcp_queue_has_pending(queue);
}
+static inline void nvme_tcp_queue_work(struct nvme_tcp_queue *queue)
+{
+ set_bit(SOCK_NOSPACE, &queue->sock->flags);
+ if (!sock_wspace(queue->sock->sk))
+ return;
+ clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+}
+
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync, bool last)
{
@@ -402,7 +412,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
}
if (last && nvme_tcp_queue_has_pending(queue))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
@@ -550,6 +560,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
hctx->driver_data = queue;
+ queue->hctx = hctx;
return 0;
}
@@ -1004,7 +1015,10 @@ static void nvme_tcp_write_space(struct sock *sk)
queue = sk->sk_user_data;
if (likely(queue && sk_stream_is_writeable(sk))) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ if (sock_wspace(sk))
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ if (queue->hctx)
+ blk_mq_start_hw_queue(queue->hctx);
}
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1317,7 +1331,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
} while (!time_after(jiffies, deadline)); /* quota is exhausted */
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
@@ -1863,6 +1877,7 @@ static void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
{
+ queue->hctx = NULL;
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
nvme_tcp_restore_sock_ops(queue);
cancel_work_sync(&queue->io_work);
@@ -2614,7 +2629,7 @@ static void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data;
if (!llist_empty(&queue->req_list))
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ nvme_tcp_queue_work(queue);
}
static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -2630,6 +2645,13 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ set_bit(SOCK_NOSPACE, &queue->sock->flags);
+ if (!sock_wspace(queue->sock->sk)) {
+ blk_mq_stop_hw_queue(hctx);
+ return BLK_STS_DEV_RESOURCE;
+ }
+ clear_bit(SOCK_NOSPACE, &queue->sock->flags);
+
ret = nvme_tcp_setup_cmd_pdu(ns, rq);
if (unlikely(ret))
return ret;
--
2.35.3