[PATCH 4/8] nvme-tcp: improve stall debugging
Sagi Grimberg
sagi at grimberg.me
Wed Jul 17 14:11:28 PDT 2024
On 16/07/2024 10:36, Hannes Reinecke wrote:
> Add counters for the number of send and receive calls, and an additional
> counter for the number of SQEs processed.
TBH, this looks like something that should be added as tracepoints,
with something like an eBPF program that attaches to them and performs
this type of logic.
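
Something along these lines (untested sketch, the event name, fields and
trace system are made up for illustration, this isn't an existing
tracepoint), emitted once per sendmsg call from the io_work path:

TRACE_EVENT(nvme_tcp_send,
	TP_PROTO(int qid, int ret),
	TP_ARGS(qid, ret),
	TP_STRUCT__entry(
		__field(int, qid)
		__field(int, ret)
	),
	TP_fast_assign(
		__entry->qid = qid;
		__entry->ret = ret;
	),
	TP_printk("queue %d: sendmsg returned %d",
		  __entry->qid, __entry->ret)
);

Counting sends per queue then becomes a one-liner in bpftrace instead of
state kept in struct nvme_tcp_queue, e.g. (assuming the event sits under
a nvme_tcp trace system):

bpftrace -e 'tracepoint:nvme_tcp:nvme_tcp_send { @sends[args->qid] = count(); }'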
Otherwise, these counters need to be grouped into a proper stats struct
to clarify what they are used for.
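
Roughly something like this (untested sketch, names are just a
suggestion), embedded in struct nvme_tcp_queue as a single member
instead of four loose fields:

struct nvme_tcp_queue_stats {
	unsigned int nr_send;	/* sock_sendmsg() calls in this io_work run */
	unsigned int nr_sqe;	/* requests fully sent in this io_work run */
	unsigned int nr_recv;	/* nvme_tcp_recv_skb() calls in this try_recv */
	unsigned int nr_cqe;	/* completions processed in this try_recv */
};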
>
> Signed-off-by: Hannes Reinecke <hare at kernel.org>
> ---
> drivers/nvme/host/tcp.c | 17 ++++++++++++++++-
> 1 file changed, 16 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 04d840709d5d..9caee99955c2 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -156,7 +156,10 @@ struct nvme_tcp_queue {
> int pdu_offset;
> size_t data_remaining;
> size_t ddgst_remaining;
> + unsigned int nr_sqe;
> + unsigned int nr_send;
> unsigned int nr_cqe;
> + unsigned int nr_recv;
>
> /* send state */
> struct nvme_tcp_request *request;
> @@ -368,6 +371,8 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
> int ret;
>
> /* drain the send queue as much as we can... */
> + queue->nr_sqe = 0;
> + queue->nr_send = 0;
> do {
> ret = nvme_tcp_try_send(queue);
> } while (ret > 0);
> @@ -944,6 +949,7 @@ static int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
> if (unlikely(!queue->rd_enabled))
> return -EFAULT;
>
> + queue->nr_recv++;
> while (len) {
> switch (nvme_tcp_recv_state(queue)) {
> case NVME_TCP_RECV_PDU:
> @@ -1028,6 +1034,7 @@ static void nvme_tcp_state_change(struct sock *sk)
> static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
> {
> queue->request = NULL;
> + queue->nr_sqe++;
> }
>
> static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
> @@ -1071,6 +1078,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
>
> bvec_set_page(&bvec, page, len, offset);
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
> + queue->nr_send++;
> ret = sock_sendmsg(queue->sock, &msg);
> if (ret <= 0)
> return ret;
> @@ -1127,6 +1135,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
>
> bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
> + queue->nr_send++;
> ret = sock_sendmsg(queue->sock, &msg);
> if (unlikely(ret <= 0))
> return ret;
> @@ -1165,6 +1174,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
>
> bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
> iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
> + queue->nr_send++;
> ret = sock_sendmsg(queue->sock, &msg);
> if (unlikely(ret <= 0))
> return ret;
> @@ -1198,6 +1208,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
> else
> msg.msg_flags |= MSG_EOR;
>
> + queue->nr_send++;
> ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
> if (unlikely(ret <= 0))
> return ret;
> @@ -1275,6 +1286,7 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
> rd_desc.count = 1;
> lock_sock(sk);
> queue->nr_cqe = 0;
> + queue->nr_recv = 0;
> consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
> release_sock(sk);
> return consumed;
> @@ -1288,6 +1300,8 @@ static void nvme_tcp_io_work(struct work_struct *w)
> u64 tx_deadline = start + deadline;
> bool pending = false;
>
> + queue->nr_sqe = 0;
> + queue->nr_send = 0;
> do {
> int result;
>
> @@ -1318,8 +1332,9 @@ static void nvme_tcp_io_work(struct work_struct *w)
> overrun = ktime_to_us(ktime_get()) - start;
> if (overrun > 10 * deadline) {
> dev_dbg(queue->ctrl->ctrl.device,
> - "queue %d: stall (%llu msecs)%s%s\n",
> + "queue %d: stall (%llu msecs) send %u sqe %u recv %u cqe %u%s%s\n",
> nvme_tcp_queue_id(queue), div_u64(overrun, 1000),
> + queue->nr_send, queue->nr_sqe, queue->nr_recv, queue->nr_cqe,
> list_empty(&queue->send_list) ? " empty" : "", queue->request ? " pending" : "");
> }
> if (pending)