[PATCH 3/3] nvme-tcp: fix I/O stalls on congested sockets

Kamaljit Singh Kamaljit.Singh1 at wdc.com
Thu Apr 17 16:06:59 PDT 2025


Sagi,
I tried both of these patches, but it looks like #1 causes an infinite loop; dmesg was full of panics.
I tried #1 on its own and later #1+#2 together. Both failed the same way.

>How about these two (untested) patches:
>[1 based on your recv-side fix]:
>--
>diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>index 72d260201d8c..4eb9a2dec07e 100644
>--- a/drivers/nvme/host/tcp.c
>+++ b/drivers/nvme/host/tcp.c
>@@ -1348,7 +1348,7 @@ static int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
>         queue->nr_cqe = 0;
>         consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
>         release_sock(sk);
>-       return consumed;
>+       return consumed == -EAGAIN ? 0 : consumed;
>  }
>
>  static void nvme_tcp_io_work(struct work_struct *w)
>--
>
>[2 based on your partial write fix]:
>diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
>index 4eb9a2dec07e..daf59e75cf15 100644
>--- a/drivers/nvme/host/tcp.c
>+++ b/drivers/nvme/host/tcp.c
>@@ -129,6 +129,7 @@ enum nvme_tcp_queue_flags {
>         NVME_TCP_Q_LIVE         = 1,
>         NVME_TCP_Q_POLLING      = 2,
>         NVME_TCP_Q_IO_CPU_SET   = 3,
>+       NVME_TCP_Q_WAKE_SENDER  = 4,
>  };
>
>  enum nvme_tcp_recv_state {
>@@ -1063,6 +1064,7 @@ static void nvme_tcp_write_space(struct sock *sk)
>         queue = sk->sk_user_data;
>         if (likely(queue && sk_stream_is_writeable(sk))) {
>                 clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
>+               set_bit(NVME_TCP_Q_WAKE_SENDER, &queue->flags);
>                 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
>         }
>         read_unlock_bh(&sk->sk_callback_lock);
>@@ -1357,6 +1359,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
>                 container_of(w, struct nvme_tcp_queue, io_work);
>         unsigned long deadline = jiffies + msecs_to_jiffies(1);
>
>+       clear_bit(NVME_TCP_Q_WAKE_SENDER, &queue->flags);
>         do {
>                 bool pending = false;
>                 int result;
>@@ -1376,7 +1379,15 @@ static void nvme_tcp_io_work(struct work_struct *w)
>                 else if (unlikely(result < 0))
>                         return;
>
>-               if (!pending || !queue->rd_enabled)
>+               /* did we get some space after spending time in recv ? */
>+               if (nvme_tcp_queue_has_pending(queue) &&
>+                   sk_stream_is_writeable(queue->sock->sk))
>+                       pending = true;
>+
>+               if (!queue->rd_enabled)
>+                       return;
>+
>+               if (!pending && !test_bit(NVME_TCP_Q_WAKE_SENDER, &queue->flags))
>                         return;
>
>         } while (!time_after(jiffies, deadline)); /* quota is exhausted */
>--
> 

-Kamaljit



More information about the Linux-nvme mailing list