nvme tcp receive errors

Sagi Grimberg sagi at grimberg.me
Wed Apr 21 06:33:30 BST 2021


Hey Keith, sorry for the late response, been a bit under water
lately...

> Sorry, this was a mistake in the reporting. The last one's data length
> was only 808; 832 was the packet length.
> 
>> Can you share for each of the c2hdata PDUs what is:
>> - hlen
> 
> 24 for all of them
> 
>> - plen
> 
> 11 transfers at 1440, 832 for the last one
> 
>> - data_length
> 
> 11 transfers at 1416, 808 for the last one
> 
>> - data_offset
> 
> 0, 1416, 2832, 4248, 5664, 7080, 8496, 9912, 11328, 12744, 14160, 15576
> 
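
Those numbers line up with the expected c2hdata accounting: each plen
is hlen + data_length (so no data digest bytes on the wire here), the
offsets advance by exactly the previous PDU's data_length, and the
whole thing sums to a 16384-byte transfer. Just to make that concrete,
here is a quick userspace sketch; the values are taken from your
report above, nothing below is driver code:
--
/* Sanity-check the reported c2hdata PDU accounting: 11 PDUs of 1416
 * data bytes plus a final 808-byte PDU, hlen 24 for all of them.
 * Assumes plen = hlen + data_length (i.e. no digests on the wire),
 * which is what the reported plen values imply.
 */
#include <stdio.h>

int main(void)
{
	const unsigned hlen = 24;
	const unsigned data_len[] = { 1416, 1416, 1416, 1416, 1416, 1416,
				      1416, 1416, 1416, 1416, 1416, 808 };
	unsigned offset = 0;

	for (unsigned i = 0; i < sizeof(data_len) / sizeof(data_len[0]); i++) {
		/* plen covers the 24-byte header plus the payload */
		printf("pdu %2u: plen=%4u data_length=%4u data_offset=%5u\n",
		       i, hlen + data_len[i], data_len[i], offset);
		offset += data_len[i];
	}
	printf("total transfer: %u bytes\n", offset); /* expect 16384 */
	return 0;
}
--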

Can you retry with the following applied on top of what I sent you?
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c60c1dcfb587..ff39d37e9793 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -63,6 +63,7 @@ struct nvme_tcp_request {
         /* send state */
         size_t                  offset;
         size_t                  data_sent;
+       size_t                  data_recvd;
         enum nvme_tcp_send_state state;
         enum nvme_tcp_cmd_state cmd_state;
  };
@@ -769,6 +770,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
                 *len -= recv_len;
                 *offset += recv_len;
                 queue->data_remaining -= recv_len;
+               req->data_recvd += recv_len;
         }

         if (!queue->data_remaining) {
@@ -776,6 +778,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
                        nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
                         queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
                 } else {
+                       BUG_ON(req->data_recvd != req->data_len);
                         req->cmd_state = NVME_TCP_CMD_DATA_DONE;
                         if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
                                 req->cmd_state = NVME_TCP_CMD_DONE;
--

There might be a hidden assumption here that breaks if multiple
c2hdata PDUs arrive per request...

If that is the case, you can try the following (on top):
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index ff39d37e9793..aabec8e6810a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -773,19 +773,20 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
                 req->data_recvd += recv_len;
         }

-       if (!queue->data_remaining) {
+       if (!queue->data_remaining)
+               nvme_tcp_init_recv_ctx(queue);
+
+       if (req->data_recvd == req->data_len) {
                 if (queue->data_digest) {
                        nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
                         queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
                 } else {
-                       BUG_ON(req->data_recvd != req->data_len);
                         req->cmd_state = NVME_TCP_CMD_DATA_DONE;
                         if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
                                 req->cmd_state = NVME_TCP_CMD_DONE;
                                 nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
                                 queue->nr_cqe++;
                         }
-                       nvme_tcp_init_recv_ctx(queue);
                 }
         }
--
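
The idea being: the end of a c2hdata PDU (data_remaining hitting zero)
is a per-PDU event, not a per-request one, so the request should only
complete once data_recvd reaches the request's expected length. A toy
userspace model of that accounting is below; the 2x8192 split is made
up for illustration and the names just mirror the driver fields, this
is not driver code:
--
/* Toy model of the suspected failure mode: one request's 16384 bytes
 * delivered as two c2hdata PDUs instead of one. Shows why keying
 * request completion off data_remaining == 0 completes too early,
 * while data_recvd == data_len completes exactly once.
 */
#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	const unsigned data_len = 16384;	    /* request transfer length */
	const unsigned pdu_data[] = { 8192, 8192 }; /* two PDUs per request */
	unsigned data_recvd = 0;

	for (unsigned i = 0; i < 2; i++) {
		unsigned data_remaining = pdu_data[i];

		/* receive this PDU's payload */
		data_recvd += data_remaining;
		data_remaining = 0;

		/* old logic: end of PDU treated as end of request */
		bool old_done = (data_remaining == 0);
		/* patched logic: done only when all request data arrived */
		bool new_done = (data_recvd == data_len);

		printf("pdu %u: old_done=%d new_done=%d (recvd %u/%u)\n",
		       i, old_done, new_done, data_recvd, data_len);
	}
	return 0;
}
--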


