nvmet_sq_destroy stuck forever when data digest is turned on
Sagi Grimberg
sagi at grimberg.me
Tue Sep 19 05:22:40 PDT 2023
> Hi
>
> I have an issue with nvmet_tcp_release_queue_work hitting a hung task after waiting 2 minutes for nvmet_sq_destroy.
> This issue reproduces only when data digest is on.
>
> I am inspecting the code of nvmet_tcp_release_queue_work and I see that the code handles 'data in' commands.
> This means that it calls nvmet_req_uninit for any command whose data is still in transit.
>
> There might be commands whose data transfer is already done, but whose data digest has not yet been received from the socket (i.e. rcv_state is NVMET_TCP_RECV_DDGST).
> The data digest will never be read from the socket because the socket is blocked by NVMET_TCP_RECV_ERR.
> Hence nvmet_sq_destroy will be stuck forever waiting for nvmet_tcp_try_recv_ddgst to execute.
>
> Can you suggest a fix for such an issue?
Does this (untested) patch make the issue go away?
--
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 97d07488072d..f5eaf3457ada 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -204,10 +204,16 @@ static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
return cmd - queue->cmds;
}
+static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
+{
+ return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
+}
+
static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
{
- return nvme_is_write(cmd->req.cmd) &&
- cmd->rbytes_done < cmd->req.transfer_len;
+ u32 total_len = cmd->req.transfer_len + nvmet_tcp_ddgst_len(cmd->queue);
+
+ return nvme_is_write(cmd->req.cmd) && cmd->rbytes_done < total_len;
}
static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
@@ -265,11 +271,6 @@ static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}
-static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
-{
- return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
-}
-
static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
void *pdu, size_t len)
{
@@ -1221,8 +1222,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
goto out;
}
- if (cmd->rbytes_done == cmd->req.transfer_len)
- nvmet_tcp_execute_request(cmd);
+ cmd->rbytes_done += NVME_TCP_DIGEST_LENGTH;
+ nvmet_tcp_execute_request(cmd);
ret = 0;
out:
--
More information about the Linux-nvme
mailing list