[PATCH] nvme-tcp: print correct opcode on timeout and error handling

Sagi Grimberg <sagi@grimberg.me>
Wed Mar 8 03:06:37 PST 2023


> In timeout and error handling, various information about the failed
> command is reported, but the printed opcode may not be correct.
> 
> The opcode is obtained from the nvme_command structure referenced by the
> 'cmd' member in the nvme_request structure.
> (i.e. nvme_req(req)->cmd->common.opcode)
> 
> For the nvme-tcp driver, the 'cmd' member in the nvme_request structure
> points to a structure within the page fragment allocated by
> nvme_tcp_init_request().  This page fragment is used as a command capsule
> PDU and may then be reused as an h2c data PDU, so the nvme_command
> referenced by the 'cmd' member may already have been overwritten.
> 
> To fix this problem, when setting up the nvme_command, keep the opcode in
> a newly added member of the nvme_tcp_request structure and use that instead.
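
(For reference, the per-request copy described in the quoted patch would
look roughly like the sketch below; the 'opcode' member name and the
reporting site are illustrative, not necessarily the patch's exact code.)

struct nvme_tcp_request {
	struct nvme_request	req;
	void			*pdu;
	/* ... */
	u8			opcode;	/* saved at setup time, survives pdu reuse */
};

/* in nvme_tcp_setup_cmd_pdu(), once the command is built: */
req->opcode = pdu->cmd.common.opcode;

/* in timeout/error handling, report the saved copy rather than
 * nvme_req(rq)->cmd->common.opcode, which points into the pdu: */
pr_warn("I/O timeout: opcode %#x\n", req->opcode);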

I'm wondering if we should just not reuse the cmd pdu for a subsequent
data pdu...

It will take more space per request (adding up to a few MBs with lots
of requests and controllers)...
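
For scale (illustrative numbers, not from the patch): struct
nvme_tcp_data_pdu is 24 bytes, so e.g. 64 queues of depth 128 add
64 * 128 * 24 = 192 KB per controller, and a few dozen controllers
push that into the MB range.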

but it will prevent propagating this into the nvme core...

Something like the below (untested):
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index ef27122acce6..ffbcd5c7cb26 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -207,6 +207,16 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
         return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
  }

+static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req)
+{
+       return req->pdu;
+}
+
+static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req)
+{
+       return req->pdu + sizeof(struct nvme_tcp_cmd_pdu);
+}
+
  static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
  {
         if (nvme_is_fabrics(req->req.cmd))
@@ -470,7 +480,8 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set,
         u8 hdgst = nvme_tcp_hdgst_len(queue);

         req->pdu = page_frag_alloc(&queue->pf_cache,
-               sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
+               sizeof(struct nvme_tcp_cmd_pdu) +
+               sizeof(struct nvme_tcp_data_pdu) + hdgst,
                 GFP_KERNEL | __GFP_ZERO);
         if (!req->pdu)
                 return -ENOMEM;
@@ -613,7 +624,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,

  static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
  {
-       struct nvme_tcp_data_pdu *data = req->pdu;
+       struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req);
         struct nvme_tcp_queue *queue = req->queue;
         struct request *rq = blk_mq_rq_from_pdu(req);
         u32 h2cdata_sent = req->pdu_len;
@@ -1035,7 +1046,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
  static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
  {
         struct nvme_tcp_queue *queue = req->queue;
-       struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+       struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
         bool inline_data = nvme_tcp_has_inline_data(req);
         u8 hdgst = nvme_tcp_hdgst_len(queue);
         int len = sizeof(*pdu) + hdgst - req->offset;
@@ -1074,7 +1085,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
  static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
  {
         struct nvme_tcp_queue *queue = req->queue;
-       struct nvme_tcp_data_pdu *pdu = req->pdu;
+       struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
         u8 hdgst = nvme_tcp_hdgst_len(queue);
         int len = sizeof(*pdu) - req->offset + hdgst;
         int ret;
@@ -2281,7 +2292,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
  {
         struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
         struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
-       struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+       struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
         u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
         int qid = nvme_tcp_queue_id(req->queue);

@@ -2320,7 +2331,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
                         struct request *rq)
  {
         struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-       struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+       struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
         struct nvme_command *c = &pdu->cmd;

         c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -2340,7 +2351,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
                 struct request *rq)
  {
         struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-       struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+       struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
         struct nvme_tcp_queue *queue = req->queue;
         u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
         blk_status_t ret;
--
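
To make the resulting layout explicit: each request's page-frag
allocation now holds both PDUs back to back, roughly

	req->pdu
	[ struct nvme_tcp_cmd_pdu | struct nvme_tcp_data_pdu | hdgst ]

so nvme_tcp_req_cmd_pdu() and nvme_tcp_req_data_pdu() just return fixed
offsets into the same fragment, and the command capsule (including the
opcode that timeout/error handling wants to print) is never overwritten
by a later h2c data PDU.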
