[PATCH 1/2] nvme-tcp: validate R2T PDU in nvme_tcp_handle_r2t()

Sagi Grimberg sagi at grimberg.me
Mon Nov 22 02:43:42 PST 2021



On 11/22/21 12:12 PM, Varun Prakash wrote:
> If maxh2cdata < r2t_length then driver will form multiple
> H2CData PDUs, validate R2T PDU in nvme_tcp_handle_r2t() to
> reuse nvme_tcp_setup_h2c_data_pdu().
> 
> Signed-off-by: Varun Prakash <varun at chelsio.com>
> ---
>   drivers/nvme/host/tcp.c | 79 +++++++++++++++++++++++++++----------------------
>   1 file changed, 44 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 33bc83d..92e07d2 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -44,7 +44,9 @@ struct nvme_tcp_request {
>   	u32			data_len;
>   	u32			pdu_len;
>   	u32			pdu_sent;
> -	u16			ttag;
> +	u32			h2cdata_left;
> +	u32			h2cdata_offset;
> +	u16			h2cdata_ttag;
>   	__le16			status;
>   	struct list_head	entry;
>   	struct llist_node	lentry;
> @@ -572,8 +574,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
>   	return ret;
>   }
>   
> -static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
> -		struct nvme_tcp_r2t_pdu *pdu)
> +static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
>   {
>   	struct nvme_tcp_data_pdu *data = req->pdu;
>   	struct nvme_tcp_queue *queue = req->queue;
> @@ -581,35 +582,11 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
>   	u8 hdgst = nvme_tcp_hdgst_len(queue);
>   	u8 ddgst = nvme_tcp_ddgst_len(queue);
>   
> -	req->pdu_len = le32_to_cpu(pdu->r2t_length);
> +	req->pdu_len = req->h2cdata_left;
>   	req->pdu_sent = 0;
>   
> -	if (unlikely(!req->pdu_len)) {
> -		dev_err(queue->ctrl->ctrl.device,
> -			"req %d r2t len is %u, probably a bug...\n",
> -			rq->tag, req->pdu_len);
> -		return -EPROTO;
> -	}
> -
> -	if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
> -		dev_err(queue->ctrl->ctrl.device,
> -			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
> -			rq->tag, req->pdu_len, req->data_len,
> -			req->data_sent);
> -		return -EPROTO;
> -	}
> -
> -	if (unlikely(le32_to_cpu(pdu->r2t_offset) < req->data_sent)) {
> -		dev_err(queue->ctrl->ctrl.device,
> -			"req %d unexpected r2t offset %u (expected %zu)\n",
> -			rq->tag, le32_to_cpu(pdu->r2t_offset),
> -			req->data_sent);
> -		return -EPROTO;
> -	}
> -
>   	memset(data, 0, sizeof(*data));
>   	data->hdr.type = nvme_tcp_h2c_data;
> -	data->hdr.flags = NVME_TCP_F_DATA_LAST;
>   	if (queue->hdr_digest)
>   		data->hdr.flags |= NVME_TCP_F_HDGST;
>   	if (queue->data_digest)
> @@ -618,11 +595,16 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
>   	data->hdr.pdo = data->hdr.hlen + hdgst;
>   	data->hdr.plen =
>   		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
> -	data->ttag = pdu->ttag;
> +	data->ttag = req->h2cdata_ttag;
>   	data->command_id = nvme_cid(rq);
> -	data->data_offset = pdu->r2t_offset;
> +	data->data_offset = cpu_to_le32(req->h2cdata_offset);
>   	data->data_length = cpu_to_le32(req->pdu_len);
> -	return 0;
> +
> +	req->h2cdata_left -= req->pdu_len;
> +	req->h2cdata_offset += req->pdu_len;
> +
> +	if (!req->h2cdata_left)
> +		data->hdr.flags |= NVME_TCP_F_DATA_LAST;

No need to move the hdr.flags setting here to the bottom, you can keep
it where it was. Also perhaps let's have h2cdata_left and h2cdata_offset
advanced right after they were used?

I'm thinking about something like this:
--
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4d49fc816a74..16d45afe5753 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -44,6 +44,8 @@ struct nvme_tcp_request {
         u32                     data_len;
         u32                     pdu_len;
         u32                     pdu_sent;
+       u32                     h2cdata_left;
+       u32                     h2cdata_offset;
         u16                     ttag;
         __le16                  status;
         struct list_head        entry;
@@ -95,6 +97,7 @@ struct nvme_tcp_queue {
         struct nvme_tcp_request *request;

         int                     queue_size;
+       u32                     maxh2cdata;
         size_t                  cmnd_capsule_len;
         struct nvme_tcp_ctrl    *ctrl;
         unsigned long           flags;
@@ -572,21 +575,24 @@ static int nvme_tcp_handle_comp(struct 
nvme_tcp_queue *queue,
         return ret;
  }

-static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
-               struct nvme_tcp_r2t_pdu *pdu)
+static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
  {
         struct nvme_tcp_data_pdu *data = req->pdu;
         struct nvme_tcp_queue *queue = req->queue;
         struct request *rq = blk_mq_rq_from_pdu(req);
         u8 hdgst = nvme_tcp_hdgst_len(queue);
         u8 ddgst = nvme_tcp_ddgst_len(queue);
+       u32 h2cdata_sent = req->pdu_len;

-       req->pdu_len = le32_to_cpu(pdu->r2t_length);
+       req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
+       req->h2cdata_left -= req->pdu_len;
+       req->h2cdata_offset += h2cdata_sent;
         req->pdu_sent = 0;

         memset(data, 0, sizeof(*data));
         data->hdr.type = nvme_tcp_h2c_data;
-       data->hdr.flags = NVME_TCP_F_DATA_LAST;
+       if (!req->h2cdata_left)
+               data->hdr.flags = NVME_TCP_F_DATA_LAST;
         if (queue->hdr_digest)
                 data->hdr.flags |= NVME_TCP_F_HDGST;
         if (queue->data_digest)
@@ -595,9 +601,9 @@ static void nvme_tcp_setup_h2c_data_pdu(struct 
nvme_tcp_request *req,
         data->hdr.pdo = data->hdr.hlen + hdgst;
         data->hdr.plen =
                 cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
-       data->ttag = pdu->ttag;
+       data->ttag = req->ttag;
         data->command_id = nvme_cid(rq);
-       data->data_offset = pdu->r2t_offset;
+       data->data_offset = cpu_to_le32(req->h2cdata_offset);
         data->data_length = cpu_to_le32(req->pdu_len);

         req->state = NVME_TCP_SEND_H2C_PDU;
@@ -643,7 +649,10 @@ static int nvme_tcp_handle_r2t(struct 
nvme_tcp_queue *queue,
                 return -EPROTO;
         }

-       nvme_tcp_setup_h2c_data_pdu(req, pdu);
+       req->h2cdata_left = r2t_length;
+       req->h2cdata_offset = le32_to_cpu(pdu->r2t_offset);
+       req->ttag = pdu->ttag;
+       nvme_tcp_setup_h2c_data_pdu(req);
         nvme_tcp_queue_request(req, false, true);

         return 0;
@@ -967,7 +976,10 @@ static int nvme_tcp_try_send_data(struct 
nvme_tcp_request *req)
                                 req->state = NVME_TCP_SEND_DDGST;
                                 req->offset = 0;
                         } else {
-                               nvme_tcp_done_send_req(queue);
+                               if (req->h2cdata_left)
+                                       nvme_tcp_setup_h2c_data_pdu(req);
+                               else
+                                       nvme_tcp_done_send_req(queue);
                         }
                         return 1;
                 }
@@ -1018,16 +1030,20 @@ static int nvme_tcp_try_send_data_pdu(struct 
nvme_tcp_request *req)
  {
         struct nvme_tcp_queue *queue = req->queue;
         struct nvme_tcp_data_pdu *pdu = req->pdu;
+       struct page *page = virt_to_page(pdu);
+       size_t offset = offset_in_page(pdu) + req->offset;
         u8 hdgst = nvme_tcp_hdgst_len(queue);
         int len = sizeof(*pdu) - req->offset + hdgst;
+       int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST;
         int ret;

         if (queue->hdr_digest && !req->offset)
                 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));

-       ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
-                       offset_in_page(pdu) + req->offset, len,
-                       MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+       if (!req->h2cdata_left)
+               ret = kernel_sendpage(queue->sock, page, offset, len, 
flags);
+       else
+               ret = sock_no_sendpage(queue->sock, page, offset, len, 
flags);
         if (unlikely(ret <= 0))
                 return ret;

@@ -1064,7 +1080,10 @@ static int nvme_tcp_try_send_ddgst(struct 
nvme_tcp_request *req)
                 return ret;

         if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
-               nvme_tcp_done_send_req(queue);
+               if (req->h2cdata_left)
+                       nvme_tcp_setup_h2c_data_pdu(req);
+               else
+                       nvme_tcp_done_send_req(queue);
                 return 1;
         }

@@ -1256,6 +1275,7 @@ static int nvme_tcp_init_connection(struct 
nvme_tcp_queue *queue)
         struct msghdr msg = {};
         struct kvec iov;
         bool ctrl_hdgst, ctrl_ddgst;
+       u32 maxh2cdata;
         int ret;

         icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
@@ -1339,6 +1359,14 @@ static int nvme_tcp_init_connection(struct 
nvme_tcp_queue *queue)
                 goto free_icresp;
         }

+       maxh2cdata = le32_to_cpu(icresp->maxdata);
+       if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
+               pr_err("queue %d: invalid maxh2cdata returned %u\n",
+                      nvme_tcp_queue_id(queue), maxh2cdata);
+               goto free_icresp;
+       }
+       queue->maxh2cdata = maxh2cdata;
+
         ret = 0;
  free_icresp:
         kfree(icresp);
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 959e0bd9a913..75470159a194 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -12,6 +12,7 @@
  #define NVME_TCP_DISC_PORT     8009
  #define NVME_TCP_ADMIN_CCSZ    SZ_8K
  #define NVME_TCP_DIGEST_LENGTH 4
+#define NVME_TCP_MIN_MAXH2CDATA 4096

  enum nvme_tcp_pfv {
         NVME_TCP_PFV_1_0 = 0x0,
--



More information about the Linux-nvme mailing list