[PATCH v5 net-next 25/36] nvme-tcp: TX DDGST offload

Boris Pismenny borisp at nvidia.com
Thu Jul 22 04:03:14 PDT 2021


From: Yoray Zack <yorayz at nvidia.com>

This patch adds support for TX DDGST offload.

Enable the TX side of DDGST offload when supported.

If supported, NVMEoTCP will:
1. Stop computing the DDGST on transmitted PDUs.
2. Send a dummy digest (all zeros).

Signed-off-by: Yoray Zack <yorayz at nvidia.com>
---
 drivers/nvme/host/tcp.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b338cd2d9f65..b2a4316eddce 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -70,6 +70,7 @@ enum nvme_tcp_queue_flags {
 	NVME_TCP_Q_POLLING	= 2,
 	NVME_TCP_Q_OFF_DDP	= 3,
 	NVME_TCP_Q_OFF_DDGST_RX = 4,
+	NVME_TCP_Q_OFF_DDGST_TX = 5,
 };
 
 enum nvme_tcp_recv_state {
@@ -372,6 +373,7 @@ static int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
 	if (netdev->features & NETIF_F_HW_ULP_DDP) {
 		set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
 		set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+		set_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags);
 	}
 
 	return ret;
@@ -388,6 +390,7 @@ static void nvme_tcp_unoffload_socket(struct nvme_tcp_queue *queue)
 
 	clear_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
 	clear_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+	clear_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags);
 
 	netdev->ulp_ddp_ops->ulp_ddp_sk_del(netdev, queue->sock->sk);
 
@@ -1269,6 +1272,7 @@ static void nvme_tcp_fail_request(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
+	bool is_offload = test_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags);
 
 	while (true) {
 		struct page *page = nvme_tcp_req_cur_page(req);
@@ -1277,6 +1281,9 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		bool last = nvme_tcp_pdu_last_send(req, len);
 		int ret, flags = MSG_DONTWAIT;
 
+		if (is_offload && queue->data_digest)
+			flags |= MSG_DDP_CRC;
+
 		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
 			flags |= MSG_EOR;
 		else
@@ -1292,15 +1299,19 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		if (ret <= 0)
 			return ret;
 
-		if (queue->data_digest)
+		if (queue->data_digest && !is_offload)
 			nvme_tcp_ddgst_update(queue->snd_hash, page,
 					offset, ret);
 
 		/* fully successful last write*/
 		if (last && ret == len) {
 			if (queue->data_digest) {
-				nvme_tcp_ddgst_final(queue->snd_hash,
-					&req->ddgst);
+				if (!is_offload)
+					nvme_tcp_ddgst_final(queue->snd_hash,
+						&req->ddgst);
+				else
+					req->ddgst = 0;
+
 				req->state = NVME_TCP_SEND_DDGST;
 				req->offset = 0;
 			} else {
@@ -1324,6 +1335,9 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 	int flags = MSG_DONTWAIT;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags) && queue->data_digest)
+		flags |= MSG_DDP_CRC;
+
 	if (inline_data || nvme_tcp_queue_more(queue))
 		flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	else
@@ -1357,18 +1371,21 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 
 static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 {
+	int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST;
 	struct nvme_tcp_queue *queue = req->queue;
 	struct nvme_tcp_data_pdu *pdu = req->pdu;
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) - req->offset + hdgst;
 	int ret;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags) && queue->data_digest)
+		flags |= MSG_DDP_CRC;
+
 	if (queue->hdr_digest && !req->offset)
 		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
 
 	ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
-			offset_in_page(pdu) + req->offset, len,
-			MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+			offset_in_page(pdu) + req->offset, len, flags);
 	if (unlikely(ret <= 0))
 		return ret;
 
@@ -1399,6 +1416,9 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
 	else
 		msg.msg_flags |= MSG_EOR;
 
+	if (test_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags))
+		msg.msg_flags |= MSG_DDP_CRC;
+
 	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
 	if (unlikely(ret <= 0))
 		return ret;
@@ -1908,7 +1928,8 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
 	cancel_work_sync(&queue->io_work);
 
 	if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
-	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+	    test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags) ||
+	    test_bit(NVME_TCP_Q_OFF_DDGST_TX, &queue->flags))
 		nvme_tcp_unoffload_socket(queue);
 }
 
-- 
2.24.1




More information about the Linux-nvme mailing list