[RFC PATCH 1/2] nvmet-tcp: enable io_work() idle period tracking

Wunderlich, Mark mark.wunderlich at intel.com
Mon Dec 14 20:11:14 EST 2020


nvmet-tcp: enable io_work() idle period tracking

Add an 'idle poll period' option to io_work() to support
network devices that perform better under a more polled model.
With this option, a queue is assigned a period of time to
sample for activity before it is determined to be 'idle'.
Until the queue is idle, the work item is requeued.

Received commands, other than asynchronous event requests, are
tracked as valid pending activity that keeps the queue from being
considered 'idle'.

The pre-existing behavior is preserved when the idle poll period
module option is not specified.
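
As a usage note (assuming the target module loads under its usual
nvmet_tcp name), the period could be set at module load time with
idle_poll_period=<usecs>, or adjusted at runtime through
/sys/module/nvmet_tcp/parameters/idle_poll_period, since the
parameter is registered with 0644 permissions.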

Signed-off-by: Mark Wunderlich <mark.wunderlich at intel.com>
---
 drivers/nvme/target/tcp.c |   91 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 89 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index dc1f0f647189..739d67985d93 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -29,6 +29,16 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
 
+/*
+ * Define a time period (in usecs) that io_work() shall poll an activated
+ * queue before determining it to be idle.  This optional module behavior
+ * can enhance results for NIC solutions that support socket optimized
+ * packet processing.
+ */
+static int idle_poll_period;
+module_param(idle_poll_period, int, 0644);
+MODULE_PARM_DESC(idle_poll_period, "nvmet tcp io_work poll till idle time period in usecs");
+
 #define NVMET_TCP_RECV_BUDGET		8
 #define NVMET_TCP_SEND_BUDGET		8
 #define NVMET_TCP_IO_WORK_BUDGET	64
@@ -96,10 +106,13 @@ struct nvmet_tcp_queue {
 	struct work_struct	io_work;
 	struct nvmet_cq		nvme_cq;
 	struct nvmet_sq		nvme_sq;
+	unsigned long           idle_poll_period;
 
 	/* send state */
 	struct nvmet_tcp_cmd	*cmds;
 	unsigned int		nr_cmds;
+	unsigned int		pend_cmds;
+	unsigned int		pend_async_cmds;
 	struct list_head	free_list;
 	struct llist_head	resp_list;
 	struct list_head	resp_send_list;
@@ -206,11 +219,13 @@ nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
 	cmd->pdu_recv = 0;
 	cmd->iov = NULL;
 	cmd->flags = 0;
+	queue->pend_cmds++;
 	return cmd;
 }
 
 static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
 {
+	cmd->queue->pend_cmds--;
 	if (unlikely(cmd == &cmd->queue->connect))
 		return;
 
@@ -726,6 +741,10 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
 {
 	int i, ret = 0;
 
+	/* expect nothing to send if nothing pending */
+	if (!queue->pend_cmds)
+		goto done;
+
 	for (i = 0; i < budget; i++) {
 		ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
 		if (unlikely(ret < 0)) {
@@ -932,6 +951,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 		return -ENOMEM;
 	}
 
+	if (queue->idx == 0 && nvme_cmd->common.opcode == nvme_admin_async_event)
+		queue->pend_async_cmds++;
+
 	req = &queue->cmd->req;
 	memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));
 
@@ -1198,12 +1220,50 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
 	spin_unlock(&queue->state_lock);
 }
 
+static inline bool nvmet_tcp_cmds_pending(struct nvmet_tcp_queue *queue)
+{
+	if (queue->pend_cmds > queue->pend_async_cmds)
+		return true;
+	return false;
+}
+
+/*
+ * This worker function processes all send and recv packet
+ * activity for a queue.  It loops over the queue until either
+ * its work budget is exhausted or an iteration completes with
+ * no activity.
+ *
+ * Two exit modes are possible.
+ *
+ * The default 'one shot' mode, where the worker re-queues
+ * itself only if any send or recv activity was recorded.
+ *
+ * A special 'extended deadline' mode, where in addition to re-queueing
+ * itself because of activity, the worker also re-queues itself while
+ * an assigned 'idle' period of time has not yet expired.  The worker
+ * consumes from that time period, across many potential invocations,
+ * until it expires; a queue that shows activity is always awarded
+ * a fresh time period.
+ */
 static void nvmet_tcp_io_work(struct work_struct *w)
 {
 	struct nvmet_tcp_queue *queue =
 		container_of(w, struct nvmet_tcp_queue, io_work);
-	bool pending;
+	bool pending, idle_poll_mode = false;
 	int ret, ops = 0;
+	long period_remaining;
+	unsigned long idle_deadline, bp_usec;
+
+	/* Set up optional tracking of the idle time period */
+	if (idle_poll_period > 0) {
+		bp_usec = idle_poll_period;
+		idle_poll_mode = true;
+
+		/* Assign the queue's idle period if no time is remaining */
+		if (!queue->idle_poll_period)
+			queue->idle_poll_period = usecs_to_jiffies(bp_usec);
+		idle_deadline = jiffies + queue->idle_poll_period;
+	}
 
 	do {
 		pending = false;
@@ -1220,10 +1280,37 @@ static void nvmet_tcp_io_work(struct work_struct *w)
 		else if (ret < 0)
 			return;
 
+	/*
+	 * When a loop iteration records no activity we assume this
+	 * queue has reached an idle point, so we exit the loop to give
+	 * other queues waiting on this cpu a fair chance to run.
+	 */
 	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
 
+	/* If the special deadline mode is active, determine whether the queue
+	 * has reached its idle deadline.  Any ops activity awards the queue a
+	 * fresh deadline period; otherwise only the unexpired remainder is kept.
+	 */
+	if (idle_poll_mode) {
+		/*
+		 * Clear so an active queue is awarded a fresh period, or so
+		 * it is reset for future activity once the queue goes idle.
+		 */
+		queue->idle_poll_period = 0;
+		if (ops > 0 || nvmet_tcp_cmds_pending(queue)) {
+			pending = true;
+		} else if (!time_after(jiffies, idle_deadline)) {
+			period_remaining = (long)(idle_deadline - jiffies);
+			if (period_remaining > 0) {
+				pending = true;
+				queue->idle_poll_period = period_remaining;
+			}
+		}
+	}
+
 	/*
-	 * We exahusted our budget, requeue our selves
+	 * Requeue ourselves when pending indicates that activity was
+	 * recorded, or the queue has not yet reached its optional idle period.
 	 */
 	if (pending)
 		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);

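For reference, here is a standalone userspace sketch of the deadline
bookkeeping the patch applies in nvmet_tcp_io_work().  It is for
illustration only: the names below are hypothetical, the kernel's
jiffies arithmetic is replaced with CLOCK_MONOTONIC, and a simple
counter stands in for real send/recv work and pending commands.

#define _POSIX_C_SOURCE 199309L
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define IO_WORK_BUDGET	64

/* stand-in for the idle_poll_period module parameter, in usecs */
static long long idle_poll_period_usecs = 5000;

struct queue {
	long long remaining_usecs;	/* unconsumed part of the idle period */
	int fake_ops;			/* simulated pending send/recv work */
};

static long long now_usecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
}

/* Returns true when the worker should be requeued, mirroring io_work(). */
static bool io_work(struct queue *q)
{
	long long idle_deadline = 0;
	bool pending;
	int ops = 0;

	if (idle_poll_period_usecs > 0) {
		/* award a fresh period only when none is carried over */
		if (!q->remaining_usecs)
			q->remaining_usecs = idle_poll_period_usecs;
		idle_deadline = now_usecs() + q->remaining_usecs;
	}

	do {
		pending = false;
		if (q->fake_ops > 0) {	/* stand-in for send/recv progress */
			q->fake_ops--;
			ops++;
			pending = true;
		}
	} while (pending && ops < IO_WORK_BUDGET);

	if (idle_poll_period_usecs > 0) {
		q->remaining_usecs = 0;
		if (ops > 0 || q->fake_ops > 0) {
			/* activity or pending work: fresh period next time */
			pending = true;
		} else {
			long long remaining = idle_deadline - now_usecs();

			/* idle, but deadline not reached: carry the remainder */
			if (remaining > 0) {
				q->remaining_usecs = remaining;
				pending = true;
			}
		}
	}

	return pending;
}

int main(void)
{
	struct queue q = { .fake_ops = 100 };
	int requeues = 0;

	while (io_work(&q))
		requeues++;

	printf("worker requeued %d times before going idle\n", requeues);
	return 0;
}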