[PATCH 4/5] nvmet_tcp: activate new poll group
Wunderlich, Mark
mark.wunderlich@intel.com
Thu Aug 27 21:01:00 EDT 2020
nvmet_tcp: activate new poll group
Shift worker focus within nvmet_tcp_io_work() from an
individual queue to the poll group. The function will
process the group's active work_list for a bounded
time period. At the end of this polling time period the
worker will re-queue itself if any activity was recorded.
Early exit from the polling period is tracked across
multiple iterations of the worker. The worker will not
re-queue itself if a complete polling period has been
performed with no recorded activity.
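
To make the re-queue policy above concrete, the following is a minimal
userspace sketch of the same pattern, assuming simplified stand-ins
(struct sim_group, struct sim_queue, poll_queue(), POLL_BUDGET_USEC)
that are not part of the driver: each worker invocation polls every
queue in the group until the time budget expires or a full pass finds
no work, and the worker only re-arms itself while the group still
looks active. It illustrates the scheduling idea only and does not
mirror nvmet_tcp_io_work() exactly.

#define _POSIX_C_SOURCE 200809L
#include <stdbool.h>
#include <stddef.h>
#include <time.h>

#define POLL_BUDGET_USEC 10000LL	/* default budget, analogous to bp_usec */

struct sim_queue {
	int backlog;			/* pending "ops" on this queue */
};

struct sim_group {
	struct sim_queue *queues;
	size_t nr_queues;
	long long idle_deadline;	/* mirrors group->deadline */
	bool idle_armed;
};

static long long now_usec(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (long long)ts.tv_sec * 1000000LL + ts.tv_nsec / 1000;
}

/* Do a bounded amount of work on one queue, return the ops completed. */
static int poll_queue(struct sim_queue *q)
{
	int ops = q->backlog > 8 ? 8 : q->backlog;

	q->backlog -= ops;
	return ops;
}

/* Return true if the worker should re-queue itself. */
static bool group_io_work(struct sim_group *g)
{
	long long deadline = now_usec() + POLL_BUDGET_USEC;
	long long total_ops = 0;

	do {
		long long ops = 0;
		size_t i;

		for (i = 0; i < g->nr_queues; i++)
			ops += poll_queue(&g->queues[i]);

		total_ops += ops;
		if (!ops)		/* a full pass found no work */
			break;
	} while (now_usec() < deadline);

	if (total_ops) {
		g->idle_armed = false;	/* activity seen: reset idle tracking */
		return true;
	}

	/* Idle: allow one full budget to elapse before giving up. */
	if (!g->idle_armed) {
		g->idle_deadline = deadline;
		g->idle_armed = true;
	}
	return now_usec() < g->idle_deadline;
}

int main(void)
{
	struct sim_queue queues[2] = { { .backlog = 20 }, { .backlog = 5 } };
	struct sim_group group = { .queues = queues, .nr_queues = 2 };

	/* The driver makes this re-queue decision from the workqueue item. */
	while (group_io_work(&group))
		;
	return 0;
}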
A module parameter provides a means to set an alternative
poll group processing time period.
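
With the patch applied, the budget would be adjustable through the
usual module parameter paths; the value is interpreted in microseconds
(it feeds usecs_to_jiffies()). The module name and sysfs path below
are the conventional ones for drivers/nvme/target/tcp.c and are shown
only as an assumed usage example:

    modprobe nvmet-tcp io_work_poll_budget=20000
    echo 20000 > /sys/module/nvmet_tcp/parameters/io_work_poll_budget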
Signed-off-by: Mark Wunderlich <mark.wunderlich@intel.com>
---
drivers/nvme/target/tcp.c | 94 ++++++++++++++++++++++++++++++++-------------
1 file changed, 67 insertions(+), 27 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 84dd5b300a1d..3955dbe38f0f 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -29,9 +29,17 @@ static int so_priority;
module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority");
+/*
+ * Define an alternate time period (in usecs) that io_work() shall use
+ * instead of the default value. Changing the value may benefit
+ * scenarios with a high queue count per group.
+ */
+static int io_work_poll_budget;
+module_param(io_work_poll_budget, int, 0644);
+MODULE_PARM_DESC(io_work_poll_budget, "nvmet tcp io_work poll time budget");
+
#define NVMET_TCP_RECV_BUDGET 8
#define NVMET_TCP_SEND_BUDGET 8
-#define NVMET_TCP_IO_WORK_BUDGET 64
enum nvmet_tcp_send_state {
NVMET_TCP_SEND_DATA_PDU,
@@ -93,7 +101,6 @@ enum nvmet_tcp_queue_state {
struct nvmet_tcp_queue {
struct socket *sock;
struct nvmet_tcp_port *port;
- struct work_struct io_work;
struct nvmet_cq nvme_cq;
struct nvmet_sq nvme_sq;
@@ -128,7 +135,10 @@ struct nvmet_tcp_queue {
int idx;
struct list_head queue_list;
+
struct nvmet_tcp_queue_group *group;
+ struct list_head glist_entry;
+ struct mutex activate_mutex;
struct nvmet_tcp_cmd connect;
@@ -157,6 +167,7 @@ struct nvmet_tcp_queue_group {
struct nvmet_tcp_group_napi napi[NVMET_TCP_GROUP_NAPI_LIMIT];
struct work_struct io_work;
int cpu;
+ unsigned long deadline;
struct list_head work_list;
struct list_head release_list;
};
@@ -202,6 +213,16 @@ static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
!cmd->rbytes_done;
}
+static inline void nvmet_tcp_queue_work(struct nvmet_tcp_queue *queue)
+{
+ struct nvmet_tcp_queue_group *group;
+
+ group = queue->group;
+ if (likely(group)) {
+ queue_work_on(group->cpu, nvmet_tcp_wq, &group->io_work);
+ }
+}
+
static inline struct nvmet_tcp_cmd *
nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
{
@@ -521,7 +542,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req)
struct nvmet_tcp_queue *queue = cmd->queue;
llist_add(&cmd->lentry, &queue->resp_list);
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work);
+ nvmet_tcp_queue_work(queue);
}
static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
@@ -1209,33 +1230,50 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
static void nvmet_tcp_io_work(struct work_struct *w)
{
- struct nvmet_tcp_queue *queue =
- container_of(w, struct nvmet_tcp_queue, io_work);
- bool pending;
- int ret, ops = 0;
+ struct nvmet_tcp_queue_group *group =
+ container_of(w, struct nvmet_tcp_queue_group, io_work);
+ struct nvmet_tcp_queue *queue, *next;
+ bool pending = false;
+ unsigned long deadline, bp_usec = 10000;
+ int ret, ops, grp_ops = 0;
- do {
- pending = false;
+ if (io_work_poll_budget > 0)
+ bp_usec = io_work_poll_budget;
+ deadline = jiffies + usecs_to_jiffies(bp_usec);
- ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
- if (ret > 0)
- pending = true;
- else if (ret < 0)
- return;
+ do {
+ ops = 0;
+ list_for_each_entry_safe(queue, next, &group->work_list, glist_entry) {
+ ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
+ if (ret < 0)
+ return;
+
+ ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
+ if (ret < 0)
+ return;
+ }
+ grp_ops += ops;
+ if (!ops)
+ break;
+ } while (!time_after(jiffies, deadline));
- ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
- if (ret > 0)
+ if (grp_ops > 0) {
+ pending = true;
+ group->deadline = 0;
+ } else {
+ if (!group->deadline)
+ group->deadline = deadline;
+ if (!time_after(jiffies, group->deadline))
pending = true;
- else if (ret < 0)
- return;
+ }
- } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
+ cond_resched();
/*
- * We exahusted our budget, requeue our selves
+ * We exhausted our budget, re-queue ourselves if pending activity
*/
if (pending)
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ queue_work_on(group->cpu, nvmet_tcp_wq, &group->io_work);
}
static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
@@ -1376,11 +1414,9 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
mutex_unlock(&nvmet_tcp_queue_mutex);
nvmet_tcp_restore_socket_callbacks(queue);
- flush_work(&queue->io_work);
nvmet_tcp_uninit_data_in_cmds(queue);
nvmet_sq_destroy(&queue->nvme_sq);
- cancel_work_sync(&queue->io_work);
sock_release(queue->sock);
nvmet_tcp_free_cmds(queue);
if (queue->hdr_digest || queue->data_digest)
@@ -1397,7 +1433,7 @@ static void nvmet_tcp_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
if (likely(queue))
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ nvmet_tcp_queue_work(queue);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -1417,7 +1453,7 @@ static void nvmet_tcp_write_space(struct sock *sk)
if (sk_stream_is_writeable(sk)) {
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ nvmet_tcp_queue_work(queue);
}
out:
read_unlock_bh(&sk->sk_callback_lock);
@@ -1569,7 +1605,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
return -ENOMEM;
INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
- INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
queue->sock = newsock;
queue->port = port;
queue->nr_cmds = 0;
@@ -1599,6 +1634,8 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
mutex_unlock(&nvmet_tcp_queue_mutex);
+ mutex_init(&queue->activate_mutex);
+ INIT_LIST_HEAD(&queue->glist_entry);
if (!nvmet_tcp_add_to_group(queue))
goto out_destroy_sq;
@@ -1606,7 +1643,10 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
if (ret)
goto out_remove_from_group;
- queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+ if (!mutex_trylock(&queue->activate_mutex))
+ goto out_remove_from_group;
+ list_add_tail(&queue->glist_entry, &queue->group->work_list);
+ nvmet_tcp_queue_work(queue);
return 0;
out_remove_from_group: