[PATCHv6 6/8] nvme-multipath: add debugfs attribute latency_batch_timeout

Nilay Shroff nilay at linux.ibm.com
Wed May 20 11:21:02 PDT 2026


By default, the latency I/O policy accumulates latency samples over a
15-second window. When this window expires, the driver computes the
average latency and updates the smoothed (EWMA) latency value. The
path weight is then recalculated based on this data.

A 15-second window provides a good balance for most workloads, as it
helps smooth out transient latency spikes and produces a more stable
path weight profile. However, some workloads may benefit from faster
or slower adaptation to changing latency conditions.

This commit introduces a new debugfs attribute, latency_batch_timeout,
which allows users to configure the latency batch window (in seconds),
and thus the path weight calculation interval, based on their workload
requirements.

Reviewed-by: Hannes Reinecke <hare at suse.de>
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
 drivers/nvme/host/debugfs.c   | 37 +++++++++++++++++++++++++++++++++++
 drivers/nvme/host/multipath.c |  8 ++++++--
 drivers/nvme/host/nvme.h      |  1 +
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/debugfs.c b/drivers/nvme/host/debugfs.c
index 4371d7aafae8..63b0ad5d105b 100644
--- a/drivers/nvme/host/debugfs.c
+++ b/drivers/nvme/host/debugfs.c
@@ -146,12 +146,49 @@ static ssize_t nvme_latency_ewma_shift_store(void *data,
 	WRITE_ONCE(head->latency_ewma_shift, res);
 	return count;
 }
+
+static int nvme_latency_batch_timeout_show(void *data, struct seq_file *m)
+{
+	struct nvme_ns_head *ns_head = data;
+	u64 ns = READ_ONCE(ns_head->latency_batch_timeout); /* kept in ns */
+
+	seq_printf(m, "%llu\n", div_u64(ns, NSEC_PER_SEC)); /* shown in s */
+	return 0;
+}
+
+/* Parse a window in seconds from userspace; store it in nanoseconds. */
+static ssize_t nvme_latency_batch_timeout_store(void *data,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct nvme_ns_head *head = data;
+	char kbuf[16];
+	u32 res;
+	int ret;
+
+	/* Refuse oversized input rather than parse a truncated prefix. */
+	if (count >= sizeof(kbuf))
+		return -EINVAL;
+
+	if (copy_from_user(kbuf, ubuf, count))
+		return -EFAULT;
+	kbuf[count] = '\0';
+
+	ret = kstrtou32(strstrip(kbuf), 0, &res);
+	if (ret)
+		return ret;
+
+	/* Widen first: u32 * NSEC_PER_SEC wraps where long is 32 bits. */
+	WRITE_ONCE(head->latency_batch_timeout, (u64)res * NSEC_PER_SEC);
+	return count;
+}
 #endif
 
 static const struct nvme_debugfs_attr nvme_mpath_debugfs_attrs[] = {
 #ifdef CONFIG_NVME_MULTIPATH
 	{"latency_ewma_shift", 0600, nvme_latency_ewma_shift_show,
 			nvme_latency_ewma_shift_store},
+	{"latency_batch_timeout", 0600, nvme_latency_batch_timeout_show,
+			nvme_latency_batch_timeout_store}, /* in seconds */
 #endif
 	{},
 };
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 3e76e07a0376..aa817bfa4b81 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -349,8 +349,11 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
 	stat->batch_count++;
 	stat->nr_samples++;
 
-	if (now > stat->last_batch_ts && ((now - stat->last_batch_ts) >=
-			NVME_DEFAULT_LATENCY_BATCH_TIMEOUT)) {
+	if (now > stat->last_batch_ts) {
+		u64 timeout = READ_ONCE(head->latency_batch_timeout);
+
+		if ((now - stat->last_batch_ts) < timeout)
+			return;
 
 		/*
 		 * Find simple average latency for the last epoch (~15 sec
@@ -1114,6 +1117,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work);
 	head->delayed_removal_secs = 0;
 	head->latency_ewma_shift = NVME_DEFAULT_LATENCY_EWMA_SHIFT;
+	head->latency_batch_timeout = NVME_DEFAULT_LATENCY_BATCH_TIMEOUT;
 
 	/*
 	 * If "multipath_always_on" is enabled, a multipath node is added
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 40009c024ab8..a694dd091a16 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -600,6 +600,7 @@ struct nvme_ns_head {
 
 	struct nvme_ns * __percpu	*latency_path;
 	u32				latency_ewma_shift;
+	u64				latency_batch_timeout;
 
 #define NVME_NSHEAD_DISK_LIVE		0
 #define NVME_NSHEAD_QUEUE_IF_NO_PATH	1
-- 
2.53.0




More information about the Linux-nvme mailing list