[PATCHv6 5/8] nvme-multipath: add debugfs attribute latency_ewma_shift

Nilay Shroff nilay at linux.ibm.com
Wed May 20 11:21:01 PDT 2026


By default, the EWMA (Exponentially Weighted Moving Average) shift
value, used to smooth latency samples for the latency iopolicy, is set
to 3. The EWMA is calculated using the following formula:

ewma = (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;

The default value of 3 assigns ~87.5% weight to the existing EWMA value
and ~12.5% weight to the new latency sample. This provides a stable
average that smooths out short-term variations.

However, different workloads may require faster or slower adaptation to
changing conditions. This commit introduces a new debugfs attribute,
latency_ewma_shift, allowing users to tune the weighting factor.

For example:
   - latency_ewma_shift = 2 => 75% old, 25% new
   - latency_ewma_shift = 1 => 50% old, 50% new
   - latency_ewma_shift = 0 => 0% old, 100% new

Accepted values are 0 through 8; writing a larger value fails with
-EINVAL.

Reviewed-by: Hannes Reinecke <hare at suse.de>
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
 drivers/nvme/host/debugfs.c   | 46 +++++++++++++++++++++++++++++++++++
 drivers/nvme/host/multipath.c |  9 ++++---
 drivers/nvme/host/nvme.h      |  1 +
 3 files changed, 52 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/debugfs.c b/drivers/nvme/host/debugfs.c
index 26a50566e4a1..4371d7aafae8 100644
--- a/drivers/nvme/host/debugfs.c
+++ b/drivers/nvme/host/debugfs.c
@@ -105,8 +105,54 @@ static const struct file_operations nvme_debugfs_fops = {
 	.release = nvme_debugfs_release,
 };
 
+#ifdef CONFIG_NVME_MULTIPATH
+static int nvme_latency_ewma_shift_show(void *data, struct seq_file *m)
+{
+	struct nvme_ns_head *head = data;
+
+	seq_printf(m, "%u\n", READ_ONCE(head->latency_ewma_shift));
+	return 0;
+}
+
+static ssize_t nvme_latency_ewma_shift_store(void *data,
+		const char __user *ubuf, size_t count, loff_t *ppos)
+{
+	struct nvme_ns_head *head = data;
+	char kbuf[8];
+	u32 res;
+	int ret;
+	size_t len;
+	char *arg;
+
+	len = min(sizeof(kbuf) - 1, count);
+
+	if (copy_from_user(kbuf, ubuf, len))
+		return -EFAULT;
+
+	kbuf[len] = '\0';
+	arg = strstrip(kbuf);
+
+	ret = kstrtou32(arg, 0, &res);
+	if (ret)
+		return ret;
+
+	/*
+	 * Values greater than 8 are nonsensical, as they effectively assign
+	 * zero weight to new samples.
+	 */
+	if (res > 8)
+		return -EINVAL;
+
+	WRITE_ONCE(head->latency_ewma_shift, res);
+	return count;
+}
+#endif
 
 static const struct nvme_debugfs_attr nvme_mpath_debugfs_attrs[] = {
+#ifdef CONFIG_NVME_MULTIPATH
+	{"latency_ewma_shift", 0600, nvme_latency_ewma_shift_show,
+			nvme_latency_ewma_shift_store},
+#endif
 	{},
 };
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 541d12b73b74..3e76e07a0376 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -281,10 +281,9 @@ static void nvme_mpath_weight_work(struct work_struct *weight_work)
  * For instance, with EWMA_SHIFT = 3, this assigns 7/8 (~87.5 %) weight to
  * the existing/old ewma and 1/8 (~12.5%) weight to the new sample.
  */
-static inline u64 calc_ewma_update(u64 old, u64 new)
+static inline u64 calc_ewma_update(u64 old, u64 new, u32 ewma_shift)
 {
-	return (old * ((1 << NVME_DEFAULT_LATENCY_EWMA_SHIFT) - 1)
-			+ new) >> NVME_DEFAULT_LATENCY_EWMA_SHIFT;
+	return (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
 }
 
 static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
@@ -375,7 +374,8 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
 		if (unlikely(!stat->slat_ns))
 			WRITE_ONCE(stat->slat_ns, avg_lat_ns);
 		else {
-			slat_ns = calc_ewma_update(stat->slat_ns, avg_lat_ns);
+			slat_ns = calc_ewma_update(stat->slat_ns, avg_lat_ns,
+					READ_ONCE(head->latency_ewma_shift));
 			WRITE_ONCE(stat->slat_ns, slat_ns);
 		}
 
@@ -1113,6 +1113,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
 	INIT_DELAYED_WORK(&head->remove_work, nvme_remove_head_work);
 	head->delayed_removal_secs = 0;
+	head->latency_ewma_shift = NVME_DEFAULT_LATENCY_EWMA_SHIFT;
 
 	/*
 	 * If "multipath_always_on" is enabled, a multipath node is added
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 7ee9689ce07e..40009c024ab8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -599,6 +599,7 @@ struct nvme_ns_head {
 	unsigned int		delayed_removal_secs;
 
 	struct nvme_ns * __percpu	*latency_path;
+	u32				latency_ewma_shift;
 
 #define NVME_NSHEAD_DISK_LIVE		0
 #define NVME_NSHEAD_QUEUE_IF_NO_PATH	1
-- 
2.53.0




More information about the Linux-nvme mailing list