[RFC PATCHv4 4/6] nvme-multipath: add debugfs attribute adaptive_ewma_shift
Nilay Shroff
nilay at linux.ibm.com
Tue Nov 4 02:45:19 PST 2025
By default, the EWMA (Exponentially Weighted Moving Average) shift
value, used for smoothing latency samples for the adaptive iopolicy, is
set to 3. The EWMA is calculated using the following formula:
ewma = (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
The default value of 3 assigns 7/8 (87.5%) weight to the existing EWMA
value and 1/8 (12.5%) weight to the new latency sample. This provides a
stable average that smooths out short-term variations.
However, different workloads may require faster or slower adaptation to
changing conditions. This commit introduces a new debugfs attribute,
adaptive_ewma_shift, allowing users to tune the weighting factor.
Accepted values range from 0 to 8; larger values are rejected with
-EINVAL.
For example:
- adaptive_ewma_shift = 2 => 75% old, 25% new
- adaptive_ewma_shift = 1 => 50% old, 50% new
- adaptive_ewma_shift = 0 => 0% old, 100% new
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
drivers/nvme/host/core.c | 3 +++
drivers/nvme/host/debugfs.c | 46 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/multipath.c | 8 +++---
drivers/nvme/host/nvme.h | 1 +
4 files changed, 54 insertions(+), 4 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c15dfcaf3de2..43b9b0d6cbdf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3913,6 +3913,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
head->ids = info->ids;
head->shared = info->is_shared;
head->rotational = info->is_rotational;
+#ifdef CONFIG_NVME_MULTIPATH
+ head->adp_ewma_shift = NVME_DEFAULT_ADP_EWMA_SHIFT;
+#endif
ratelimit_state_init(&head->rs_nuse, 5 * HZ, 1);
ratelimit_set_flags(&head->rs_nuse, RATELIMIT_MSG_ON_RELEASE);
kref_init(&head->ref);
diff --git a/drivers/nvme/host/debugfs.c b/drivers/nvme/host/debugfs.c
index 6bb57c4b5c3b..e3c37041e8f2 100644
--- a/drivers/nvme/host/debugfs.c
+++ b/drivers/nvme/host/debugfs.c
@@ -105,8 +105,54 @@ static const struct file_operations nvme_debugfs_fops = {
.release = nvme_debugfs_release,
};
+#ifdef CONFIG_NVME_MULTIPATH
+static int nvme_adp_ewma_shift_show(void *data, struct seq_file *m)
+{
+ struct nvme_ns_head *head = data;
+
+ seq_printf(m, "%u\n", READ_ONCE(head->adp_ewma_shift));
+ return 0;
+}
+
+static ssize_t nvme_adp_ewma_shift_store(void *data, const char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct nvme_ns_head *head = data;
+ char kbuf[8];
+ u32 res;
+ int ret;
+ size_t len;
+ char *arg;
+
+ len = min(sizeof(kbuf) - 1, count);
+
+ if (copy_from_user(kbuf, ubuf, len))
+ return -EFAULT;
+
+ kbuf[len] = '\0';
+ arg = strstrip(kbuf);
+
+ ret = kstrtou32(arg, 0, &res);
+ if (ret)
+ return ret;
+
+ /*
+ * Values greater than 8 are nonsensical, as they effectively assign
+ * zero weight to new samples.
+ */
+ if (res > 8)
+ return -EINVAL;
+
+ WRITE_ONCE(head->adp_ewma_shift, res);
+ return count;
+}
+#endif
static const struct nvme_debugfs_attr nvme_mpath_debugfs_attrs[] = {
+#ifdef CONFIG_NVME_MULTIPATH
+ {"adaptive_ewma_shift", 0600, nvme_adp_ewma_shift_show,
+ nvme_adp_ewma_shift_store},
+#endif
{},
};
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 047dd9da9cbf..c7470cc8844e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -294,10 +294,9 @@ static void nvme_mpath_weight_work(struct work_struct *weight_work)
* For instance, with EWMA_SHIFT = 3, this assigns 7/8 (~87.5 %) weight to
* the existing/old ewma and 1/8 (~12.5%) weight to the new sample.
*/
-static inline u64 ewma_update(u64 old, u64 new)
+static inline u64 ewma_update(u64 old, u64 new, u32 ewma_shift)
{
- return (old * ((1 << NVME_DEFAULT_ADP_EWMA_SHIFT) - 1)
- + new) >> NVME_DEFAULT_ADP_EWMA_SHIFT;
+ return (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
}
static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
@@ -389,7 +388,8 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
if (unlikely(!stat->slat_ns))
WRITE_ONCE(stat->slat_ns, avg_lat_ns);
else {
- slat_ns = ewma_update(stat->slat_ns, avg_lat_ns);
+ slat_ns = ewma_update(stat->slat_ns, avg_lat_ns,
+ READ_ONCE(head->adp_ewma_shift));
WRITE_ONCE(stat->slat_ns, slat_ns);
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1c1ec2a7f9ad..97de45634f08 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -545,6 +545,7 @@ struct nvme_ns_head {
unsigned int delayed_removal_secs;
struct nvme_ns * __percpu *adp_path;
+ u32 adp_ewma_shift;
#define NVME_NSHEAD_DISK_LIVE 0
#define NVME_NSHEAD_QUEUE_IF_NO_PATH 1
--
2.51.0
More information about the Linux-nvme
mailing list