[RFC PATCHv3 3/6] nvme: add sysfs attribute adp_ewma_shift
Nilay Shroff
nilay at linux.ibm.com
Mon Oct 27 02:29:37 PDT 2025
By default, the EWMA (Exponentially Weighted Moving Average) shift
value, used when averaging latency samples for the adaptive iopolicy,
is set to 3. The EWMA is calculated using the following formula:
ewma = (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
The default value of 3 assigns ~87.5% weight to the existing EWMA value
and ~12.5% weight to the new latency sample. This provides a stable
average that smooths out short-term variations.
However, different workloads may require faster or slower adaptation to
changing conditions. This commit introduces a new sysfs attribute,
adp_ewma_shift, allowing users to tune the weighting factor.
For example:
- adp_ewma_shift = 1 => 50% old, 50% new
- adp_ewma_shift = 0 => 0% old, 100% new
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
drivers/nvme/host/core.c | 10 ++++++++-
drivers/nvme/host/multipath.c | 38 +++++++++++++++++++++++++++++++----
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/sysfs.c | 1 +
4 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 284a7c9c5d1d..ab09b9724674 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3240,7 +3240,15 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
return -EINVAL;
}
nvme_mpath_default_iopolicy(subsys);
-
+#ifdef CONFIG_NVME_MULTIPATH
+ /*
+ * Default value of ewma_shift is set to 3 so that we can assign ~87.5%
+ * weight to the existing ewma and ~12.5% weight to the new latency
+ * sample. This default could be changed through sysfs. This value is
+ * used while adding latency sample for adaptive iopolicy.
+ */
+ subsys->adp_ewma_shift = NVME_DEFAULT_ADP_EWMA_SHIFT;
+#endif
subsys->dev.class = &nvme_subsys_class;
subsys->dev.release = nvme_release_subsystem;
subsys->dev.groups = nvme_subsys_attrs_groups;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index b438371b8494..95407c0f2f4b 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -294,10 +294,9 @@ static void nvme_mpath_weight_work(struct work_struct *weight_work)
* For instance, with EWMA_SHIFT = 3, this assigns 7/8 (~87.5 %) weight to
* the existing/old ewma and 1/8 (~12.5%) weight to the new sample.
*/
-static inline u64 ewma_update(u64 old, u64 new)
+static inline u64 ewma_update(u64 old, u64 new, int ewma_shift)
{
- return (old * ((1 << NVME_DEFAULT_ADP_EWMA_SHIFT) - 1)
- + new) >> NVME_DEFAULT_ADP_EWMA_SHIFT;
+ return (old * ((1ULL << ewma_shift) - 1) + new) >> ewma_shift;
}
static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
@@ -389,7 +388,8 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
if (unlikely(!stat->slat_ns))
WRITE_ONCE(stat->slat_ns, avg_lat_ns);
else {
- slat_ns = ewma_update(stat->slat_ns, avg_lat_ns);
+ slat_ns = ewma_update(stat->slat_ns, avg_lat_ns,
+ READ_ONCE(head->subsys->adp_ewma_shift));
WRITE_ONCE(stat->slat_ns, slat_ns);
}
@@ -1465,6 +1465,36 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
+static ssize_t nvme_subsys_adp_ewma_shift_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_subsystem *s = container_of(dev, struct nvme_subsystem, dev);
+	int shift = READ_ONCE(s->adp_ewma_shift);
+
+	return sysfs_emit(buf, "%d\n", shift);
+}
+
+static ssize_t nvme_subsys_adp_ewma_shift_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int shift, err;
+	struct nvme_subsystem *subsys =
+		container_of(dev, struct nvme_subsystem, dev);
+
+	/*
+	 * ewma_update() shifts by this value, so keep it below the int width.
+	 */
+	err = kstrtoint(buf, 0, &shift);
+	if (err || shift < 0 || shift >= BITS_PER_TYPE(int) - 1)
+		return -EINVAL;
+
+	WRITE_ONCE(subsys->adp_ewma_shift, shift);
+	return count;
+}
+
+SUBSYS_ATTR_RW(adp_ewma_shift, 0644, nvme_subsys_adp_ewma_shift_show,
+ nvme_subsys_adp_ewma_shift_store);
+
static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5baf0232726f..9f5b233c747a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -450,6 +450,7 @@ struct nvme_subsystem {
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
+ int adp_ewma_shift; /* used for adaptive iopolicy */
#endif
};
@@ -1043,6 +1044,7 @@ extern struct device_attribute dev_attr_queue_depth;
extern struct device_attribute dev_attr_numa_nodes;
extern struct device_attribute dev_attr_delayed_removal_secs;
extern struct device_attribute subsys_attr_iopolicy;
+extern struct device_attribute subsys_attr_adp_ewma_shift;
static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
{
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 1cbab90ed42e..cf9711961b00 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -917,6 +917,7 @@ static struct attribute *nvme_subsys_attrs[] = {
&subsys_attr_subsystype.attr,
#ifdef CONFIG_NVME_MULTIPATH
&subsys_attr_iopolicy.attr,
+ &subsys_attr_adp_ewma_shift.attr,
#endif
NULL,
};
--
2.51.0
More information about the Linux-nvme
mailing list