[RFC PATCHv3 3/6] nvme: add sysfs attribute adp_ewma_shift
Hannes Reinecke
hare at suse.de
Mon Oct 27 04:54:22 PDT 2025
On 10/27/25 10:29, Nilay Shroff wrote:
> By default, the EWMA (Exponentially Weighted Moving Average) shift
> value, used for storing latency samples for adaptive iopolicy, is set
> to 3. The EWMA is calculated using the following formula:
>
> ewma = (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
>
> The default value of 3 assigns ~87.5% weight to the existing EWMA value
> and ~12.5% weight to the new latency sample. This provides a stable
> average that smooths out short-term variations.
>
> However, different workloads may require faster or slower adaptation to
> changing conditions. This commit introduces a new sysfs attribute,
> adp_ewma_shift, allowing users to tune the weighting factor.
>
> For example:
> - adp_ewma_shift = 1 => 50% old, 50% new
> - adp_ewma_shift = 0 => 0% old, 100% new
>
> Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
> ---
> drivers/nvme/host/core.c | 10 ++++++++-
> drivers/nvme/host/multipath.c | 38 +++++++++++++++++++++++++++++++----
> drivers/nvme/host/nvme.h | 2 ++
> drivers/nvme/host/sysfs.c | 1 +
> 4 files changed, 46 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 284a7c9c5d1d..ab09b9724674 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -3240,7 +3240,15 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
> return -EINVAL;
> }
> nvme_mpath_default_iopolicy(subsys);
> -
> +#ifdef CONFIG_NVME_MULTIPATH
> + /*
> + * Default value of ewma_shift is set to 3 so that we can assign ~87.5 %
> + * weight to the existing ewma and ~12.5% weight to the new latency
> + * sample. This default could be changed through sysfs. This value is
> + * used while adding latency sample for adaptive iopolicy.
> + */
> + subsys->adp_ewma_shift = NVME_DEFAULT_ADP_EWMA_SHIFT;
> +#endif
> subsys->dev.class = &nvme_subsys_class;
> subsys->dev.release = nvme_release_subsystem;
> subsys->dev.groups = nvme_subsys_attrs_groups;
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index b438371b8494..95407c0f2f4b 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -294,10 +294,9 @@ static void nvme_mpath_weight_work(struct work_struct *weight_work)
> * For instance, with EWMA_SHIFT = 3, this assigns 7/8 (~87.5 %) weight to
> * the existing/old ewma and 1/8 (~12.5%) weight to the new sample.
> */
> -static inline u64 ewma_update(u64 old, u64 new)
> +static inline u64 ewma_update(u64 old, u64 new, int ewma_shift)
> {
> - return (old * ((1 << NVME_DEFAULT_ADP_EWMA_SHIFT) - 1)
> - + new) >> NVME_DEFAULT_ADP_EWMA_SHIFT;
> + return (old * ((1 << ewma_shift) - 1) + new) >> ewma_shift;
> }
>
> static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
> @@ -389,7 +388,8 @@ static void nvme_mpath_add_sample(struct request *rq, struct nvme_ns *ns)
> if (unlikely(!stat->slat_ns))
> WRITE_ONCE(stat->slat_ns, avg_lat_ns);
> else {
> - slat_ns = ewma_update(stat->slat_ns, avg_lat_ns);
> + slat_ns = ewma_update(stat->slat_ns, avg_lat_ns,
> + READ_ONCE(head->subsys->adp_ewma_shift));
> WRITE_ONCE(stat->slat_ns, slat_ns);
> }
>
> @@ -1465,6 +1465,36 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
> SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
> nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);
>
> +static ssize_t nvme_subsys_adp_ewma_shift_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct nvme_subsystem *subsys =
> + container_of(dev, struct nvme_subsystem, dev);
> +
> + return sysfs_emit(buf, "%d\n", READ_ONCE(subsys->adp_ewma_shift));
> +}
> +
> +static ssize_t nvme_subsys_adp_ewma_shift_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t count)
> +{
> + int shift, err;
> + struct nvme_subsystem *subsys =
> + container_of(dev, struct nvme_subsystem, dev);
> +
> + err = kstrtoint(buf, 0, &shift);
> + if (err)
> + return -EINVAL;
> +
> + if (shift < 0)
> + return -EINVAL;
> +
> + WRITE_ONCE(subsys->adp_ewma_shift, shift);
> + return count;
> +}
> +
> +SUBSYS_ATTR_RW(adp_ewma_shift, 0644, nvme_subsys_adp_ewma_shift_show,
> + nvme_subsys_adp_ewma_shift_store);
> +
> static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
> char *buf)
> {
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 5baf0232726f..9f5b233c747a 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -450,6 +450,7 @@ struct nvme_subsystem {
> struct ida ns_ida;
> #ifdef CONFIG_NVME_MULTIPATH
> enum nvme_iopolicy iopolicy;
> + int adp_ewma_shift; /* used for adaptive iopolicy */
> #endif
> };
>
> @@ -1043,6 +1044,7 @@ extern struct device_attribute dev_attr_queue_depth;
> extern struct device_attribute dev_attr_numa_nodes;
> extern struct device_attribute dev_attr_delayed_removal_secs;
> extern struct device_attribute subsys_attr_iopolicy;
> +extern struct device_attribute subsys_attr_adp_ewma_shift;
>
> static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
> {
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index 1cbab90ed42e..cf9711961b00 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -917,6 +917,7 @@ static struct attribute *nvme_subsys_attrs[] = {
> &subsys_attr_subsystype.attr,
> #ifdef CONFIG_NVME_MULTIPATH
> &subsys_attr_iopolicy.attr,
> + &subsys_attr_adp_ewma_shift.attr,
> #endif
> NULL,
> };
Hmm. Can we please move that to debugfs?
I'd rather not litter sysfs with module-specific attributes
which come and go depending on the configuration ...
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
More information about the Linux-nvme
mailing list