[PATCH v3] nvme-multipath: expose path_state via sysfs
John Garry
john.g.garry at oracle.com
Wed Jun 24 00:36:44 PDT 2026
On 24/06/2026 06:48, Guixin Liu wrote:
> Add a read-only "path_state" sysfs attribute to each NVMe path namespace
> device (/sys/class/nvme/nvmeX/nvmeXcYnZ/path_state) that exposes the
> current path state, including whether the path is enabled or disabled
> with a specific reason.
>
> Factor the path disable checks from nvme_path_is_disabled() into a new
> nvme_path_get_state() helper that returns an enum nvme_path_state. This
> keeps the path selection logic and sysfs reporting in sync, so any future
> updates to the path disable criteria are automatically reflected in the
> sysfs output.
>
> Possible values:
> - "enabled (optimized)" : ANA state is optimized
> - "enabled (non-optimized)" : ANA state is not optimized
> - "disabled (ctrl_down)" : controller is not live
> - "disabled (ana_pending)" : ANA state change pending
> - "disabled (ns_not_ready)" : namespace is not ready
>
> This gives userspace visibility into the multipath path selection state
> without requiring users to piece together controller state and namespace
> flags manually.
>
> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
> ---
> v2->v3:
> - Factor path disable checks into nvme_path_get_state()
> helper returning enum nvme_path_state, and rebuild
> nvme_path_is_disabled() on top of it to keep path
> selection logic and sysfs reporting in sync.
> (Nilay Shroff)
> - Distinguish "enabled (optimized)" vs
> "enabled (non-optimized)" based on ANA state.
> (Keith Busch)
>
> v1->v2:
> - Show specific disabled reason instead of just
> "disabled": "disabled (ctrl_down)",
> "disabled (ana_pending)",
> "disabled (ns_not_ready)". (Nilay Shroff)
> ---
> drivers/nvme/host/multipath.c | 47 ++++++++++++++++++++++++++++++-----
> drivers/nvme/host/nvme.h | 1 +
> drivers/nvme/host/sysfs.c | 4 ++-
> 3 files changed, 45 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index e033ede953cc..2da5e9da1866 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -288,7 +288,15 @@ void nvme_mpath_revalidate_paths(struct nvme_ns_head *head)
> kblockd_schedule_work(&head->requeue_work);
> }
>
> -static bool nvme_path_is_disabled(struct nvme_ns *ns)
> +enum nvme_path_state {
> + NVME_PATH_ENABLED_OPTIMIZED,
> + NVME_PATH_ENABLED_NONOPTIMIZED,
> + NVME_PATH_DISABLED_CTRL_DOWN,
> + NVME_PATH_DISABLED_ANA_PENDING,
> + NVME_PATH_DISABLED_NS_NOT_READY,
> +};
> +
> +static enum nvme_path_state nvme_path_get_state(struct nvme_ns *ns)
> {
> enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
>
> @@ -298,11 +306,20 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
> * Otherwise it will fail immediately and return to the requeue list.
> */
> if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
> - return true;
> - if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
> - !test_bit(NVME_NS_READY, &ns->flags))
> - return true;
> - return false;
> + return NVME_PATH_DISABLED_CTRL_DOWN;
> + if (test_bit(NVME_NS_ANA_PENDING, &ns->flags))
> + return NVME_PATH_DISABLED_ANA_PENDING;
> + if (!test_bit(NVME_NS_READY, &ns->flags))
> + return NVME_PATH_DISABLED_NS_NOT_READY;
> + if (nvme_ctrl_use_ana(ns->ctrl) &&
> + ns->ana_state != NVME_ANA_OPTIMIZED)
> + return NVME_PATH_ENABLED_NONOPTIMIZED;
> + return NVME_PATH_ENABLED_OPTIMIZED;
> +}
> +
> +static bool nvme_path_is_disabled(struct nvme_ns *ns)
> +{
> + return nvme_path_get_state(ns) > NVME_PATH_ENABLED_NONOPTIMIZED;
I don't think that this is a particularly robust programming style,
since nothing in enum nvme_path_state explicitly states that this ">"
comparison relies on a very specific ordering and grouping of its
members.
> }
>
> static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
> @@ -1101,6 +1118,24 @@ static ssize_t queue_depth_show(struct device *dev,
> }
> DEVICE_ATTR_RO(queue_depth);
>
> +static const char * const nvme_path_state_names[] = {
> + [NVME_PATH_ENABLED_OPTIMIZED] = "enabled (optimized)",
> + [NVME_PATH_ENABLED_NONOPTIMIZED] = "enabled (non-optimized)",
> + [NVME_PATH_DISABLED_CTRL_DOWN] = "disabled (ctrl_down)",
> + [NVME_PATH_DISABLED_ANA_PENDING] = "disabled (ana_pending)",
> + [NVME_PATH_DISABLED_NS_NOT_READY] = "disabled (ns_not_ready)",
Some strings are abbreviated and some aren't, e.g. "ctrl_down" vs
"non-optimized".
> +};
> +
> +static ssize_t path_state_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
> +
> + return sysfs_emit(buf, "%s\n",
> + nvme_path_state_names[nvme_path_get_state(ns)]);
You should really check that the value returned by
nvme_path_get_state(ns) is less than ARRAY_SIZE(nvme_path_state_names)
before using it as an index.
> +}
> +DEVICE_ATTR_RO(path_state);
> +
> static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr,
> char *buf)
> {
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index b367c67dcb37..da59364d4774 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -1072,6 +1072,7 @@ extern struct device_attribute dev_attr_ana_grpid;
> extern struct device_attribute dev_attr_ana_state;
> extern struct device_attribute dev_attr_queue_depth;
> extern struct device_attribute dev_attr_numa_nodes;
> +extern struct device_attribute dev_attr_path_state;
> extern struct device_attribute dev_attr_delayed_removal_secs;
> extern struct device_attribute dev_attr_multipath_failover_count;
> extern struct device_attribute dev_attr_io_requeue_no_usable_path_count;
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index 933a5adfb7af..500d773300e7 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -261,6 +261,7 @@ static struct attribute *nvme_ns_attrs[] = {
> &dev_attr_ana_state.attr,
> &dev_attr_queue_depth.attr,
> &dev_attr_numa_nodes.attr,
> + &dev_attr_path_state.attr,
> &dev_attr_delayed_removal_secs.attr,
> #endif
> &dev_attr_io_passthru_err_log_enabled.attr,
> @@ -294,7 +295,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
> if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
> return 0;
> }
> - if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) {
> + if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr ||
> + a == &dev_attr_path_state.attr) {
> if (nvme_disk_is_ns_head(dev_to_disk(dev)))
> return 0;
> }
More information about the Linux-nvme
mailing list