[PATCH v3] nvme-multipath: expose path_state via sysfs

John Garry john.g.garry at oracle.com
Wed Jun 24 00:36:44 PDT 2026


On 24/06/2026 06:48, Guixin Liu wrote:
> Add a read-only "path_state" sysfs attribute to each NVMe path namespace
> device (/sys/class/nvme/nvmeX/nvmeXcYnZ/path_state) that exposes the
> current path state, including whether the path is enabled or disabled
> with a specific reason.
> 
> Factor the path disable checks from nvme_path_is_disabled() into a new
> nvme_path_get_state() helper that returns an enum nvme_path_state. This
> keeps the path selection logic and sysfs reporting in sync, so any future
> updates to the path disable criteria are automatically reflected in the
> sysfs output.
> 
> Possible values:
>    - "enabled (optimized)"     : ANA state is optimized
>    - "enabled (non-optimized)" : ANA state is not optimized
>    - "disabled (ctrl_down)"    : controller is not live
>    - "disabled (ana_pending)"  : ANA state change pending
>    - "disabled (ns_not_ready)" : namespace is not ready
> 
> This gives userspace visibility into the multipath path selection state
> without requiring users to piece together controller state and namespace
> flags manually.
> 
> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
> ---
> v2->v3:
> - Factor path disable checks into nvme_path_get_state()
>    helper returning enum nvme_path_state, and rebuild
>    nvme_path_is_disabled() on top of it to keep path
>    selection logic and sysfs reporting in sync.
>    (Nilay Shroff)
> - Distinguish "enabled (optimized)" vs
>    "enabled (non-optimized)" based on ANA state.
>    (Keith Busch)
> 
> v1->v2:
> - Show specific disabled reason instead of just
>    "disabled": "disabled (ctrl_down)",
>    "disabled (ana_pending)",
>    "disabled (ns_not_ready)". (Nilay Shroff)
> ---
>   drivers/nvme/host/multipath.c | 47 ++++++++++++++++++++++++++++++-----
>   drivers/nvme/host/nvme.h      |  1 +
>   drivers/nvme/host/sysfs.c     |  4 ++-
>   3 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index e033ede953cc..2da5e9da1866 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -288,7 +288,15 @@ void nvme_mpath_revalidate_paths(struct nvme_ns_head *head)
>   	kblockd_schedule_work(&head->requeue_work);
>   }
>   
> -static bool nvme_path_is_disabled(struct nvme_ns *ns)
> +enum nvme_path_state {
> +	NVME_PATH_ENABLED_OPTIMIZED,
> +	NVME_PATH_ENABLED_NONOPTIMIZED,
> +	NVME_PATH_DISABLED_CTRL_DOWN,
> +	NVME_PATH_DISABLED_ANA_PENDING,
> +	NVME_PATH_DISABLED_NS_NOT_READY,
> +};
> +
> +static enum nvme_path_state nvme_path_get_state(struct nvme_ns *ns)
>   {
>   	enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
>   
> @@ -298,11 +306,20 @@ static bool nvme_path_is_disabled(struct nvme_ns *ns)
>   	 * Otherwise it will fail immediately and return to the requeue list.
>   	 */
>   	if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
> -		return true;
> -	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
> -	    !test_bit(NVME_NS_READY, &ns->flags))
> -		return true;
> -	return false;
> +		return NVME_PATH_DISABLED_CTRL_DOWN;
> +	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags))
> +		return NVME_PATH_DISABLED_ANA_PENDING;
> +	if (!test_bit(NVME_NS_READY, &ns->flags))
> +		return NVME_PATH_DISABLED_NS_NOT_READY;
> +	if (nvme_ctrl_use_ana(ns->ctrl) &&
> +	    ns->ana_state != NVME_ANA_OPTIMIZED)
> +		return NVME_PATH_ENABLED_NONOPTIMIZED;
> +	return NVME_PATH_ENABLED_OPTIMIZED;
> +}
> +
> +static bool nvme_path_is_disabled(struct nvme_ns *ns)
> +{
> +	return nvme_path_get_state(ns) > NVME_PATH_ENABLED_NONOPTIMIZED;

I don't think that this is a particularly robust programming style,
since nothing in enum nvme_path_state explicitly states that this
specific ordering or grouping of the values is required.


>   }
>   
>   static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
> @@ -1101,6 +1118,24 @@ static ssize_t queue_depth_show(struct device *dev,
>   }
>   DEVICE_ATTR_RO(queue_depth);
>   
> +static const char * const nvme_path_state_names[] = {
> +	[NVME_PATH_ENABLED_OPTIMIZED]		= "enabled (optimized)",
> +	[NVME_PATH_ENABLED_NONOPTIMIZED]	= "enabled (non-optimized)",
> +	[NVME_PATH_DISABLED_CTRL_DOWN]		= "disabled (ctrl_down)",
> +	[NVME_PATH_DISABLED_ANA_PENDING]	= "disabled (ana_pending)",
> +	[NVME_PATH_DISABLED_NS_NOT_READY]	= "disabled (ns_not_ready)",

Some strings are abbreviated (e.g. "ctrl_down", "ns_not_ready") and some
aren't (e.g. "non-optimized").

> +};
> +
> +static ssize_t path_state_show(struct device *dev,
> +		struct device_attribute *attr, char *buf)
> +{
> +	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
> +
> +	return sysfs_emit(buf, "%s\n",
> +			  nvme_path_state_names[nvme_path_get_state(ns)]);

You should really check that the value returned by nvme_path_get_state(ns)
is less than ARRAY_SIZE(nvme_path_state_names) before using it as an index
into the array.

> +}
> +DEVICE_ATTR_RO(path_state);
> +
>   static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr,
>   		char *buf)
>   {
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index b367c67dcb37..da59364d4774 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -1072,6 +1072,7 @@ extern struct device_attribute dev_attr_ana_grpid;
>   extern struct device_attribute dev_attr_ana_state;
>   extern struct device_attribute dev_attr_queue_depth;
>   extern struct device_attribute dev_attr_numa_nodes;
> +extern struct device_attribute dev_attr_path_state;
>   extern struct device_attribute dev_attr_delayed_removal_secs;
>   extern struct device_attribute dev_attr_multipath_failover_count;
>   extern struct device_attribute dev_attr_io_requeue_no_usable_path_count;
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index 933a5adfb7af..500d773300e7 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -261,6 +261,7 @@ static struct attribute *nvme_ns_attrs[] = {
>   	&dev_attr_ana_state.attr,
>   	&dev_attr_queue_depth.attr,
>   	&dev_attr_numa_nodes.attr,
> +	&dev_attr_path_state.attr,
>   	&dev_attr_delayed_removal_secs.attr,
>   #endif
>   	&dev_attr_io_passthru_err_log_enabled.attr,
> @@ -294,7 +295,8 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj,
>   		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
>   			return 0;
>   	}
> -	if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) {
> +	if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr ||
> +	    a == &dev_attr_path_state.attr) {
>   		if (nvme_disk_is_ns_head(dev_to_disk(dev)))
>   			return 0;
>   	}




More information about the Linux-nvme mailing list