[PATCH v3] nvme-multipath: expose path_state via sysfs
Guixin Liu
kanie at linux.alibaba.com
Wed Jun 24 04:08:37 PDT 2026
在 2026/6/24 15:36, John Garry 写道:
> On 24/06/2026 06:48, Guixin Liu wrote:
>> Add a read-only "path_state" sysfs attribute to each NVMe path namespace
>> device (/sys/class/nvme/nvmeX/nvmeXcYnZ/path_state) that exposes the
>> current path state, including whether the path is enabled or disabled
>> with a specific reason.
>>
>> Factor the path disable checks from nvme_path_is_disabled() into a new
>> nvme_path_get_state() helper that returns an enum nvme_path_state. This
>> keeps the path selection logic and sysfs reporting in sync, so any
>> future
>> updates to the path disable criteria are automatically reflected in the
>> sysfs output.
>>
>> Possible values:
>> - "enabled (optimized)" : ANA state is optimized
>> - "enabled (non-optimized)" : ANA state is not optimized
>> - "disabled (ctrl_down)" : controller is not live
>> - "disabled (ana_pending)" : ANA state change pending
>> - "disabled (ns_not_ready)" : namespace is not ready
>>
>> This gives userspace visibility into the multipath path selection state
>> without requiring users to piece together controller state and namespace
>> flags manually.
>>
>> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
>> ---
>> v2->v3:
>> - Factor path disable checks into nvme_path_get_state()
>> helper returning enum nvme_path_state, and rebuild
>> nvme_path_is_disabled() on top of it to keep path
>> selection logic and sysfs reporting in sync.
>> (Nilay Shroff)
>> - Distinguish "enabled (optimized)" vs
>> "enabled (non-optimized)" based on ANA state.
>> (Keith Busch)
>>
>> v1->v2:
>> - Show specific disabled reason instead of just
>> "disabled": "disabled (ctrl_down)",
>> "disabled (ana_pending)",
>> "disabled (ns_not_ready)". (Nilay Shroff)
>> ---
>> drivers/nvme/host/multipath.c | 47 ++++++++++++++++++++++++++++++-----
>> drivers/nvme/host/nvme.h | 1 +
>> drivers/nvme/host/sysfs.c | 4 ++-
>> 3 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/multipath.c
>> b/drivers/nvme/host/multipath.c
>> index e033ede953cc..2da5e9da1866 100644
>> --- a/drivers/nvme/host/multipath.c
>> +++ b/drivers/nvme/host/multipath.c
>> @@ -288,7 +288,15 @@ void nvme_mpath_revalidate_paths(struct
>> nvme_ns_head *head)
>> kblockd_schedule_work(&head->requeue_work);
>> }
>> -static bool nvme_path_is_disabled(struct nvme_ns *ns)
>> +enum nvme_path_state {
>> + NVME_PATH_ENABLED_OPTIMIZED,
>> + NVME_PATH_ENABLED_NONOPTIMIZED,
>> + NVME_PATH_DISABLED_CTRL_DOWN,
>> + NVME_PATH_DISABLED_ANA_PENDING,
>> + NVME_PATH_DISABLED_NS_NOT_READY,
>> +};
>> +
>> +static enum nvme_path_state nvme_path_get_state(struct nvme_ns *ns)
>> {
>> enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
>> @@ -298,11 +306,20 @@ static bool nvme_path_is_disabled(struct
>> nvme_ns *ns)
>> * Otherwise it will fail immediately and return to the requeue
>> list.
>> */
>> if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
>> - return true;
>> - if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
>> - !test_bit(NVME_NS_READY, &ns->flags))
>> - return true;
>> - return false;
>> + return NVME_PATH_DISABLED_CTRL_DOWN;
>> + if (test_bit(NVME_NS_ANA_PENDING, &ns->flags))
>> + return NVME_PATH_DISABLED_ANA_PENDING;
>> + if (!test_bit(NVME_NS_READY, &ns->flags))
>> + return NVME_PATH_DISABLED_NS_NOT_READY;
>> + if (nvme_ctrl_use_ana(ns->ctrl) &&
>> + ns->ana_state != NVME_ANA_OPTIMIZED)
>> + return NVME_PATH_ENABLED_NONOPTIMIZED;
>> + return NVME_PATH_ENABLED_OPTIMIZED;
>> +}
>> +
>> +static bool nvme_path_is_disabled(struct nvme_ns *ns)
>> +{
>> + return nvme_path_get_state(ns) > NVME_PATH_ENABLED_NONOPTIMIZED;
>
> I don't think that this is a particularly robust programming style,
> since nothing in enum nvme_path_state explicitly states that a very
> specific ordering or grouping is required
>
I agree; we should add an NVME_PATH_DISABLED_FIRST marker to identify
the start of the disabled states.
>
>> }
>> static struct nvme_ns *__nvme_find_path(struct nvme_ns_head
>> *head, int node)
>> @@ -1101,6 +1118,24 @@ static ssize_t queue_depth_show(struct device
>> *dev,
>> }
>> DEVICE_ATTR_RO(queue_depth);
>> +static const char * const nvme_path_state_names[] = {
>> + [NVME_PATH_ENABLED_OPTIMIZED] = "enabled (optimized)",
>> + [NVME_PATH_ENABLED_NONOPTIMIZED] = "enabled (non-optimized)",
>> + [NVME_PATH_DISABLED_CTRL_DOWN] = "disabled (ctrl_down)",
>> + [NVME_PATH_DISABLED_ANA_PENDING] = "disabled (ana_pending)",
>> + [NVME_PATH_DISABLED_NS_NOT_READY] = "disabled (ns_not_ready)",
>
> some strings are abbreviated and some aren't
"enabled (optimized)"
"enabled (non-optimized)"
"disabled (controller down)"
"disabled (ana pending)"
"disabled (namespace not ready)"
Is it OK to spell them all out as above?
>
>> +};
>> +
>> +static ssize_t path_state_show(struct device *dev,
>> + struct device_attribute *attr, char *buf)
>> +{
>> + struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
>> +
>> + return sysfs_emit(buf, "%s\n",
>> + nvme_path_state_names[nvme_path_get_state(ns)]);
>
> you should really check that nvme_path_get_state(ns) does not exceed
> indexing into ARRAY_SIZE(nvme_path_state_names)
Changed in v4, thanks.
Best Regards,
Guixin Liu
More information about the Linux-nvme
mailing list