[PATCH v3] nvme-multipath: expose path_state via sysfs

Guixin Liu kanie at linux.alibaba.com
Wed Jun 24 04:08:37 PDT 2026



在 2026/6/24 15:36, John Garry 写道:
> On 24/06/2026 06:48, Guixin Liu wrote:
>> Add a read-only "path_state" sysfs attribute to each NVMe path namespace
>> device (/sys/class/nvme/nvmeX/nvmeXcYnZ/path_state) that exposes the
>> current path state, including whether the path is enabled or disabled
>> with a specific reason.
>>
>> Factor the path disable checks from nvme_path_is_disabled() into a new
>> nvme_path_get_state() helper that returns an enum nvme_path_state. This
>> keeps the path selection logic and sysfs reporting in sync, so any future
>> updates to the path disable criteria are automatically reflected in the
>> sysfs output.
>>
>> Possible values:
>>    - "enabled (optimized)"     : ANA state is optimized
>>    - "enabled (non-optimized)" : ANA state is not optimized
>>    - "disabled (ctrl_down)"    : controller is not live
>>    - "disabled (ana_pending)"  : ANA state change pending
>>    - "disabled (ns_not_ready)" : namespace is not ready
>>
>> This gives userspace visibility into the multipath path selection state
>> without requiring users to piece together controller state and namespace
>> flags manually.
>>
>> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
>> ---
>> v2->v3:
>> - Factor path disable checks into nvme_path_get_state()
>>    helper returning enum nvme_path_state, and rebuild
>>    nvme_path_is_disabled() on top of it to keep path
>>    selection logic and sysfs reporting in sync.
>>    (Nilay Shroff)
>> - Distinguish "enabled (optimized)" vs
>>    "enabled (non-optimized)" based on ANA state.
>>    (Keith Busch)
>>
>> v1->v2:
>> - Show specific disabled reason instead of just
>>    "disabled": "disabled (ctrl_down)",
>>    "disabled (ana_pending)",
>>    "disabled (ns_not_ready)". (Nilay Shroff)
>> ---
>>   drivers/nvme/host/multipath.c | 47 ++++++++++++++++++++++++++++++-----
>>   drivers/nvme/host/nvme.h      |  1 +
>>   drivers/nvme/host/sysfs.c     |  4 ++-
>>   3 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/nvme/host/multipath.c 
>> b/drivers/nvme/host/multipath.c
>> index e033ede953cc..2da5e9da1866 100644
>> --- a/drivers/nvme/host/multipath.c
>> +++ b/drivers/nvme/host/multipath.c
>> @@ -288,7 +288,15 @@ void nvme_mpath_revalidate_paths(struct 
>> nvme_ns_head *head)
>>       kblockd_schedule_work(&head->requeue_work);
>>   }
>>   -static bool nvme_path_is_disabled(struct nvme_ns *ns)
>> +enum nvme_path_state {
>> +    NVME_PATH_ENABLED_OPTIMIZED,
>> +    NVME_PATH_ENABLED_NONOPTIMIZED,
>> +    NVME_PATH_DISABLED_CTRL_DOWN,
>> +    NVME_PATH_DISABLED_ANA_PENDING,
>> +    NVME_PATH_DISABLED_NS_NOT_READY,
>> +};
>> +
>> +static enum nvme_path_state nvme_path_get_state(struct nvme_ns *ns)
>>   {
>>       enum nvme_ctrl_state state = nvme_ctrl_state(ns->ctrl);
>>   @@ -298,11 +306,20 @@ static bool nvme_path_is_disabled(struct 
>> nvme_ns *ns)
>>        * Otherwise it will fail immediately and return to the requeue 
>> list.
>>        */
>>       if (state != NVME_CTRL_LIVE && state != NVME_CTRL_DELETING)
>> -        return true;
>> -    if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
>> -        !test_bit(NVME_NS_READY, &ns->flags))
>> -        return true;
>> -    return false;
>> +        return NVME_PATH_DISABLED_CTRL_DOWN;
>> +    if (test_bit(NVME_NS_ANA_PENDING, &ns->flags))
>> +        return NVME_PATH_DISABLED_ANA_PENDING;
>> +    if (!test_bit(NVME_NS_READY, &ns->flags))
>> +        return NVME_PATH_DISABLED_NS_NOT_READY;
>> +    if (nvme_ctrl_use_ana(ns->ctrl) &&
>> +        ns->ana_state != NVME_ANA_OPTIMIZED)
>> +        return NVME_PATH_ENABLED_NONOPTIMIZED;
>> +    return NVME_PATH_ENABLED_OPTIMIZED;
>> +}
>> +
>> +static bool nvme_path_is_disabled(struct nvme_ns *ns)
>> +{
>> +    return nvme_path_get_state(ns) > NVME_PATH_ENABLED_NONOPTIMIZED;
>
> I don't think that this is a particularly robust programming style, 
> since nothing in enum nvme_path_state explicitly states that a very 
> specific ordering or grouping is required
>
I agree; I should add NVME_PATH_DISABLED_FIRST to mark the
start of the disabled states.
>
>>   }
>>     static struct nvme_ns *__nvme_find_path(struct nvme_ns_head 
>> *head, int node)
>> @@ -1101,6 +1118,24 @@ static ssize_t queue_depth_show(struct device 
>> *dev,
>>   }
>>   DEVICE_ATTR_RO(queue_depth);
>>   +static const char * const nvme_path_state_names[] = {
>> +    [NVME_PATH_ENABLED_OPTIMIZED]        = "enabled (optimized)",
>> +    [NVME_PATH_ENABLED_NONOPTIMIZED]    = "enabled (non-optimized)",
>> +    [NVME_PATH_DISABLED_CTRL_DOWN]        = "disabled (ctrl_down)",
>> +    [NVME_PATH_DISABLED_ANA_PENDING]    = "disabled (ana_pending)",
>> +    [NVME_PATH_DISABLED_NS_NOT_READY]    = "disabled (ns_not_ready)",
>
> some strings are abbreviated and some aren't
   "enabled (optimized)"
   "enabled (non-optimized)"
   "disabled (controller down)"
   "disabled (ana pending)"
   "disabled (namespace not ready)"
Would spelling them all out like this be OK?
>
>> +};
>> +
>> +static ssize_t path_state_show(struct device *dev,
>> +        struct device_attribute *attr, char *buf)
>> +{
>> +    struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
>> +
>> +    return sysfs_emit(buf, "%s\n",
>> + nvme_path_state_names[nvme_path_get_state(ns)]);
>
> you should really check that nvme_path_get_state(ns) does not exceed 
> indexing into ARRAY_SIZE(nvme_path_state_names)
Changed in v4, thanks.

Best Regards,
Guixin Liu



More information about the Linux-nvme mailing list