[PATCHv2 1/4] tree: add support for discovering nvme paths using sysfs multipath link

Hannes Reinecke hare at suse.de
Mon Apr 21 23:24:32 PDT 2025


On 4/17/25 15:59, Nilay Shroff wrote:
> With the upcoming Linux kernel v6.15, NVMe native multipath now provides
> a simplified mechanism for discovering all paths to a shared namespace
> through sysfs.
> 
> A new "multipath" directory is created under each NVMe head namespace
> device in "/sys/block/<head>/multipath/". This directory contains symlinks
> to all namespace path devices that access the same shared namespace.
> 
> For example, consider a shared namespace accessible via two paths under
> nvme-subsys1:
> 
> nvme-subsys1 - NQN=nqn.1994-11.com.samsung:nvme:PM1735a:2.5-inch:S6RTNE0R900057
>      hostnqn=nqn.2014-08.org.nvmexpress:uuid:41528538-e8ad-4eaf-84a7-9c552917d988
> \
>   +- ns 1
>   \
>    +- nvme0 pcie 052e:78:00.0 live optimized
>    +- nvme1 pcie 058e:78:00.0 live optimized
> 
> The head device `/dev/nvme1n1` will now have the following structure:
> 
> /sys/block/nvme1n1/multipath/
> ├── nvme1c0n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme0/nvme1c0n1
> └── nvme1c1n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme1/nvme1c1n1
> 
> This clearly shows that namespace 1 is accessible through both nvme1c0n1
> and nvme1c1n1. This new sysfs structure significantly simplifies multipath
> discovery and management, making it easier for tools and scripts to enumerate
> and manage NVMe multipath configurations. So leverage this functionality to
> update the path links for a shared NVMe namespace, simplifying path discovery
> and management.
> 
> This change adds a new struct nvme_ns_head to represent the head of a shared
> namespace. It contains a list head linking together struct nvme_path objects,
> where each path corresponds to a shared namespace instance. Additionally,
> struct nvme_ns has been updated to reference its associated nvme_ns_head,
> enabling straightforward traversal of all paths to a shared NVMe namespace.
> 
> Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
> ---
>   src/nvme/filters.c |   6 ++
>   src/nvme/filters.h |   9 +++
>   src/nvme/private.h |   9 ++-
>   src/nvme/tree.c    | 162 +++++++++++++++++++++++++++++++--------------
>   src/nvme/tree.h    |   9 +++
>   5 files changed, 143 insertions(+), 52 deletions(-)
> 
> diff --git a/src/nvme/filters.c b/src/nvme/filters.c
> index ceaba68f..4a8829db 100644
> --- a/src/nvme/filters.c
> +++ b/src/nvme/filters.c
> @@ -105,3 +105,9 @@ int nvme_scan_ctrl_namespaces(nvme_ctrl_t c, struct dirent ***ns)
>   	return scandir(nvme_ctrl_get_sysfs_dir(c), ns,
>   		       nvme_namespace_filter, alphasort);
>   }
> +
> +int nvme_scan_ns_head_paths(nvme_ns_head_t head, struct dirent ***paths)
> +{
> +	return scandir(nvme_ns_head_get_sysfs_dir(head), paths,
> +		       nvme_paths_filter, alphasort);
> +}
> diff --git a/src/nvme/filters.h b/src/nvme/filters.h
> index 4ceeffd5..9e9dbb95 100644
> --- a/src/nvme/filters.h
> +++ b/src/nvme/filters.h
> @@ -94,4 +94,13 @@ int nvme_scan_ctrl_namespace_paths(nvme_ctrl_t c, struct dirent ***paths);
>    */
>   int nvme_scan_ctrl_namespaces(nvme_ctrl_t c, struct dirent ***ns);
>   
> +/**
> + * nvme_scan_ns_head_paths() - Scan for namespace paths
> + * @head: Namespace head node to scan
> + * @paths: Pointer to array of dirents
> + *
> + * Return: number of entries in @paths
> + */
> +int nvme_scan_ns_head_paths(nvme_ns_head_t head, struct dirent ***paths);
> +
>   #endif /* _LIBNVME_FILTERS_H */
> diff --git a/src/nvme/private.h b/src/nvme/private.h
> index 33cdd555..f45c5823 100644
> --- a/src/nvme/private.h
> +++ b/src/nvme/private.h
> @@ -36,12 +36,19 @@ struct nvme_path {
>   	int grpid;
>   };
>   
> +struct nvme_ns_head {
> +	struct list_head paths;
> +	struct nvme_ns *n;
> +
> +	char *sysfs_dir;
> +};
> +
>   struct nvme_ns {
>   	struct list_node entry;
> -	struct list_head paths;
>   
>   	struct nvme_subsystem *s;
>   	struct nvme_ctrl *c;
> +	struct nvme_ns_head *head;
>   
>   	int fd;
>   	__u32 nsid;

Why isn't 'nvme_subsystem' moved to 'nvme_ns_head'?
That certainly is how the kernel structures are laid
out, and it would make sense to follow it here...

> diff --git a/src/nvme/tree.c b/src/nvme/tree.c
> index b0a4696f..bd7fb53e 100644
> --- a/src/nvme/tree.c
> +++ b/src/nvme/tree.c
> @@ -564,12 +564,12 @@ nvme_ns_t nvme_subsystem_next_ns(nvme_subsystem_t s, nvme_ns_t n)
>   
>   nvme_path_t nvme_namespace_first_path(nvme_ns_t ns)
>   {
> -	return list_top(&ns->paths, struct nvme_path, nentry);
> +	return list_top(&ns->head->paths, struct nvme_path, nentry);
>   }
>   
>   nvme_path_t nvme_namespace_next_path(nvme_ns_t ns, nvme_path_t p)
>   {
> -	return p ? list_next(&ns->paths, p, nentry) : NULL;
> +	return p ? list_next(&ns->head->paths, p, nentry) : NULL;
>   }
>   
>   static void __nvme_free_ns(struct nvme_ns *n)
> @@ -579,6 +579,8 @@ static void __nvme_free_ns(struct nvme_ns *n)
>   	free(n->generic_name);
>   	free(n->name);
>   	free(n->sysfs_dir);
> +	free(n->head->sysfs_dir);
> +	free(n->head);
>   	free(n);
>   }
>   
> @@ -916,25 +918,6 @@ void nvme_free_path(struct nvme_path *p)
>   	free(p);
>   }
>   
> -static void nvme_subsystem_set_path_ns(nvme_subsystem_t s, nvme_path_t p)
> -{
> -	char n_name[32] = { };
> -	int i, c, nsid, ret;
> -	nvme_ns_t n;
> -
> -	ret = sscanf(nvme_path_get_name(p), "nvme%dc%dn%d", &i, &c, &nsid);
> -	if (ret != 3)
> -		return;
> -
> -	sprintf(n_name, "nvme%dn%d", i, nsid);
> -	nvme_subsystem_for_each_ns(s, n) {
> -		if (!strcmp(n_name, nvme_ns_get_name(n))) {
> -			list_add_tail(&n->paths, &p->nentry);
> -			p->n = n;
> -		}
> -	}
> -}
> -
>   static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
>   {
>   	struct nvme_path *p;
> @@ -973,7 +956,6 @@ static int nvme_ctrl_scan_path(nvme_root_t r, struct nvme_ctrl *c, char *name)
>   	}
>   
>   	list_node_init(&p->nentry);
> -	nvme_subsystem_set_path_ns(c->s, p);
>   	list_node_init(&p->entry);
>   	list_add_tail(&c->paths, &p->entry);
>   	return 0;
> @@ -2250,8 +2232,8 @@ nvme_ctrl_t nvme_scan_ctrl(nvme_root_t r, const char *name)
>   		return NULL;
>   
>   	path = NULL;
> -	nvme_ctrl_scan_namespaces(r, c);
>   	nvme_ctrl_scan_paths(r, c);
> +	nvme_ctrl_scan_namespaces(r, c);
>   	return c;
>   }
>   
> @@ -2323,6 +2305,11 @@ const char *nvme_ns_get_sysfs_dir(nvme_ns_t n)
>   	return n->sysfs_dir;
>   }
>   
> +const char *nvme_ns_head_get_sysfs_dir(nvme_ns_head_t head)
> +{
> +	return head->sysfs_dir;
> +}
> +
>   const char *nvme_ns_get_name(nvme_ns_t n)
>   {
>   	return n->name;
> @@ -2749,7 +2736,11 @@ static void nvme_ns_set_generic_name(struct nvme_ns *n, const char *name)
>   
>   static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
>   {
> +	int ret;
>   	struct nvme_ns *n;
> +	struct nvme_ns_head *head;
> +	struct stat arg;
> +	_cleanup_free_ char *path = NULL;
>   
>   	n = calloc(1, sizeof(*n));
>   	if (!n) {
> @@ -2757,6 +2748,32 @@ static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
>   		return NULL;
>   	}
>   
> +	head = calloc(1, sizeof(*head));
> +	if (!head) {
> +		errno = ENOMEM;
> +		free(n);
> +		return NULL;
> +	}
> +
> +	head->n = n;
> +	list_head_init(&head->paths);
> +	ret = asprintf(&path, "%s/%s", sys_path, "multipath");
> +	if (ret < 0) {
> +		errno = ENOMEM;
> +		goto free_ns_head;
> +	}
> +	/*
> +	 * The sysfs-dir "multipath" is available only when nvme multipath
> +	 * is configured and we're running kernel version >= 6.14.
> +	 */
> +	ret = stat(path, &arg);
> +	if (ret == 0) {
> +		head->sysfs_dir = path;
> +		path = NULL;
> +	} else
> +		head->sysfs_dir = NULL;
> +
> +	n->head = head;
>   	n->fd = -1;
>   	n->name = strdup(name);
>   
> @@ -2765,15 +2782,17 @@ static nvme_ns_t nvme_ns_open(const char *sys_path, const char *name)
>   	if (nvme_ns_init(sys_path, n) != 0)
>   		goto free_ns;
>   
> -	list_head_init(&n->paths);
>   	list_node_init(&n->entry);
>   
>   	nvme_ns_release_fd(n); /* Do not leak fds */
> +
>   	return n;
>   
>   free_ns:
>   	free(n->generic_name);
>   	free(n->name);
> +free_ns_head:
> +	free(head);
>   	free(n);
>   	return NULL;
>   }
> @@ -2836,6 +2855,71 @@ nvme_ns_t nvme_scan_namespace(const char *name)
>   	return __nvme_scan_namespace(nvme_ns_sysfs_dir(), name);
>   }
>   
> +
> +static void nvme_ns_head_scan_path(nvme_subsystem_t s, nvme_ns_t n, char *name)
> +{
> +	nvme_ctrl_t c;
> +	nvme_path_t p;
> +
> +	nvme_subsystem_for_each_ctrl(s, c) {
> +		nvme_ctrl_for_each_path(c, p) {
> +			if (!strcmp(nvme_path_get_name(p), name)) {
> +				list_add_tail(&n->head->paths, &p->nentry);
> +				p->n = n;
> +				return;
> +			}
> +		}
> +	}
> +}
> +
> +static void nvme_subsystem_set_ns_path(nvme_subsystem_t s, nvme_ns_t n)
> +{
> +	struct nvme_ns_head *head = n->head;
> +
> +	if (nvme_ns_head_get_sysfs_dir(head)) {
> +		struct dirents paths = {};
> +		int i;
> +
> +		/*
> +		 * When multipath is configured on kernel version >= 6.14,
> +		 * we use multipath sysfs link to get each path of a namespace.
> +		 */
> +		paths.num = nvme_scan_ns_head_paths(head, &paths.ents);
> +
> +		for (i = 0; i < paths.num; i++)
> +			nvme_ns_head_scan_path(s, n, paths.ents[i]->d_name);
> +	} else {
> +		nvme_ctrl_t c;
> +		nvme_path_t p;
> +		int ns_ctrl, ns_nsid, ret;
> +
> +		/*
> +		 * If multipath is not configured or we're running on kernel
> +		 * version < 6.14, fallback to the old way.
> +		 */
> +		ret = sscanf(nvme_ns_get_name(n), "nvme%dn%d",
> +				&ns_ctrl, &ns_nsid);
> +		if (ret != 2)
> +			return;
> +
> +		nvme_subsystem_for_each_ctrl(s, c) {
> +			nvme_ctrl_for_each_path(c, p) {
> +				int p_subsys, p_ctrl, p_nsid;
> +
> +				ret = sscanf(nvme_path_get_name(p),
> +					     "nvme%dc%dn%d",
> +					     &p_subsys, &p_ctrl, &p_nsid);
> +				if (ret != 3)
> +					continue;
> +				if (ns_ctrl == p_subsys && ns_nsid == p_nsid) {
> +					list_add_tail(&head->paths, &p->nentry);
> +					p->n = n;
> +				}
> +			}
> +		}
> +	}

Can't you just use the existing nvme_subsystem_set_path_ns() function
here instead of deleting it and open-coding the logic?

> +}
> +
>   static int nvme_ctrl_scan_namespace(nvme_root_t r, struct nvme_ctrl *c,
>   				    char *name)
>   {
> @@ -2861,33 +2945,9 @@ static int nvme_ctrl_scan_namespace(nvme_root_t r, struct nvme_ctrl *c,
>   	n->s = c->s;
>   	n->c = c;
>   	list_add_tail(&c->namespaces, &n->entry);
> -	return 0;
> -}
> -
> -static void nvme_subsystem_set_ns_path(nvme_subsystem_t s, nvme_ns_t n)
> -{
> -	nvme_ctrl_t c;
> -	nvme_path_t p;
> -	int ns_ctrl, ns_nsid, ret;
> -
> -	ret = sscanf(nvme_ns_get_name(n), "nvme%dn%d", &ns_ctrl, &ns_nsid);
> -	if (ret != 2)
> -		return;
> +	nvme_subsystem_set_ns_path(c->s, n);
>   
> -	nvme_subsystem_for_each_ctrl(s, c) {
> -		nvme_ctrl_for_each_path(c, p) {
> -			int p_subsys, p_ctrl, p_nsid;
> -
> -			ret = sscanf(nvme_path_get_name(p), "nvme%dc%dn%d",
> -				     &p_subsys, &p_ctrl, &p_nsid);
> -			if (ret != 3)
> -				continue;
> -			if (ns_ctrl == p_subsys && ns_nsid == p_nsid) {
> -				list_add_tail(&n->paths, &p->nentry);
> -				p->n = n;
> -			}
> -		}
> -	}
> +	return 0;
>   }
>   
>   static int nvme_subsystem_scan_namespace(nvme_root_t r, nvme_subsystem_t s,

Similar here; better to use the existing functions.

> @@ -2917,7 +2977,7 @@ static int nvme_subsystem_scan_namespace(nvme_root_t r, nvme_subsystem_t s,
>   			list_del_init(&p->nentry);
>   			p->n = NULL;
>   		}
> -		list_head_init(&_n->paths);
> +		list_head_init(&_n->head->paths);
>   		__nvme_free_ns(_n);
>   	}
>   	n->s = s;
> diff --git a/src/nvme/tree.h b/src/nvme/tree.h
> index 25d4b31b..9f382e9c 100644
> --- a/src/nvme/tree.h
> +++ b/src/nvme/tree.h
> @@ -27,6 +27,7 @@
>    */
>   
>   typedef struct nvme_ns *nvme_ns_t;
> +typedef struct nvme_ns_head *nvme_ns_head_t;
>   typedef struct nvme_path *nvme_path_t;
>   typedef struct nvme_ctrl *nvme_ctrl_t;
>   typedef struct nvme_subsystem *nvme_subsystem_t;
> @@ -1091,6 +1092,14 @@ void nvme_ctrl_set_dhchap_host_key(nvme_ctrl_t c, const char *key);
>    */
>   const char *nvme_ctrl_get_dhchap_key(nvme_ctrl_t c);
>   
> +/**
> + * nvme_ns_head_get_sysfs_dir() - sysfs dir of namespace head
> + * @head: namespace head instance
> + *
> + * Returns: sysfs directory name of @head
> + */
> +const char *nvme_ns_head_get_sysfs_dir(nvme_ns_head_t head);
> +
>   /**
>    * nvme_ctrl_set_dhchap_key() - Set controller key
>    * @c:		Controller for which the key should be set

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare at suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich



More information about the Linux-nvme mailing list