[PATCH RFC] nvme: fix crash due to invalid cdev teardown

Maurizio Lombardi mlombard at arkamax.eu
Thu Jun 4 06:51:23 PDT 2026


I marked it as RFC because I am not sure if not creating
the cdevs is an acceptable solution.

Thoughts?

Maurizio

On Thu Jun 4, 2026 at 3:43 PM CEST, Maurizio Lombardi wrote:
> In the NVMe multipath code, if nvme_add_ns_head_cdev() fails during
> nvme_mpath_set_live(), the error is completely ignored. However, during
> teardown, nvme_remove_head() unconditionally calls nvme_cdev_del().
> This teardown asymmetry leads to a kernel panic if the character device
> was never successfully initialized, which might happen in very rare
> circumstances.
>
> BUG: kernel NULL pointer dereference, address: 00000000000000d0
>  device_del+0x39/0x3c0
>  cdev_device_del+0x15/0x50
>  nvme_cdev_del+0xe/0x20 [nvme_core]
>  nvme_mpath_shutdown_disk+0x38/0x60 [nvme_core]
>  nvme_ns_remove+0x177/0x1f0 [nvme_core]
>  nvme_remove_namespaces+0xdc/0x130 [nvme_core]
>  nvme_do_delete_ctrl+0x71/0xd0 [nvme_core]
>
> Regarding head namespaces, introduce the NVME_NSHEAD_CDEV_LIVE bit to
> track the successful creation of the head character device.
> nvme_remove_head() now checks this bit before attempting deletion.
> Enforcing rollback here is unsafe, as it triggers kobject_init panics
> on the embedded device struct during asynchronous ANA retries.
>
> Regarding non-head namespaces, enforce strict failure by capturing the
> return value of nvme_add_ns_cdev() and rolling back the namespace's
> creation. Because nvme_alloc_ns() completely frees the nvme_ns structure
> on error, rolling back is safe here.
>
> Signed-off-by: Maurizio Lombardi <mlombard at redhat.com>
> ---
>  drivers/nvme/host/core.c      | 5 ++++-
>  drivers/nvme/host/multipath.c | 5 ++++-
>  drivers/nvme/host/nvme.h      | 1 +
>  3 files changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 72c50d5e938d..1e24232c0aac 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4213,13 +4213,16 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
>  		goto out_cleanup_ns_from_list;
>  
>  	if (!nvme_ns_head_multipath(ns->head))
> -		nvme_add_ns_cdev(ns);
> +		if (nvme_add_ns_cdev(ns))
> +			goto out_del_disk;
>  
>  	nvme_mpath_add_disk(ns, info->anagrpid);
>  	nvme_fault_inject_init(&ns->fault_inject, ns->disk->disk_name);
>  
>  	return;
>  
> + out_del_disk:
> +	del_gendisk(ns->disk);
>   out_cleanup_ns_from_list:
>  	nvme_put_ctrl(ctrl);
>  	mutex_lock(&ctrl->namespaces_lock);
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 263161cb8ac0..a786eb18735a 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -628,6 +628,8 @@ static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
>  		return ret;
>  	ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
>  			    &nvme_ns_head_chr_fops, THIS_MODULE);
> +	if (!ret)
> +		set_bit(NVME_NSHEAD_CDEV_LIVE, &head->flags);
>  	return ret;
>  }
>  
> @@ -672,7 +674,8 @@ static void nvme_remove_head(struct nvme_ns_head *head)
>  		 */
>  		kblockd_schedule_work(&head->requeue_work);
>  
> -		nvme_cdev_del(&head->cdev, &head->cdev_device);
> +		if (test_and_clear_bit(NVME_NSHEAD_CDEV_LIVE, &head->flags))
> +			nvme_cdev_del(&head->cdev, &head->cdev_device);
>  		synchronize_srcu(&head->srcu);
>  		del_gendisk(head->disk);
>  	}
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9ccaed0b9dbf..e8833e56e9fd 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -567,6 +567,7 @@ struct nvme_ns_head {
>  	unsigned int		delayed_removal_secs;
>  #define NVME_NSHEAD_DISK_LIVE		0
>  #define NVME_NSHEAD_QUEUE_IF_NO_PATH	1
> +#define NVME_NSHEAD_CDEV_LIVE		2
>  	struct nvme_ns __rcu	*current_path[];
>  #endif
>  };




More information about the Linux-nvme mailing list