[PATCH 3/4] nvmet: fix hang in nvmet_ns_disable()

Sagi Grimberg sagi at grimberg.me
Tue Jan 3 02:58:09 PST 2023



On 1/3/23 12:03, Taehee Yoo wrote:
> An nvme target namespace is enabled and disabled by nvmet_ns_enable()
> and nvmet_ns_disable().
> The subsys->lock serializes these paths so that namespace data cannot
> be used while nvmet_ns_enable() or nvmet_ns_disable() is running.
> The ns->enabled boolean prevents the namespace data from being used in
> the wrong state, such as before it has been initialized.
> 
> nvmet_ns_disable() acquires subsys->lock and sets ns->enabled to false.
> It then drops subsys->lock while it waits for the ns->disable_done
> completion.
> In that window nvmet_ns_enable() can run concurrently and call
> percpu_ref_init(), re-initializing ns->ref, so ns->disable_done is
> never completed and nvmet_ns_disable() hangs.
> 
>     CPU0                                     CPU1
>     nvmet_ns_disable();
>     mutex_lock(&subsys->lock);               nvmet_ns_enable();
>                                              mutex_lock(&subsys->lock);
>     ns->enabled = false;
>     mutex_unlock(&subsys->lock);
>                                              percpu_ref_init();
>     wait_for_completion(&ns->disable_done);  <-- infinite wait
> 
>     mutex_lock(&subsys->lock);
>     mutex_unlock(&subsys->lock);
> 
> INFO: task bash:926 blocked for more than 30 seconds.
>        Tainted: G        W          6.1.0+ #17
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> task:bash            state:D stack:27200 pid:926   ppid:911
> flags:0x00004000
> Call Trace:
>   <TASK>
>   __schedule+0xafc/0x2930
>   ? io_schedule_timeout+0x160/0x160
>   ? _raw_spin_unlock_irq+0x24/0x50
>   ? __wait_for_common+0x39b/0x5c0
>   ? usleep_range_state+0x190/0x190
>   schedule+0x130/0x230
>   schedule_timeout+0x18a/0x240
>   ? usleep_range_state+0x190/0x190
>   ? rcu_read_lock_sched_held+0x12/0x80
>   ? lock_downgrade+0x700/0x700
>   ? do_raw_spin_trylock+0xb5/0x180
>   ? lock_contended+0xdf0/0xdf0
>   ? _raw_spin_unlock_irq+0x24/0x50
>   ? trace_hardirqs_on+0x3c/0x190
>   __wait_for_common+0x1ca/0x5c0
>   ? usleep_range_state+0x190/0x190
>   ? bit_wait_io+0xf0/0xf0
>   ? _raw_spin_unlock_irqrestore+0x59/0x70
>   nvmet_ns_disable+0x288/0x490
>   ? nvmet_ns_enable+0x970/0x970
>   ? lockdep_hardirqs_on_prepare+0x410/0x410
>   ? rcu_read_lock_sched_held+0x12/0x80
>   ? configfs_write_iter+0x1df/0x480
>   ? nvmet_ns_revalidate_size_store+0x220/0x220
>   nvmet_ns_enable_store+0x85/0xe0
> [ ... ]
> 
> Fixes: a07b4970f464 ("nvmet: add a generic NVMe target")
> Signed-off-by: Taehee Yoo <ap420073 at gmail.com>
> ---
>   drivers/nvme/target/configfs.c | 14 +++++++-------
>   drivers/nvme/target/core.c     | 10 ++++++----
>   drivers/nvme/target/nvmet.h    |  8 +++++++-
>   3 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index 907143870da5..d878c4231d65 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -348,7 +348,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
>   
>   	mutex_lock(&subsys->lock);
>   	ret = -EBUSY;
> -	if (ns->enabled)
> +	if (ns->state != NVMET_NS_DISABLED)
>   		goto out_unlock;
>   
>   	ret = -EINVAL;
> @@ -390,7 +390,7 @@ static ssize_t nvmet_ns_p2pmem_store(struct config_item *item,
>   	int error;
>   
>   	mutex_lock(&ns->subsys->lock);
> -	if (ns->enabled) {
> +	if (ns->state != NVMET_NS_DISABLED) {
>   		ret = -EBUSY;
>   		goto out_unlock;
>   	}
> @@ -427,7 +427,7 @@ static ssize_t nvmet_ns_device_uuid_store(struct config_item *item,
>   	int ret = 0;
>   
>   	mutex_lock(&subsys->lock);
> -	if (ns->enabled) {
> +	if (ns->state != NVMET_NS_DISABLED) {
>   		ret = -EBUSY;
>   		goto out_unlock;
>   	}
> @@ -458,7 +458,7 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
>   	int ret = 0;
>   
>   	mutex_lock(&subsys->lock);
> -	if (ns->enabled) {
> +	if (ns->state != NVMET_NS_DISABLED) {
>   		ret = -EBUSY;
>   		goto out_unlock;
>   	}
> @@ -523,7 +523,7 @@ CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
>   
>   static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
>   {
> -	return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
> +	return sprintf(page, "%d\n", to_nvmet_ns(item)->state == NVMET_NS_ENABLED);
>   }
>   
>   static ssize_t nvmet_ns_enable_store(struct config_item *item,
> @@ -561,7 +561,7 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
>   		return -EINVAL;
>   
>   	mutex_lock(&ns->subsys->lock);
> -	if (ns->enabled) {
> +	if (ns->state != NVMET_NS_DISABLED) {
>   		pr_err("disable ns before setting buffered_io value.\n");
>   		mutex_unlock(&ns->subsys->lock);
>   		return -EINVAL;
> @@ -587,7 +587,7 @@ static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
>   		return -EINVAL;
>   
>   	mutex_lock(&ns->subsys->lock);
> -	if (!ns->enabled) {
> +	if (ns->state != NVMET_NS_ENABLED) {
>   		pr_err("enable ns before revalidate.\n");
>   		mutex_unlock(&ns->subsys->lock);
>   		return -EINVAL;
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index f66ed13d7c11..58a91fb9c2f7 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -563,7 +563,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
>   		goto out_unlock;
>   	}
>   
> -	if (ns->enabled)
> +	if (ns->state != NVMET_NS_DISABLED)
>   		goto out_unlock;
>   
>   	ret = -EMFILE;
> @@ -598,7 +598,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
>   	subsys->nr_namespaces++;
>   
>   	nvmet_ns_changed(subsys, ns->nsid);
> -	ns->enabled = true;
> +	ns->state = NVMET_NS_ENABLED;
>   	ret = 0;
>   out_unlock:
>   	mutex_unlock(&subsys->lock);
> @@ -621,10 +621,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
>   	struct nvmet_ctrl *ctrl;
>   
>   	mutex_lock(&subsys->lock);
> -	if (!ns->enabled)
> +	if (ns->state != NVMET_NS_ENABLED)
>   		goto out_unlock;
>   
> -	ns->enabled = false;
> +	ns->state = NVMET_NS_DISABLING;
>   	xa_erase(&ns->subsys->namespaces, ns->nsid);
>   	if (ns->nsid == subsys->max_nsid)
>   		subsys->max_nsid = nvmet_max_nsid(subsys);
> @@ -652,6 +652,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
>   	subsys->nr_namespaces--;
>   	nvmet_ns_changed(subsys, ns->nsid);
>   	nvmet_ns_dev_disable(ns);
> +	ns->state = NVMET_NS_DISABLED;
>   out_unlock:
>   	mutex_unlock(&subsys->lock);
>   }
> @@ -689,6 +690,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
>   	uuid_gen(&ns->uuid);
>   	ns->buffered_io = false;
>   	ns->csi = NVME_CSI_NVM;
> +	ns->state = NVMET_NS_DISABLED;
>   
>   	return ns;
>   }
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index 89bedfcd974c..e609787577c6 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -56,6 +56,12 @@
>   #define IPO_IATTR_CONNECT_SQE(x)	\
>   	(cpu_to_le32(offsetof(struct nvmf_connect_command, x)))
>   
> +enum nvmet_ns_state {
> +	NVMET_NS_ENABLED,
> +	NVMET_NS_DISABLING,
> +	NVMET_NS_DISABLED
> +};
> +
>   struct nvmet_ns {
>   	struct percpu_ref	ref;
>   	struct block_device	*bdev;
> @@ -69,7 +75,7 @@ struct nvmet_ns {
>   	u32			anagrpid;
>   
>   	bool			buffered_io;
> -	bool			enabled;
> +	enum nvmet_ns_state	state;
>   	struct nvmet_subsys	*subsys;
>   	const char		*device_path;
>   

This looks reasonable to me...
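For readers following along, here is a minimal userspace sketch of the idea
behind the tri-state: pthreads stand in for subsys->lock and for the
percpu_ref/completion machinery, and names like ns_enable/ns_disable/refs are
illustrative only, not the nvmet symbols. The point is that a disable in
progress must keep a concurrent enable from re-initializing the reference
counter, which a plain boolean cannot express.

#include <pthread.h>
#include <stdio.h>

enum ns_state { NS_ENABLED, NS_DISABLING, NS_DISABLED };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t drained = PTHREAD_COND_INITIALIZER;
static enum ns_state state = NS_DISABLED;
static int refs;	/* stand-in for the percpu_ref */

static int ns_enable(void)
{
	int ret = -1;

	pthread_mutex_lock(&lock);
	/* DISABLING also counts as busy; a plain bool cannot express that. */
	if (state == NS_DISABLED) {
		refs = 1;	/* stand-in for percpu_ref_init() */
		state = NS_ENABLED;
		ret = 0;
	}
	pthread_mutex_unlock(&lock);
	return ret;
}

static void ns_disable(void)
{
	pthread_mutex_lock(&lock);
	if (state != NS_ENABLED) {
		pthread_mutex_unlock(&lock);
		return;
	}
	state = NS_DISABLING;	/* a concurrent ns_enable() now bails out */
	refs--;			/* drop the initial reference */
	/* cond_wait drops the lock while waiting, analogous to the
	 * unlock/wait/relock dance around wait_for_completion(). */
	while (refs > 0)
		pthread_cond_wait(&drained, &lock);
	state = NS_DISABLED;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	ns_enable();
	ns_disable();
	/* re-enabling only succeeds once the disable has fully finished */
	printf("re-enable: %s\n", ns_enable() == 0 ? "ok" : "busy");
	return 0;
}

The important property, which the patch provides for nvmet, is that the
enable path can no longer reach percpu_ref_init() while a disable is still
draining references.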
Reviewed-by: Sagi Grimberg <sagi at grimberg.me>


