[PATCH] Revert "nvme-multipath: fix hang when disk goes live over reconnect"

John Meneghini jmeneghi at redhat.com
Wed Mar 23 18:08:49 PDT 2022


Yes, please revert.

Reviewed-by: John Meneghini <jmeneghi at redhat.com>

On 3/23/22 20:06, Chaitanya Kulkarni wrote:
> This reverts commit d50c992edf10b95d2034097405c94fecfbe1ef7f, which is
> causing the following Oops:
> <1>[    1.943642] BUG: kernel NULL pointer dereference, address: 0000000000000008
> <1>[    1.943645] #PF: supervisor read access in kernel mode
> <1>[    1.943646] #PF: error_code(0x0000) - not-present page
> <6>[    1.943648] PGD 0 P4D 0
> <4>[    1.943649] Oops: 0000 [#1] PREEMPT SMP NOPTI
> <4>[    1.943651] CPU: 0 PID: 7 Comm: kworker/u96:0 Not tainted 5.17.0-rc2nvme+ #58
> <4>[    1.943653] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
> <4>[    1.943654] Workqueue: nvme-reset-wq nvme_reset_work [nvme]
> <4>[    1.943662] RIP: 0010:nvme_parse_ana_log+0x1e/0x160 [nvme_core]
> 
> <4>[    1.943670] RSP: 0018:ffffc90000043e00 EFLAGS: 00010286
> <4>[    1.943672] RAX: 0000000000000000 RBX: ffff888103fd4210 RCX: 0000000000000000
> <4>[    1.943673] RDX: ffffffffc00b62c0 RSI: ffffc90000043e44 RDI: ffff888103fd4210
> <4>[    1.943673] RBP: ffff888103fd4210 R08: 0000000000000001 R09: ffff888100051828
> <4>[    1.943674] R10: 0000000000000000 R11: fffffffffff4a904 R12: 0000000000000000
> <4>[    1.943675] R13: ffff88817daa6500 R14: 0000000000000000 R15: ffff88817daa6505
> <4>[    1.943677] FS:  0000000000000000(0000) GS:ffff888fff200000(0000) knlGS:0000000000000000
> <4>[    1.943678] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> <4>[    1.943678] CR2: 0000000000000008 CR3: 00000001788a4000 CR4: 0000000000350ef0
> <4>[    1.943680] Call Trace:
> <4>[    1.943683]  <TASK>
> <4>[    1.943684]  ? nvme_update_ns_ana_state+0x40/0x40 [nvme_core]
> <4>[    1.943690]  nvme_mpath_update+0x4a/0x70 [nvme_core]
> <4>[    1.943695]  nvme_start_ctrl+0x110/0x140 [nvme_core]
> <4>[    1.943700]  process_one_work+0x1af/0x380
> <4>[    1.943709]  worker_thread+0x50/0x3a0
> <4>[    1.943711]  ? rescuer_thread+0x370/0x370
> <4>[    1.943712]  kthread+0xe7/0x110
> <4>[    1.943714]  ? kthread_complete_and_exit+0x20/0x20
> <4>[    1.943716]  ret_from_fork+0x22/0x30
> <4>[    1.943719]  </TASK>
> 
> [0]kdb>
> 
> With this revert, testing can now proceed.
> 
> Signed-off-by: Chaitanya Kulkarni <kch at nvidia.com>
> ---
>   drivers/nvme/host/core.c      |  1 -
>   drivers/nvme/host/multipath.c | 23 ++---------------------
>   drivers/nvme/host/nvme.h      |  4 ----
>   3 files changed, 2 insertions(+), 26 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8cb1197aac42..ccc5877d514b 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4511,7 +4511,6 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl)
>   	if (ctrl->queue_count > 1) {
>   		nvme_queue_scan(ctrl);
>   		nvme_start_queues(ctrl);
> -		nvme_mpath_update(ctrl);
>   	}
>   
>   	nvme_change_uevent(ctrl, "NVME_EVENT=connected");
> diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> index 12d4afde3662..c97d7f843977 100644
> --- a/drivers/nvme/host/multipath.c
> +++ b/drivers/nvme/host/multipath.c
> @@ -612,18 +612,8 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
>   	ns->ana_grpid = le32_to_cpu(desc->grpid);
>   	ns->ana_state = desc->state;
>   	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
> -	/*
> -	 * nvme_mpath_set_live() will trigger I/O to the mpath
> -	 * device node and in turn to this path device, however we
> -	 * cannot accept this I/O if the ctrl is not live.
> -	 * This may deadlock if called from the nvme_mpath_init_identify()
> -	 * and the ctrl will never complete initialization,
> -	 * preventing I/O from completing.
> -	 * For this case we will reprocess the ANA log page
> -	 * in nvme_mpath_update() once the ctrl ready.
> -	 */
> -	if (nvme_state_is_live(ns->ana_state) &&
> -	    ns->ctrl->state == NVME_CTRL_LIVE)
> +
> +	if (nvme_state_is_live(ns->ana_state))
>   		nvme_mpath_set_live(ns);
>   }
>   
> @@ -710,15 +700,6 @@ static void nvme_ana_work(struct work_struct *work)
>   	nvme_read_ana_log(ctrl);
>   }
>   
> -void nvme_mpath_update(struct nvme_ctrl *ctrl)
> -{
> -	u32 nr_change_groups = 0;
> -
> -	mutex_lock(&ctrl->ana_lock);
> -	nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state);
> -	mutex_unlock(&ctrl->ana_lock);
> -}
> -
>   static void nvme_anatt_timeout(struct timer_list *t)
>   {
>   	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 76f7a5f37379..1ea908d43e17 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -781,7 +781,6 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
>   void nvme_mpath_remove_disk(struct nvme_ns_head *head);
>   int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
>   void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
> -void nvme_mpath_update(struct nvme_ctrl *ctrl);
>   void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
>   void nvme_mpath_stop(struct nvme_ctrl *ctrl);
>   bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
> @@ -853,9 +852,6 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
>   "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
>   	return 0;
>   }
> -void nvme_mpath_update(struct nvme_ctrl *ctrl)
> -{
> -}
>   static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
>   {
>   }




More information about the Linux-nvme mailing list