bad unlock balance WARNING at nvme/045

Shinichiro Kawasaki shinichiro.kawasaki at wdc.com
Wed Oct 26 05:01:51 PDT 2022


On Oct 26, 2022 / 08:42, Hannes Reinecke wrote:
> On 10/18/22 12:57, Sagi Grimberg wrote:
> > 
> > > Hello Hannes,
> > > 
> > > I observed "WARNING: bad unlock balance detected!" at nvme/045 [1].
> > > As the Call
> > > Trace shows, nvme_auth_reset() has unbalanced mutex lock/unlock.
> > > 
> > >     mutex_lock(&ctrl->dhchap_auth_mutex);
> > >     list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
> > >         mutex_unlock(&ctrl->dhchap_auth_mutex);
> > >         flush_work(&chap->auth_work);
> > >         __nvme_auth_reset(chap);
> > >     }
> > >     mutex_unlock(&ctrl->dhchap_auth_mutex);
> > > 
> > > The first loop iteration unlocks the mutex and never re-acquires
> > > it, so the mutex_unlock() after the loop releases an already
> > > unlocked mutex.
> > > 
> > > I tried to remove the mutex_unlock in the list iteration with a
> > > patch [2], but it resulted in another "WARNING: possible recursive
> > > locking detected" [3]. I'm not sure, but the cause of this WARN
> > > could be that __nvme_auth_work and nvme_dhchap_auth_work run on the
> > > same workqueue, nvme_wq.
> > > 
> > > Could you take a look for a fix?
> > 
> > I'm looking at the code, and I think the way the concurrent
> > negotiations run and the way dhchap_auth_mutex is handled are very
> > fragile. Also, why should the per-queue auth_work hold the
> > controller-wide dhchap_auth_mutex? The only reason I see is that
> > nvme_auth_negotiate checks whether the chap context is already
> > queued. Why should we allow that?
> > 
> Well, that's partially due to the internal design of linux-nvme.
> The controller structure itself doesn't have 'queues' per se; there is
> just a general 'ctrl' structure. So while I would have loved to have a
> per-queue structure to hook the chap authentication into, all I have is
> the controller structure.
> Hence we have a controller-wide list holding the 'chap' structures for
> the individual queues, and a controller-wide mutex to gate
> modifications of that list.
> 
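For reference, the arrangement described above has roughly this shape
(fields abridged from the driver source; comments are mine):

    struct nvme_ctrl {
    	/* ... */
    	struct list_head dhchap_auth_list;	/* one chap context per queue */
    	struct mutex dhchap_auth_mutex;		/* gates list modifications */
    };

    struct nvme_dhchap_queue_context {
    	struct list_head entry;		/* linked on ctrl->dhchap_auth_list */
    	struct work_struct auth_work;	/* per-queue negotiation */
    	/* ... */
    };
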
> > I'd suggest splicing dhchap_auth_list to a local list and then just
> > flushing nvme_wq in the teardown flows; same for the
> > renegotiation/reset flows. And we should prevent the double-queuing
> > of chap negotiations to begin with, instead of handling it (I still
> > don't understand why this is permitted, but perhaps just return
> > EBUSY in this case?)
> 
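As a concrete sketch of this splice approach (illustrative and untested;
the function name is hypothetical, the fields are the driver's):

    static void nvme_auth_reset_spliced(struct nvme_ctrl *ctrl)
    {
    	struct nvme_dhchap_queue_context *chap;
    	LIST_HEAD(reset_list);

    	/* Detach all chap contexts while holding the mutex... */
    	mutex_lock(&ctrl->dhchap_auth_mutex);
    	list_splice_init(&ctrl->dhchap_auth_list, &reset_list);
    	mutex_unlock(&ctrl->dhchap_auth_mutex);

    	/* ...then flush and reset them without the controller-wide lock. */
    	list_for_each_entry(chap, &reset_list, entry) {
    		flush_work(&chap->auth_work);
    		__nvme_auth_reset(chap);
    	}

    	/* Put the contexts back so they can be re-used. */
    	mutex_lock(&ctrl->dhchap_auth_mutex);
    	list_splice(&reset_list, &ctrl->dhchap_auth_list);
    	mutex_unlock(&ctrl->dhchap_auth_mutex);
    }
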
> We don't double queue; we're re-using the existing entries.

Hannes, thanks for the explanations.

> 
> Can you check if this fix works?
> 
> diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
> index c8a6db7c4498..4e824aab30eb 100644
> --- a/drivers/nvme/host/auth.c
> +++ b/drivers/nvme/host/auth.c
> @@ -926,7 +926,6 @@ void nvme_auth_reset(struct nvme_ctrl *ctrl)
> 
>         mutex_lock(&ctrl->dhchap_auth_mutex);
>         list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
> -               mutex_unlock(&ctrl->dhchap_auth_mutex);
>                 flush_work(&chap->auth_work);
>                 __nvme_auth_reset(chap);
>         }
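
For clarity, nvme_auth_reset() with this hunk applied (reconstructed
from the snippet in the report and the hunk context):

    void nvme_auth_reset(struct nvme_ctrl *ctrl)
    {
    	struct nvme_dhchap_queue_context *chap;

    	mutex_lock(&ctrl->dhchap_auth_mutex);
    	list_for_each_entry(chap, &ctrl->dhchap_auth_list, entry) {
    		flush_work(&chap->auth_work);
    		__nvme_auth_reset(chap);
    	}
    	mutex_unlock(&ctrl->dhchap_auth_mutex);
    }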

I confirmed that this hunk avoids the "WARNING: bad unlock balance
detected!". As far as I have run blktests with this change, I observed no
failures in other test cases.

However, I observed another new WARN at nvme/045: "WARNING: possible
recursive locking detected". I think it was caused by
nvme_dhchap_auth_work, running on nvme_wq, trying to flush another work
item, __nvme_auth_work, queued on the same workqueue. I created the patch
below, which adds a dedicated workqueue, nvme_auth_wq, for
__nvme_auth_work. Do you think this fix approach is acceptable?
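
My understanding, as a simplified sketch (the names here are
illustrative, not the driver code): a work item must not flush another
work item queued on the workqueue it itself runs on, at least when that
workqueue was created with WQ_MEM_RECLAIM, as nvme_wq is; lockdep then
sees the workqueue's lockdep map acquired recursively:

    static struct workqueue_struct *example_wq;	/* WQ_MEM_RECLAIM */
    static struct work_struct inner_work;

    static void outer_fn(struct work_struct *work)
    {
    	/*
    	 * Runs on example_wq. Flushing inner_work, which is queued on
    	 * example_wq as well, acquires the workqueue's lockdep map that
    	 * is already held while this work item executes, hence
    	 * "WARNING: possible recursive locking detected".
    	 */
    	flush_work(&inner_work);
    }

A dedicated workqueue for __nvme_auth_work breaks this self-flush
pattern. The new workqueue keeps WQ_MEM_RECLAIM since the auth work may
be flushed from controller reset and teardown.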

diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index 4e824aab30eb..946085070223 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -42,6 +42,8 @@ struct nvme_dhchap_queue_context {
 	int sess_key_len;
 };
 
+static struct workqueue_struct *nvme_auth_wq;
+
 #define nvme_auth_flags_from_qid(qid) \
 	(qid == 0) ? 0 : BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_RESERVED
 #define nvme_auth_queue_from_qid(ctrl, qid) \
@@ -869,7 +871,7 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
 			mutex_unlock(&ctrl->dhchap_auth_mutex);
 			flush_work(&chap->auth_work);
 			__nvme_auth_reset(chap);
-			queue_work(nvme_wq, &chap->auth_work);
+			queue_work(nvme_auth_wq, &chap->auth_work);
 			return 0;
 		}
 	}
@@ -896,7 +898,7 @@ int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)
 	INIT_WORK(&chap->auth_work, __nvme_auth_work);
 	list_add(&chap->entry, &ctrl->dhchap_auth_list);
 	mutex_unlock(&ctrl->dhchap_auth_mutex);
-	queue_work(nvme_wq, &chap->auth_work);
+	queue_work(nvme_auth_wq, &chap->auth_work);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nvme_auth_negotiate);
@@ -969,6 +971,21 @@ static void nvme_dhchap_auth_work(struct work_struct *work)
 	 */
 }
 
+int nvme_auth_init(void)
+{
+	nvme_auth_wq = alloc_workqueue("nvme-auth-wq",
+				       WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+	if (!nvme_auth_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void nvme_auth_exit(void)
+{
+	destroy_workqueue(nvme_auth_wq);
+}
+
 void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
 {
 	INIT_LIST_HEAD(&ctrl->dhchap_auth_list);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 059737c1a2c1..aa06c686ad29 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -5341,8 +5341,12 @@ static int __init nvme_core_init(void)
 		goto unregister_generic_ns;
 	}
 
+	result = nvme_auth_init();
+	if (result)
+		goto unregister_generic_ns;
+
 	return 0;
 
 unregister_generic_ns:
 	unregister_chrdev_region(nvme_ns_chr_devt, NVME_MINORS);
 destroy_subsys_class:
@@ -5363,6 +5367,7 @@ static int __init nvme_core_init(void)
 
 static void __exit nvme_core_exit(void)
 {
+	nvme_auth_exit();
 	class_destroy(nvme_ns_chr_class);
 	class_destroy(nvme_subsys_class);
 	class_destroy(nvme_class);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a29877217ee6..472ab2d14a67 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -1019,6 +1019,8 @@ static inline bool nvme_ctrl_sgl_supported(struct nvme_ctrl *ctrl)
 }
 
 #ifdef CONFIG_NVME_AUTH
+int nvme_auth_init(void);
+void nvme_auth_exit(void);
 void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl);
 void nvme_auth_stop(struct nvme_ctrl *ctrl);
 int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid);
@@ -1026,6 +1028,8 @@ int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid);
 void nvme_auth_reset(struct nvme_ctrl *ctrl);
 void nvme_auth_free(struct nvme_ctrl *ctrl);
 #else
+static inline int nvme_auth_init(void) { return 0; }
+static inline void nvme_auth_exit(void) {};
 static inline void nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) {};
 static inline void nvme_auth_stop(struct nvme_ctrl *ctrl) {};
 static inline int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid)


-- 
Shin'ichiro Kawasaki
