[PATCH] nvme: introduce panic_on_double_cqe param

Guixin Liu kanie at linux.alibaba.com
Wed Oct 22 06:54:54 PDT 2025


Add a new debug module parameter, panic_on_double_cqe, that controls
whether the kernel is crashed when a duplicate CQE is detected. Crashing
at the point of detection preserves the kernel context, such as the
submission queue (SQ), the completion queue (CQ), and so on, for
subsequent debugging and analysis.

Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
---
 drivers/nvme/host/core.c | 5 +++++
 drivers/nvme/host/nvme.h | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index fa4181d7de73..7a3f9129a39c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -95,6 +95,11 @@ module_param(apst_secondary_latency_tol_us, ulong, 0644);
 MODULE_PARM_DESC(apst_secondary_latency_tol_us,
 	"secondary APST latency tolerance in us");
 
+bool panic_on_double_cqe;
+EXPORT_SYMBOL_GPL(panic_on_double_cqe);
+module_param(panic_on_double_cqe, bool, 0644);
+MODULE_PARM_DESC(panic_on_double_cqe, "crash the kernel to save the scene");
+
 /*
  * Older kernels didn't enable protection information if it was at an offset.
  * Newer kernels do, so it breaks reads on the upgrade if such formats were
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 102fae6a231c..24010d5d15ce 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -595,6 +595,8 @@ static inline u16 nvme_cid(struct request *rq)
 	return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
 }
 
+extern bool panic_on_double_cqe;
+
 static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
 		u16 command_id)
 {
@@ -612,6 +614,7 @@ static inline struct request *nvme_find_rq(struct blk_mq_tags *tags,
 		dev_err(nvme_req(rq)->ctrl->device,
 			"request %#x genctr mismatch (got %#x expected %#x)\n",
 			tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
+		BUG_ON(panic_on_double_cqe);
 		return NULL;
 	}
 	return rq;
-- 
2.43.0




More information about the Linux-nvme mailing list