[PATCH] nvme: introduce panic_on_double_cqe param

Tue Oct 28 22:03:00 PDT 2025

On 10/28/25 21:17, Chaitanya Kulkarni wrote:
> On 10/28/25 18:42, Guixin Liu wrote:
>>
>>
>> 在 2025/10/23 13:14, Chaitanya Kulkarni 写道:
>>> On 10/22/25 6:54 AM, Guixin Liu wrote:
>>>> Add a new debug switch to control whether to trigger a kernel crash
>>>> when duplicate CQEs are detected, in order to preserve the kernel
>>>> context, such as sq, cq, and so on, for subsequent debugging and
>>>> analysis.
>>>>
>>>> Signed-off-by: Guixin Liu <kanie at linux.alibaba.com>
>>>> ---
>>>>    drivers/nvme/host/core.c | 5 +++++
>>>>    drivers/nvme/host/nvme.h | 3 +++
>>>>    2 files changed, 8 insertions(+)
>>>>
>>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>>> index fa4181d7de73..7a3f9129a39c 100644
>>>> --- a/drivers/nvme/host/core.c
>>>> +++ b/drivers/nvme/host/core.c
>>>> @@ -95,6 +95,11 @@ module_param(apst_secondary_latency_tol_us, 
>>>> ulong, 0644);
>>>>    MODULE_PARM_DESC(apst_secondary_latency_tol_us,
>>>>        "secondary APST latency tolerance in us");
>>>>    +bool panic_on_double_cqe;
>>>> +EXPORT_SYMBOL_GPL(panic_on_double_cqe);
>>>> +module_param(panic_on_double_cqe, bool, 0644);
>>>> +MODULE_PARM_DESC(panic_on_double_cqe, "crash the kernel to save 
>>>> the scene");
>>>> +
>>>>    /*
>>>>     * Older kernels didn't enable protection information if it was 
>>>> at an offset.
>>>>     * Newer kernels do, so it breaks reads on the upgrade if such 
>>>> formats were
>>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>>> index 102fae6a231c..24010d5d15ce 100644
>>>> --- a/drivers/nvme/host/nvme.h
>>>> +++ b/drivers/nvme/host/nvme.h
>>>> @@ -595,6 +595,8 @@ static inline u16 nvme_cid(struct request *rq)
>>>>        return nvme_cid_install_genctr(nvme_req(rq)->genctr) | rq->tag;
>>>>    }
>>>>    +extern bool panic_on_double_cqe;
>>>> +
>>>>    static inline struct request *nvme_find_rq(struct blk_mq_tags 
>>>> *tags,
>>>>            u16 command_id)
>>>>    {
>>>> @@ -612,6 +614,7 @@ static inline struct request 
>>>> *nvme_find_rq(struct blk_mq_tags *tags,
>>>>            dev_err(nvme_req(rq)->ctrl->device,
>>>>                "request %#x genctr mismatch (got %#x expected %#x)\n",
>>>>                tag, genctr, nvme_genctr_mask(nvme_req(rq)->genctr));
>>>> +        BUG_ON(panic_on_double_cqe);
>>>>            return NULL;
>>>>        }
>>>>        return rq;
>>>
>>> I'm really not sure this is a good idea; I'll leave it to others.
>>>
>>>
>>> -ck
>> Yeah, I think so too, and I'd also like to find a more elegant solution.
>>
>> Best Regards,
>> Guixin Liu
>>
>
> What about logging the necessary information and still continuing the
> setup?
> When you are debugging you can always set up a breakpoint there and get
> the system state at the breakpoint. Something like the following, totally
> untested:
>
> Usage:
> # Enable at runtime
> echo 'module nvme +p' > /sys/kernel/debug/dynamic_debug/control
>
> # Or at module load
> modprobe nvme dyndbg='+p'
>

Something is wrong with my mail; trying it again:

  drivers/nvme/host/pci.c | 48 ++++++++++++++++++++++++++++++++++++++---
  1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c916176bd9f0..a9f9c61d3fc9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1294,6 +1294,50 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
      return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
  }

+/**
+ * nvme_handle_duplicate_cqe - Log a duplicate or invalid CQE
+ * @nvmeq: The queue whose state is logged alongside the CQE
+ * @cqe: The completion queue entry
+ * @command_id: The command ID from the CQE
+ *
+ * Emits a single dev_dbg() record with the CQE fields and the queue state;
+ * it only logs and changes nothing. Debug output must be enabled to see it.
+ */
+static void nvme_handle_duplicate_cqe(struct nvme_queue *nvmeq,
+                      struct nvme_completion *cqe,
+                      u16 command_id)
+{
+    struct nvme_dev *dev = nvmeq->dev;
+
+    dev_dbg(dev->ctrl.device,
+        "Duplicate/Invalid CQE detected:\n"
+        "  Queue: %d (SQ ID: %u)\n"
+        "  Command ID: %u\n"
+        "  Status: %#x\n"
+        "  Result: %#llx\n"
+        "Queue state:\n"
+        "  Queue depth: %u\n"
+        "  SQ tail: %u, last_sq_tail: %u\n"
+        "  CQ head: %u, phase: %u\n"
+        "  CQ vector: %u\n"
+        "  SQ DMA addr: %pad\n"
+        "  CQ DMA addr: %pad\n"
+        "  Flags: %#lx (enabled=%d, sq_cmb=%d, polled=%d)\n",
+        nvmeq->qid, le16_to_cpu(cqe->sq_id),
+        command_id, le16_to_cpu(cqe->status),
+        le64_to_cpu(cqe->result.u64),
+        nvmeq->q_depth,
+        nvmeq->sq_tail, nvmeq->last_sq_tail,
+        nvmeq->cq_head, nvmeq->cq_phase,
+        nvmeq->cq_vector,
+        &nvmeq->sq_dma_addr,
+        &nvmeq->cq_dma_addr,
+        nvmeq->flags,
+        test_bit(NVMEQ_ENABLED, &nvmeq->flags),
+        test_bit(NVMEQ_SQ_CMB, &nvmeq->flags),
+        test_bit(NVMEQ_POLLED, &nvmeq->flags));
+}
+
  static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,
                     struct io_comp_batch *iob, u16 idx)
  {
@@ -1315,9 +1359,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq,

      req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
      if (unlikely(!req)) {
-        dev_warn(nvmeq->dev->ctrl.device,
-            "invalid id %d completed on queue %d\n",
-            command_id, le16_to_cpu(cqe->sq_id));
+        nvme_handle_duplicate_cqe(nvmeq, cqe, command_id);
          return;
      }

-ck