[PATCH] nvme-pci: add NVMe controller statistics

Tokunori Ikegami ikegami.t at gmail.com
Wed May 28 10:02:56 PDT 2025


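Count controller warning events (timeouts, aborts, resets and disables)
in the PCI driver and expose the counters through a read-only "stats"
sysfs attribute.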

Signed-off-by: Tokunori Ikegami <ikegami.t at gmail.com>
---
 drivers/nvme/host/nvme.h |  9 +++++++++
 drivers/nvme/host/pci.c  | 20 ++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1de1b843afa5..aa28bea48783 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -277,6 +277,13 @@ enum nvme_ctrl_flags {
 	NVME_CTRL_FROZEN		= 6,
 };
 
+struct nvme_stats {	/* controller warning-event counters */
+	unsigned long timeouts;	/* I/O timeouts found completed by polling */
+	unsigned long aborts;	/* timeouts that go on to request an abort */
+	unsigned long resets;	/* "will reset"/"reset controller" warnings */
+	unsigned long disables;	/* device disabled after a reset failure */
+};
+
 struct nvme_ctrl {
 	bool comp_seen;
 	bool identified;
@@ -410,6 +417,8 @@ struct nvme_ctrl {
 
 	enum nvme_ctrl_type cntrltype;
 	enum nvme_dctype dctype;
+
+	struct nvme_stats stats;
 };
 
 static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 94ed13903b1b..5ce26408e426 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1465,6 +1465,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
 		dev_warn(dev->ctrl.device,
 			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
 			 csts, result);
+	dev->ctrl.stats.resets++;
 
 	if (csts != ~0)
 		return;
@@ -1526,6 +1527,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 		dev_warn(dev->ctrl.device,
 			 "I/O tag %d (%04x) QID %d timeout, completion polled\n",
 			 req->tag, nvme_cid(req), nvmeq->qid);
+		dev->ctrl.stats.timeouts++;
 		return BLK_EH_DONE;
 	}
 
@@ -1563,6 +1565,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 			 "I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
 			 req->tag, nvme_cid(req), opcode,
 			 nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
+		dev->ctrl.stats.resets++;
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		goto disable;
 	}
@@ -1582,6 +1585,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 		 req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
 		 nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
 		 blk_rq_bytes(req));
+	dev->ctrl.stats.aborts++;
 
 	abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
 					 BLK_MQ_REQ_NOWAIT);
@@ -2390,6 +2394,19 @@ static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RW(hmb);
 
+static ssize_t stats_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)	/* sysfs show callback for "stats" */
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct nvme_stats *stats = &ctrl->stats;	/* read without locking */
+
+	return sysfs_emit(buf,	/* all four counters on a single line */
+			  "timeouts: %lu, aborts: %lu, resets: %lu, disables: %lu\n",
+			  stats->timeouts, stats->aborts, stats->resets,
+			  stats->disables);
+}
+static DEVICE_ATTR_RO(stats);	/* defines dev_attr_stats */
+
 static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
 		struct attribute *a, int n)
 {
@@ -2414,6 +2431,7 @@ static struct attribute *nvme_pci_attrs[] = {
 	&dev_attr_cmbloc.attr,
 	&dev_attr_cmbsz.attr,
 	&dev_attr_hmb.attr,
+	&dev_attr_stats.attr,
 	NULL,
 };
 
@@ -3055,6 +3073,7 @@ static void nvme_reset_work(struct work_struct *work)
 	 */
 	dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
 		 result);
+	dev->ctrl.stats.disables++;
 	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
 	nvme_dev_disable(dev, true);
 	nvme_sync_queues(&dev->ctrl);
@@ -3591,6 +3610,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
 	case pci_channel_io_frozen:
 		dev_warn(dev->ctrl.device,
 			"frozen state error detected, reset controller\n");
+		dev->ctrl.stats.resets++;
 		if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
 			nvme_dev_disable(dev, true);
 			return PCI_ERS_RESULT_DISCONNECT;
-- 
2.48.1




More information about the Linux-nvme mailing list