[PATCH] nvme-pci: add NVMe controller statistics
Tokunori Ikegami
ikegami.t at gmail.com
Wed May 28 10:02:56 PDT 2025
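Add per-controller counters for the timeout, abort, reset and disable warning events reported by the PCI driver, and expose them through a new read-only "stats" sysfs attribute.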
Signed-off-by: Tokunori Ikegami <ikegami.t at gmail.com>
---
drivers/nvme/host/nvme.h | 9 +++++++++
drivers/nvme/host/pci.c | 20 ++++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 1de1b843afa5..aa28bea48783 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -277,6 +277,13 @@ enum nvme_ctrl_flags {
NVME_CTRL_FROZEN = 6,
};
+struct nvme_stats { /* event counters embedded in struct nvme_ctrl as ctrl->stats */
+ unsigned long timeouts; /* bumped in nvme_timeout() on the "completion polled" path */
+ unsigned long aborts; /* bumped in nvme_timeout() just before the abort request is allocated */
+ unsigned long resets; /* bumped next to the "will reset" / "reset controller" warnings */
+ unsigned long disables; /* bumped in nvme_reset_work() when the device is disabled after a reset failure */
+};
+
struct nvme_ctrl {
bool comp_seen;
bool identified;
@@ -410,6 +417,8 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
+
+ struct nvme_stats stats;
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 94ed13903b1b..5ce26408e426 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1465,6 +1465,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
+ dev->ctrl.stats.resets++;
if (csts != ~0)
return;
@@ -1526,6 +1527,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
dev_warn(dev->ctrl.device,
"I/O tag %d (%04x) QID %d timeout, completion polled\n",
req->tag, nvme_cid(req), nvmeq->qid);
+ dev->ctrl.stats.timeouts++;
return BLK_EH_DONE;
}
@@ -1563,6 +1565,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
req->tag, nvme_cid(req), opcode,
nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
+ dev->ctrl.stats.resets++;
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
goto disable;
}
@@ -1582,6 +1585,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
blk_rq_bytes(req));
+ dev->ctrl.stats.aborts++;
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
BLK_MQ_REQ_NOWAIT);
@@ -2390,6 +2394,19 @@ static ssize_t hmb_store(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RW(hmb);
+static ssize_t stats_show(struct device *dev, struct device_attribute *attr,
+ char *buf) /* show handler for the "stats" attribute; returns sysfs_emit()'s result */
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+ struct nvme_stats *stats = &ctrl->stats;
+
+ return sysfs_emit(buf, /* all four counters go out as one comma-separated line */
+ "timeouts: %lu, aborts: %lu, resets: %lu, disables: %lu\n",
+ stats->timeouts, stats->aborts, stats->resets,
+ stats->disables);
+}
+static DEVICE_ATTR_RO(stats); /* defines dev_attr_stats, which is listed in nvme_pci_attrs[] */
+
static umode_t nvme_pci_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
@@ -2414,6 +2431,7 @@ static struct attribute *nvme_pci_attrs[] = {
&dev_attr_cmbloc.attr,
&dev_attr_cmbsz.attr,
&dev_attr_hmb.attr,
+ &dev_attr_stats.attr,
NULL,
};
@@ -3055,6 +3073,7 @@ static void nvme_reset_work(struct work_struct *work)
*/
dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
result);
+ dev->ctrl.stats.disables++;
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_dev_disable(dev, true);
nvme_sync_queues(&dev->ctrl);
@@ -3591,6 +3610,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
+ dev->ctrl.stats.resets++;
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
nvme_dev_disable(dev, true);
return PCI_ERS_RESULT_DISCONNECT;
--
2.48.1
More information about the Linux-nvme mailing list