[PATCH v2] nvme-pci: add NVMe controller statistics
Tokunori Ikegami
ikegami.t at gmail.com
Thu May 29 10:29:02 PDT 2025
Add per-controller counters for the warning events the driver logs (timeouts, aborts, resets and disables) and expose them as sysfs attributes.
Signed-off-by: Tokunori Ikegami <ikegami.t at gmail.com>
---
Changes since v1:
- Split the sysfs stats attribute into 4 new files.
- Create a stats subdirectory for the split attributes.
- Change the device attributes to the read-write version.
drivers/nvme/host/nvme.h | 9 +++
drivers/nvme/host/pci.c | 127 +++++++++++++++++++++++++++++++++++++++
2 files changed, 136 insertions(+)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index ad0c1f834f09..5a6d0aebc9f8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -277,6 +277,13 @@ enum nvme_ctrl_flags {
NVME_CTRL_FROZEN = 6,
};
+struct nvme_stats { /* per-controller counters of driver warning events */
+ unsigned long timeouts; /* bumped on the "timeout, completion polled" warning */
+ unsigned long aborts; /* bumped just before an abort request is allocated */
+ unsigned long resets; /* bumped on the "will reset" / "reset controller" warnings */
+ unsigned long disables; /* bumped on "Disabling device after reset failure" */
+};
+
struct nvme_ctrl {
bool comp_seen;
bool identified;
@@ -411,6 +418,8 @@ struct nvme_ctrl {
enum nvme_ctrl_type cntrltype;
enum nvme_dctype dctype;
u16 awupf; /* 0's based value. */
+
+ struct nvme_stats stats;
};
static inline enum nvme_ctrl_state nvme_ctrl_state(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e0bfe04a2bc2..632b222b51ff 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1467,6 +1467,7 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
+ dev->ctrl.stats.resets++;
if (csts != ~0)
return;
@@ -1528,6 +1529,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
dev_warn(dev->ctrl.device,
"I/O tag %d (%04x) QID %d timeout, completion polled\n",
req->tag, nvme_cid(req), nvmeq->qid);
+ dev->ctrl.stats.timeouts++;
return BLK_EH_DONE;
}
@@ -1565,6 +1567,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O tag %d (%04x) opcode %#x (%s) QID %d timeout, reset controller\n",
req->tag, nvme_cid(req), opcode,
nvme_opcode_str(nvmeq->qid, opcode), nvmeq->qid);
+ dev->ctrl.stats.resets++;
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
goto disable;
}
@@ -1584,6 +1587,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
req->tag, nvme_cid(req), opcode, nvme_get_opcode_str(opcode),
nvmeq->qid, blk_op_str(req_op(req)), req_op(req),
blk_rq_bytes(req));
+ dev->ctrl.stats.aborts++;
abort_req = blk_mq_alloc_request(dev->ctrl.admin_q, nvme_req_op(&cmd),
BLK_MQ_REQ_NOWAIT);
@@ -2424,9 +2428,130 @@ static const struct attribute_group nvme_pci_dev_attrs_group = {
.is_visible = nvme_pci_attrs_are_visible,
};
+/*
+ * sysfs read handler for the "timeouts" attribute: prints the controller's
+ * timeout counter as an unsigned decimal followed by a newline.
+ */
+static ssize_t timeouts_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ctrl *nctrl = dev_get_drvdata(dev);
+	unsigned long nr_timeouts = nctrl->stats.timeouts;
+
+	return sysfs_emit(buf, "%lu\n", nr_timeouts);
+}
+
+/*
+ * sysfs write handler for the "timeouts" attribute: parses an unsigned
+ * base-10 value from @buf and overwrites the controller's timeout counter
+ * with it (writing 0 clears the counter).
+ *
+ * Returns @count on success, or the negative errno reported by kstrtoul()
+ * (-EINVAL for malformed input, -ERANGE on overflow) so that user space can
+ * tell the two failures apart.
+ */
+static ssize_t timeouts_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long timeouts;
+	int err;
+
+	err = kstrtoul(buf, 10, &timeouts);
+	if (err)
+		return err;
+
+	ctrl->stats.timeouts = timeouts;
+
+	return count;
+}
+static DEVICE_ATTR_RW(timeouts);
+
+/*
+ * sysfs read handler for the "aborts" attribute: prints the controller's
+ * abort counter as an unsigned decimal followed by a newline.
+ */
+static ssize_t aborts_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ctrl *nctrl = dev_get_drvdata(dev);
+	unsigned long nr_aborts = nctrl->stats.aborts;
+
+	return sysfs_emit(buf, "%lu\n", nr_aborts);
+}
+
+/*
+ * sysfs write handler for the "aborts" attribute: parses an unsigned
+ * base-10 value from @buf and overwrites the controller's abort counter
+ * with it (writing 0 clears the counter).
+ *
+ * Returns @count on success, or the negative errno reported by kstrtoul()
+ * (-EINVAL for malformed input, -ERANGE on overflow) so that user space can
+ * tell the two failures apart.
+ */
+static ssize_t aborts_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long aborts;
+	int err;
+
+	err = kstrtoul(buf, 10, &aborts);
+	if (err)
+		return err;
+
+	ctrl->stats.aborts = aborts;
+
+	return count;
+}
+static DEVICE_ATTR_RW(aborts);
+
+/*
+ * sysfs read handler for the "resets" attribute: prints the controller's
+ * reset counter as an unsigned decimal followed by a newline.
+ */
+static ssize_t resets_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ctrl *nctrl = dev_get_drvdata(dev);
+	unsigned long nr_resets = nctrl->stats.resets;
+
+	return sysfs_emit(buf, "%lu\n", nr_resets);
+}
+
+/*
+ * sysfs write handler for the "resets" attribute: parses an unsigned
+ * base-10 value from @buf and overwrites the controller's reset counter
+ * with it (writing 0 clears the counter).
+ *
+ * Returns @count on success, or the negative errno reported by kstrtoul()
+ * (-EINVAL for malformed input, -ERANGE on overflow) so that user space can
+ * tell the two failures apart.
+ */
+static ssize_t resets_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long resets;
+	int err;
+
+	err = kstrtoul(buf, 10, &resets);
+	if (err)
+		return err;
+
+	ctrl->stats.resets = resets;
+
+	return count;
+}
+static DEVICE_ATTR_RW(resets);
+
+/*
+ * sysfs read handler for the "disables" attribute: prints the controller's
+ * disable counter as an unsigned decimal followed by a newline.
+ */
+static ssize_t disables_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ctrl *nctrl = dev_get_drvdata(dev);
+	unsigned long nr_disables = nctrl->stats.disables;
+
+	return sysfs_emit(buf, "%lu\n", nr_disables);
+}
+
+/*
+ * sysfs write handler for the "disables" attribute: parses an unsigned
+ * base-10 value from @buf and overwrites the controller's disable counter
+ * with it (writing 0 clears the counter).
+ *
+ * Returns @count on success, or the negative errno reported by kstrtoul()
+ * (-EINVAL for malformed input, -ERANGE on overflow) so that user space can
+ * tell the two failures apart.
+ */
+static ssize_t disables_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long disables;
+	int err;
+
+	err = kstrtoul(buf, 10, &disables);
+	if (err)
+		return err;
+
+	ctrl->stats.disables = disables;
+
+	return count;
+}
+static DEVICE_ATTR_RW(disables);
+
+/*
+ * Visibility callback for the stats attributes: every attribute is exposed
+ * unconditionally, with exactly the mode it was declared with. @kobj and @n
+ * are unused.
+ *
+ * NOTE(review): returning a->mode for every entry looks equivalent to
+ * leaving .is_visible unset in the group - confirm and consider dropping.
+ */
+static umode_t nvme_stats_attrs_are_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	umode_t mode = a->mode;
+
+	return mode;
+}
+
+static struct attribute *nvme_stats_attrs[] = { /* the four event-counter attributes */
+ &dev_attr_timeouts.attr,
+ &dev_attr_aborts.attr,
+ &dev_attr_resets.attr,
+ &dev_attr_disables.attr,
+ NULL, /* list terminator */
+};
+
+static const struct attribute_group nvme_stats_attrs_group = { /* sysfs group holding the event counters */
+ .name = "stats", /* group name; the changelog describes it as a "stats" subdirectory */
+ .attrs = nvme_stats_attrs,
+ .is_visible = nvme_stats_attrs_are_visible, /* returns each attribute's own mode */
+};
+
static const struct attribute_group *nvme_pci_dev_attr_groups[] = {
&nvme_dev_attrs_group,
&nvme_pci_dev_attrs_group,
+ &nvme_stats_attrs_group, /* timeouts/aborts/resets/disables counters */
NULL,
};
@@ -3057,6 +3182,7 @@ static void nvme_reset_work(struct work_struct *work)
*/
dev_warn(dev->ctrl.device, "Disabling device after reset failure: %d\n",
result);
+ dev->ctrl.stats.disables++;
nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
nvme_dev_disable(dev, true);
nvme_sync_queues(&dev->ctrl);
@@ -3593,6 +3719,7 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
+ dev->ctrl.stats.resets++;
if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
nvme_dev_disable(dev, true);
return PCI_ERS_RESULT_DISCONNECT;
--
2.48.1
More information about the Linux-nvme
mailing list