[PATCHv4 7/8] nvme: export controller reset event count via sysfs

Nilay Shroff nilay at linux.ibm.com
Sat May 16 11:36:54 PDT 2026


The NVMe controller transitions into the RESETTING state during error
recovery, link instability, firmware activation, or when a reset is
explicitly triggered by the user.

Expose a per-ctrl sysfs attribute reset_count, under diag attribute
group to provide visibility into these RESETTING state transitions.
Observing the frequency of reset events can help users identify issues
such as PCIe errors or unstable fabric links. This counter is also
writable thus allowing user to reset its value, if needed.

This counter can also be consumed by monitoring tools such as nvme-top
to improve controller-level observability.

Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
 drivers/nvme/host/core.c  |  1 +
 drivers/nvme/host/nvme.h  |  1 +
 drivers/nvme/host/sysfs.c | 27 +++++++++++++++++++++++++++
 3 files changed, 29 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3b2f7a972941..b4dace419552 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -596,6 +596,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_LIVE:
 			changed = true;
+			atomic_long_inc(&ctrl->nr_reset);
 			fallthrough;
 		default:
 			break;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9434abf2659e..e575bef99d4a 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -414,6 +414,7 @@ struct nvme_ctrl {
 	struct work_struct fw_act_work;
 	unsigned long events;
 	atomic_long_t errors;
+	atomic_long_t nr_reset;
 
 #ifdef CONFIG_NVME_MULTIPATH
 	/* asymmetric namespace access: */
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 01d771d85f31..72300d6de880 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -1070,8 +1070,35 @@ struct device_attribute dev_attr_adm_errors =
 	__ATTR(command_error_count, 0644,
 		nvme_adm_errors_show, nvme_adm_errors_store);
 
+static ssize_t reset_count_show(struct device *dev,
+		   struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", atomic_long_read(&ctrl->nr_reset));
+}
+
+static ssize_t reset_count_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	int err;
+	unsigned long reset_cnt;
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	err = kstrtoul(buf, 0, &reset_cnt);
+	if (err)
+		return -EINVAL;
+
+	atomic_long_set(&ctrl->nr_reset, reset_cnt);
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(reset_count);
+
 static struct attribute *nvme_dev_diag_attrs[] = {
 	&dev_attr_adm_errors.attr,
+	&dev_attr_reset_count.attr,
 	NULL,
 };
 
-- 
2.53.0




More information about the Linux-nvme mailing list