[PATCHv3 7/7] nvme: export controller reconnect event count via sysfs
Nilay Shroff
nilay at linux.ibm.com
Fri Feb 20 09:48:52 PST 2026
When an NVMe-oF link goes down, the driver attempts to recover the
connection by repeatedly reconnecting to the remote controller at
configured intervals. A maximum number of reconnect attempts is also
configured, after which recovery stops and the controller is removed
if the connection cannot be re-established.
The driver maintains a counter, nr_reconnects, which is incremented on
each reconnect attempt. However, when a reconnect succeeds, this
counter is reset to zero. Moreover, this counter is currently only
reported via kernel log messages and is not exposed to userspace.
Since dmesg is a circular buffer, this information may be lost over
time.
So introduce a new accumulator, acc_reconnects, which accumulates the
nr_reconnects attempts, and expose it via a new sysfs attribute
"reconnect_events" to provide persistent visibility into the number
of reconnect attempts made by the host. This information can help
users diagnose unstable links or connectivity issues. Furthermore,
this sysfs attribute is writable, so the user may reset it to zero
if needed.
The "reconnect_events" attribute can also be consumed by monitoring
tools such as nvme-top to improve controller-level observability.
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
drivers/nvme/host/fc.c | 5 +++++
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/rdma.c | 4 ++++
drivers/nvme/host/sysfs.c | 30 ++++++++++++++++++++++++++++++
drivers/nvme/host/tcp.c | 3 +++
5 files changed, 44 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 6948de3f438a..a918217620d1 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3148,6 +3148,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_term_aen_ops;
}
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->ctrl.acc_reconnects,
+ READ_ONCE(ctrl->ctrl.acc_reconnects) +
+ ctrl->ctrl.nr_reconnects);
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
@@ -3470,6 +3474,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
+ ctrl->ctrl.acc_reconnects = 0;
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5d90e5fa7298..9146d1b48606 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -401,6 +401,8 @@ struct nvme_ctrl {
u16 icdoff;
u16 maxcmd;
int nr_reconnects;
+ /* accumulate reconnect attempts, as nr_reconnects can reset to zero */
+ size_t acc_reconnects;
unsigned long flags;
struct nvmf_ctrl_options *opts;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 35c0822edb2d..bd5492ad3da6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1110,6 +1110,10 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->ctrl.acc_reconnects,
+ READ_ONCE(ctrl->ctrl.acc_reconnects) +
+ ctrl->ctrl.nr_reconnects);
ctrl->ctrl.nr_reconnects = 0;
return;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index f3e6c7208315..166e45b589ad 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -736,6 +736,33 @@ static ssize_t reset_events_store(struct device *dev,
static DEVICE_ATTR_RW(reset_events);
+/*
+ * sysfs show handler for "reconnect_events": emits the accumulated
+ * reconnect count (acc_reconnects) plus the current nr_reconnects value
+ * as a single decimal line.
+ */
+static ssize_t reconnect_events_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	/* acc_reconnects is size_t, so the sum is size_t: print with %zu */
+	return sysfs_emit(buf, "%zu\n",
+			READ_ONCE(ctrl->acc_reconnects) + ctrl->nr_reconnects);
+}
+
+/*
+ * sysfs store handler for "reconnect_events": parses @buf with kstrtoul()
+ * (base argument 0) and writes the result to acc_reconnects. Only
+ * acc_reconnects is written here; nr_reconnects is left untouched.
+ *
+ * Returns @count on success, or -EINVAL if kstrtoul() reports an error.
+ */
+static ssize_t reconnect_events_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	unsigned long val;
+
+	if (kstrtoul(buf, 0, &val))
+		return -EINVAL;
+
+	WRITE_ONCE(ctrl->acc_reconnects, val);
+	return count;
+}
+
+static DEVICE_ATTR_RW(reconnect_events);
+
#ifdef CONFIG_NVME_HOST_AUTH
static ssize_t nvme_ctrl_dhchap_secret_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -884,6 +911,7 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_adm_passthru_err_log_enabled.attr,
&dev_attr_adm_errors.attr,
&dev_attr_reset_events.attr,
+ &dev_attr_reconnect_events.attr,
NULL
};
@@ -913,6 +941,8 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
return 0;
#endif
+ if (a == &dev_attr_reconnect_events.attr && !ctrl->opts)
+ return 0;
return a->mode;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 69cb04406b47..46398c826368 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2460,6 +2460,9 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n",
ctrl->nr_reconnects, ctrl->opts->max_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ WRITE_ONCE(ctrl->acc_reconnects,
+ READ_ONCE(ctrl->acc_reconnects) + ctrl->nr_reconnects);
ctrl->nr_reconnects = 0;
return;
--
2.52.0
More information about the Linux-nvme
mailing list