[PATCHv4 8/8] nvme: export controller reconnect event count via sysfs
Nilay Shroff
nilay at linux.ibm.com
Sat May 16 11:36:55 PDT 2026
When an NVMe-oF link goes down, the driver attempts to recover the
connection by repeatedly reconnecting to the remote controller at
configured intervals. A maximum number of reconnect attempts is also
configured, after which recovery stops and the controller is removed
if the connection cannot be re-established.
The driver maintains a counter, nr_reconnects, which is incremented on
each reconnect attempt. However, if a reconnect succeeds, this counter
is reset to zero. Moreover, currently, this counter is
only reported via kernel log messages and is not exposed to userspace.
Since dmesg is a circular buffer, this information may be lost over
time.
So introduce a new accumulator which accumulates nr_reconnects attempts
and also expose this accumulator per-fabric ctrl via a new sysfs
attribute, reconnect_count, under the diag attribute group to provide
persistent visibility into the number of reconnect attempts made by the
host. This information can help users diagnose unstable links or
connectivity issues. Furthermore, this sysfs attribute is also writable
so that the user may reset it to zero, if needed.
The reconnect_count can also be consumed by monitoring tools such as
nvme-top to improve controller-level observability.
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
drivers/nvme/host/fc.c | 3 +++
drivers/nvme/host/nvme.h | 2 ++
drivers/nvme/host/rdma.c | 2 ++
drivers/nvme/host/sysfs.c | 35 +++++++++++++++++++++++++++++++++++
drivers/nvme/host/tcp.c | 2 ++
5 files changed, 44 insertions(+)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e4f4528fe2a2..f04eb13dd5e9 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3148,6 +3148,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
goto out_term_aen_ops;
}
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects);
ctrl->ctrl.nr_reconnects = 0;
nvme_start_ctrl(&ctrl->ctrl);
@@ -3470,6 +3472,7 @@ nvme_fc_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
+ atomic_long_set(&ctrl->ctrl.acc_reconnects, 0);
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index e575bef99d4a..22535328fdd5 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -456,6 +456,8 @@ struct nvme_ctrl {
u16 icdoff;
u16 maxcmd;
int nr_reconnects;
+ /* accumulate reconnect attempts, as nr_reconnects can reset to zero */
+ atomic_long_t acc_reconnects;
unsigned long flags;
struct nvmf_ctrl_options *opts;
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f77c960f7632..de45fefdc15e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1110,6 +1110,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
ctrl->ctrl.nr_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->ctrl.nr_reconnects, &ctrl->ctrl.acc_reconnects);
ctrl->ctrl.nr_reconnects = 0;
return;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 72300d6de880..9c15e7d869ed 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -1094,17 +1094,52 @@ static ssize_t reset_count_store(struct device *dev,
return count;
}
+static ssize_t reconnect_count_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%lu\n",
+ atomic_long_read(&ctrl->acc_reconnects) +
+ ctrl->nr_reconnects);
+}
+
+static ssize_t reconnect_count_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ int err;
+ unsigned long reconnect_cnt;
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ err = kstrtoul(buf, 0, &reconnect_cnt);
+ if (err)
+ return -EINVAL;
+
+ atomic_long_set(&ctrl->acc_reconnects, reconnect_cnt);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(reconnect_count);
+
static DEVICE_ATTR_RW(reset_count);
static struct attribute *nvme_dev_diag_attrs[] = {
&dev_attr_adm_errors.attr,
&dev_attr_reset_count.attr,
+ &dev_attr_reconnect_count.attr,
NULL,
};
static umode_t nvme_dev_diag_attrs_are_visible(struct kobject *kobj,
struct attribute *a, int n)
{
+ struct device *dev = container_of(kobj, struct device, kobj);
+ struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+ if (a == &dev_attr_reconnect_count.attr && !ctrl->opts)
+ return 0;
+
return a->mode;
}
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 15d36d6a728e..ab9d19497b3f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2475,6 +2475,8 @@ static void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
dev_info(ctrl->device, "Successfully reconnected (attempt %d/%d)\n",
ctrl->nr_reconnects, ctrl->opts->max_reconnects);
+ /* accumulate reconnect attempts before resetting it to zero */
+ atomic_long_add(ctrl->nr_reconnects, &ctrl->acc_reconnects);
ctrl->nr_reconnects = 0;
return;
--
2.53.0
More information about the Linux-nvme
mailing list