[PATCH 4/9] libnvme: add support for retrieving per-path gendisk I/O statistics

Nilay Shroff nilay at linux.ibm.com
Sat Mar 21 08:28:03 PDT 2026


Gendisk I/O statistics provide useful insight into disk activity,
including read/write/discard/flush operations, as well as information
about in-flight I/Os and I/O timing.

Parsing these statistics allows users to determine the number of I/Os
processed, time spent servicing I/O, number of sectors accessed, and
the count of in-flight requests.

Add support for retrieving per-path gendisk I/O statistics. Also add
support for computing deltas of these statistics between samples, such
as I/O ticks, number of sectors, and number of serviced I/Os.

These metrics can be used by tools such as nvme-top to display
real-time disk activity. Per-path metrics are particularly useful when
NVMe native multipath is enabled.

Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
 libnvme/src/libnvme.ld     |  11 ++
 libnvme/src/nvme/private.h |  26 ++++
 libnvme/src/nvme/tree.c    | 255 +++++++++++++++++++++++++++++++++++++
 libnvme/src/nvme/tree.h    | 102 +++++++++++++++
 4 files changed, 394 insertions(+)

diff --git a/libnvme/src/libnvme.ld b/libnvme/src/libnvme.ld
index f842eb770..ceb9f9bcb 100644
--- a/libnvme/src/libnvme.ld
+++ b/libnvme/src/libnvme.ld
@@ -144,6 +144,17 @@ LIBNVME_3 {
 		nvme_path_get_queue_depth;
 		nvme_path_get_ana_state;
 		nvme_path_get_numa_nodes;
+		nvme_path_get_read_ios;
+		nvme_path_get_write_ios;
+		nvme_path_get_inflights;
+		nvme_path_get_stat_interval;
+		nvme_path_get_io_ticks;
+		nvme_path_get_read_ticks;
+		nvme_path_get_write_ticks;
+		nvme_path_get_read_sectors;
+		nvme_path_get_write_sectors;
+		nvme_path_reset_stat;
+		nvme_path_update_stat;
 		nvme_filter_paths;
 		nvme_read_config;
 		nvme_read_key;
diff --git a/libnvme/src/nvme/private.h b/libnvme/src/nvme/private.h
index 84852c8b0..8e327f1e2 100644
--- a/libnvme/src/nvme/private.h
+++ b/libnvme/src/nvme/private.h
@@ -20,6 +20,7 @@
 
 #include <nvme/fabrics.h>
 #include <nvme/mi.h>
+#include <nvme/tree.h>
 
 const char *nvme_subsys_sysfs_dir(void);
 const char *nvme_ctrl_sysfs_dir(void);
@@ -128,9 +129,34 @@ struct nvme_transport_handle {
 	struct nvme_log *log;
 };
 
+enum stat_group {
+	READ = 0,	/* read counters */
+	WRITE,		/* write counters */
+	DISCARD,	/* discard counters */
+	FLUSH,		/* flush counters */
+
+	NR_STAT_GROUPS	/* number of groups; sizes nvme_stat.group[] */
+};
+
+struct nvme_stat {
+	struct {
+		unsigned long ios;		/* I/Os counted for the group */
+		unsigned long merges;		/* merges counted for the group */
+		unsigned long long sectors;	/* sectors counted for the group */
+		unsigned int ticks;	/* in milliseconds */
+	} group[NR_STAT_GROUPS];	/* indexed by enum stat_group */
+
+	unsigned int inflights;		/* in-flight count at sample time */
+	unsigned int io_ticks;		/* in milliseconds */
+	unsigned int tot_ticks;		/* in milliseconds */
+
+	double ts_ms;			/* timestamp when the stat is updated */
+};
+
 struct nvme_path { /*!generate-accessors*/
 	struct list_node entry;
 	struct list_node nentry;
+	struct nvme_stat stat[2];
 
 	struct nvme_ctrl *c;
 	struct nvme_ns *n;
diff --git a/libnvme/src/nvme/tree.c b/libnvme/src/nvme/tree.c
index 18aabe044..94f900307 100644
--- a/libnvme/src/nvme/tree.c
+++ b/libnvme/src/nvme/tree.c
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <time.h>
 
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -850,6 +851,260 @@ __public char *nvme_path_get_numa_nodes(nvme_path_t p)
 	return p->numa_nodes;
 }
 
+/* Return sample slot @curr (0 or 1) of @p, or NULL for any other index. */
+static nvme_stat_t nvme_path_get_stat(nvme_path_t p, int curr)
+{
+	if (curr == 0 || curr == 1)
+		return &p->stat[curr];
+	return NULL;
+}
+
+/* Zero both stat sample slots of @p. */
+__public void nvme_path_reset_stat(nvme_path_t p)
+{
+	/* p->stat is an array member, so sizeof covers both slots. */
+	memset(p->stat, 0, sizeof(p->stat));
+}
+
+/*
+ * nvme_update_stat() - Parse one "stat" sample and timestamp it
+ * @stat_str:	Text of the stat attribute (its contents, not a path)
+ * @stat:	Sample slot to fill
+ *
+ * Expects 17 whitespace-separated counters in this order: read
+ * ios/merges/sectors/ticks, write ios/merges/sectors/ticks, inflights,
+ * io_ticks, tot_ticks, discard ios/merges/sectors/ticks, flush ios/ticks.
+ * @stat is zeroed up front, so a failed update leaves an all-zero sample.
+ *
+ * Return: 0 on success, -1 if fewer than 17 fields parse or the clock
+ * cannot be read.
+ */
+static int nvme_update_stat(const char *stat_str, nvme_stat_t stat)
+{
+	struct nvme_stat s;
+	struct timespec ts;
+	int n;
+
+	memset(stat, 0, sizeof(*stat));
+	memset(&s, 0, sizeof(s));
+
+	/* Scan into a scratch copy so @stat never holds a partial parse. */
+	n = sscanf(stat_str,
+		"%lu %lu %llu %u %lu %lu %llu %u %u %u %u %lu %lu %llu %u %lu %u",
+		&s.group[READ].ios, &s.group[READ].merges,
+		&s.group[READ].sectors, &s.group[READ].ticks,
+		&s.group[WRITE].ios, &s.group[WRITE].merges,
+		&s.group[WRITE].sectors, &s.group[WRITE].ticks,
+		&s.inflights, &s.io_ticks, &s.tot_ticks,
+		&s.group[DISCARD].ios, &s.group[DISCARD].merges,
+		&s.group[DISCARD].sectors, &s.group[DISCARD].ticks,
+		&s.group[FLUSH].ios, &s.group[FLUSH].ticks);
+
+	if (n < 17)
+		return -1;
+
+	/*
+	 * Without this check a failed clock read would feed an
+	 * uninitialized timespec into the timestamp below.
+	 */
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
+		return -1;
+
+	/*
+	 * Convert to double before scaling: multiplying tv_sec by 1000 in
+	 * integer arithmetic can overflow where time_t is 32 bits wide.
+	 */
+	s.ts_ms = (double)ts.tv_sec * 1000.0 + (double)ts.tv_nsec / 1e6;
+
+	/* Publish the fully parsed and timestamped sample in one go. */
+	*stat = s;
+
+	return 0;
+}
+
+/* Refresh sample slot @curr of @p from nvme_get_path_attr(p, "stat"). */
+__public int nvme_path_update_stat(nvme_path_t p, int curr)
+{
+	nvme_stat_t slot = nvme_path_get_stat(p, curr);
+	_cleanup_free_ char *attr = NULL;
+
+	/* Reject a bad index before fetching the attribute. */
+	if (!slot)
+		return -1;
+
+	attr = nvme_get_path_attr(p, "stat");
+	if (!attr)
+		return -1;
+	return nvme_update_stat(attr, slot);
+}
+
+static unsigned int nvme_stat_get_inflights(nvme_stat_t stat)
+{
+	return stat->inflights;	/* snapshot value, not a delta */
+}
+
+/* In-flight count stored in sample slot @this; 0 if @this is invalid. */
+__public unsigned int nvme_path_get_inflights(nvme_path_t p, int this)
+{
+	nvme_stat_t slot = nvme_path_get_stat(p, this);
+
+	if (slot)
+		return nvme_stat_get_inflights(slot);
+
+	return 0;
+}
+
+/* io_ticks (ms) gained from @prev to @curr; 0 if the counter did not grow. */
+static unsigned int nvme_stat_get_io_ticks(nvme_stat_t curr, nvme_stat_t prev)
+{
+	unsigned int now = curr->io_ticks;
+	unsigned int then = prev->io_ticks;
+
+	/* Unsigned return matches the field and the public getter. */
+	return now > then ? now - then : 0;
+}
+
+/* io_ticks delta for @p: slot @this measured against the other slot. */
+__public unsigned int nvme_path_get_io_ticks(nvme_path_t p, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	/* An invalid @this leaves `now` NULL; report 0 in that case. */
+	if (now && before)
+		return nvme_stat_get_io_ticks(now, before);
+
+	return 0;
+}
+
+/* Ticks (ms) of group @grp gained from @prev to @curr; 0 if none. */
+static unsigned int nvme_stat_get_ticks(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned int now = curr->group[grp].ticks;
+	unsigned int then = prev->group[grp].ticks;
+
+	/* Yield 0 instead of wrapping if the counter did not advance. */
+	return now > then ? now - then : 0;
+}
+
+/* Tick delta of @grp for @p: slot @this against the other slot. */
+static unsigned int __nvme_path_get_ticks(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	/* An invalid @this leaves `now` NULL; report 0 in that case. */
+	if (now && before)
+		return nvme_stat_get_ticks(now, before, grp);
+
+	return 0;
+}
+
+__public unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ticks(p, READ, curr); /* READ group delta */
+}
+
+__public unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ticks(p, WRITE, curr); /* WRITE group delta */
+}
+
+/* Milliseconds from @prev's timestamp to @curr's; 0 if @curr is not later. */
+static double nvme_stat_get_interval(nvme_stat_t curr, nvme_stat_t prev)
+{
+	double now = curr->ts_ms;
+	double then = prev->ts_ms;
+
+	/* Samples that are not in increasing time order give no interval. */
+	return now > then ? now - then : 0.0;
+}
+
+/* Sampling interval for @p: slot @this's timestamp minus the other's. */
+__public double nvme_path_get_stat_interval(nvme_path_t p, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	/* An invalid @this leaves `now` NULL; report 0 in that case. */
+	if (now && before)
+		return nvme_stat_get_interval(now, before);
+
+	return 0;
+}
+
+/* I/Os of group @grp counted between @prev and @curr; 0 if none. */
+static unsigned long nvme_stat_get_ios(nvme_stat_t curr, nvme_stat_t prev,
+		enum stat_group grp)
+{
+	unsigned long now = curr->group[grp].ios;
+	unsigned long then = prev->group[grp].ios;
+
+	/* Yield 0 instead of wrapping if the counter did not advance. */
+	return now > then ? now - then : 0;
+}
+
+/* I/O-count delta of @grp for @p: slot @this against the other slot. */
+static unsigned long __nvme_path_get_ios(nvme_path_t p, enum stat_group grp,
+		int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	/* An invalid @this leaves `now` NULL; report 0 in that case. */
+	if (now && before)
+		return nvme_stat_get_ios(now, before, grp);
+
+	return 0;
+}
+
+__public unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ios(p, READ, curr); /* READ group delta */
+}
+
+__public unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_ios(p, WRITE, curr); /* WRITE group delta */
+}
+
+/* Sectors of group @grp counted between @prev and @curr; 0 if none. */
+static unsigned long long nvme_stat_get_sectors(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned long long now = curr->group[grp].sectors;
+	unsigned long long then = prev->group[grp].sectors;
+
+	/* Yield 0 instead of wrapping if the counter did not advance. */
+	return now > then ? now - then : 0;
+}
+
+/* Sector delta of @grp for @p: slot @this against the other slot. */
+static unsigned long long __nvme_path_get_sectors(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	/* An invalid @this leaves `now` NULL; report 0 in that case. */
+	if (now && before)
+		return nvme_stat_get_sectors(now, before, grp);
+
+	return 0;
+}
+
+__public unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_sectors(p, READ, curr); /* READ group delta */
+}
+
+__public unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr)
+{
+	return __nvme_path_get_sectors(p, WRITE, curr); /* WRITE group delta */
+}
+
 void nvme_free_path(struct nvme_path *p)
 {
 	list_del_init(&p->entry);
diff --git a/libnvme/src/nvme/tree.h b/libnvme/src/nvme/tree.h
index 39d715508..3924e061a 100644
--- a/libnvme/src/nvme/tree.h
+++ b/libnvme/src/nvme/tree.h
@@ -23,6 +23,7 @@
 typedef struct nvme_ns *nvme_ns_t;
 typedef struct nvme_ns_head *nvme_ns_head_t;
 typedef struct nvme_path *nvme_path_t;
+typedef struct nvme_stat *nvme_stat_t;
 typedef struct nvme_ctrl *nvme_ctrl_t;
 typedef struct nvme_subsystem *nvme_subsystem_t;
 typedef struct nvme_host *nvme_host_t;
@@ -719,6 +720,107 @@ nvme_ctrl_t nvme_path_get_ctrl(nvme_path_t p);
  */
 nvme_ns_t nvme_path_get_ns(nvme_path_t p);
 
+/**
+ * nvme_path_reset_stat() - Reset both stat samples of a namespace path
+ * @p:	&nvme_path_t object
+ */
+void nvme_path_reset_stat(nvme_path_t p);
+
+/**
+ * nvme_path_update_stat() - Update stat of an nvme_path_t object
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the stat sample slot to update
+ *
+ * Return:	0 on success, -1 on error
+ */
+int nvme_path_update_stat(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ios() - Get read I/Os
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Num of read IOs processed between two stat samples
+ */
+unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ios() - Get write I/Os
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Num of write I/Os processed between two stat samples
+ */
+unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ticks() - Get read I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Time, in milliseconds, spent processing read I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_sectors() - Get read I/O sectors
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Number of sectors read from the device between two stat samples
+ */
+unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_sectors() - Get write I/O sectors
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Num of sectors written to the device between two stat samples
+ */
+unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ticks() - Get write I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Time, in milliseconds, spent processing write I/O requests
+ *		between two stat samples
+ */
+unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_stat_interval() - Get interval between two stat samples
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Interval, in milliseconds between collection of two consecutive
+ *		stat samples
+ */
+double nvme_path_get_stat_interval(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_io_ticks() - Get I/O ticks
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the current stat sample
+ *
+ * Return:	Time consumed, in milliseconds, processing I/O requests between
+ *		two stat samples
+ */
+unsigned int nvme_path_get_io_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_inflights() - Inflight IOs for nvme_path_t object
+ *
+ * @p:		&nvme_path_t object
+ * @curr:	Index (0 or 1) of the stat sample to read
+ *
+ * Return:	Inflight number of IOs
+ */
+unsigned int nvme_path_get_inflights(nvme_path_t p, int curr);
+
 /**
  * nvme_ctrl_get_transport_handle() - Get associated transport handle
  * @c:	Controller instance
-- 
2.53.0




More information about the Linux-nvme mailing list