[PATCH 4/9] libnvme: add support for retrieving per-path gendisk I/O statistics
Nilay Shroff
nilay at linux.ibm.com
Sat Mar 21 08:28:03 PDT 2026
Gendisk I/O statistics provide useful insight into disk activity,
including read/write/discard/flush operations, as well as information
about in-flight I/Os and I/O timing.
Parsing these statistics allows users to determine the number of I/Os
processed, time spent servicing I/O, number of sectors accessed, and
the count of in-flight requests.
Add support for retrieving per-path gendisk I/O statistics. Also add
support for computing deltas of these statistics between samples, such
as I/O ticks, number of sectors, and number of serviced I/Os.
These metrics can be used by tools such as nvme-top to display
real-time disk activity. Per-path metrics are particularly useful when
NVMe native multipath is enabled.
Signed-off-by: Nilay Shroff <nilay at linux.ibm.com>
---
libnvme/src/libnvme.ld | 12 ++
libnvme/src/nvme/private.h | 26 ++++
libnvme/src/nvme/tree.c | 255 +++++++++++++++++++++++++++++++++++++
libnvme/src/nvme/tree.h | 102 +++++++++++++++
4 files changed, 395 insertions(+)
diff --git a/libnvme/src/libnvme.ld b/libnvme/src/libnvme.ld
index f842eb770..ceb9f9bcb 100644
--- a/libnvme/src/libnvme.ld
+++ b/libnvme/src/libnvme.ld
@@ -144,6 +144,18 @@ LIBNVME_3 {
nvme_path_get_queue_depth;
nvme_path_get_ana_state;
nvme_path_get_numa_nodes;
+ nvme_path_get_read_ios;
+ nvme_path_get_write_ios;
+ nvme_path_get_inflights;
+ nvme_path_get_stat_interval;
+ nvme_path_get_io_ticks;
+ nvme_path_get_read_ticks;
+ nvme_path_get_write_ticks;
+ nvme_path_get_read_sectors;
+ nvme_path_get_write_sectors;
+ nvme_path_reset_stat;
+ nvme_path_update_stat;
nvme_filter_paths;
nvme_read_config;
nvme_read_key;
diff --git a/libnvme/src/nvme/private.h b/libnvme/src/nvme/private.h
index 84852c8b0..8e327f1e2 100644
--- a/libnvme/src/nvme/private.h
+++ b/libnvme/src/nvme/private.h
@@ -20,6 +20,7 @@
#include <nvme/fabrics.h>
#include <nvme/mi.h>
+#include <nvme/tree.h>
const char *nvme_subsys_sysfs_dir(void);
const char *nvme_ctrl_sysfs_dir(void);
@@ -128,9 +129,34 @@ struct nvme_transport_handle {
struct nvme_log *log;
};
+/*
+ * Index into nvme_stat.group[]: one slot of counters per I/O operation type.
+ */
+enum stat_group {
+ READ = 0,
+ WRITE,
+ DISCARD,
+ FLUSH,
+
+ /* number of groups above; used as the size of nvme_stat.group[] */
+ NR_STAT_GROUPS
+};
+
+/*
+ * One sample of the gendisk I/O statistics parsed from a sysfs "stat"
+ * attribute. Two of these are kept per path so that deltas between
+ * samples can be computed.
+ */
+struct nvme_stat {
+ /* per-operation counters, indexed by enum stat_group */
+ struct {
+ unsigned long ios;
+ unsigned long merges;
+ unsigned long long sectors;
+ unsigned int ticks; /* in milliseconds */
+ } group[NR_STAT_GROUPS];
+
+ /* number of in-flight I/Os at the time of the sample */
+ unsigned int inflights;
+ unsigned int io_ticks; /* in milliseconds */
+ /* NOTE(review): presumably the kernel's time_in_queue field - confirm */
+ unsigned int tot_ticks; /* in milliseconds */
+
+ double ts_ms; /* CLOCK_MONOTONIC time, in ms, when the stat is updated */
+};
+
struct nvme_path { /*!generate-accessors*/
struct list_node entry;
struct list_node nentry;
+ struct nvme_stat stat[2];
struct nvme_ctrl *c;
struct nvme_ns *n;
diff --git a/libnvme/src/nvme/tree.c b/libnvme/src/nvme/tree.c
index 18aabe044..94f900307 100644
--- a/libnvme/src/nvme/tree.c
+++ b/libnvme/src/nvme/tree.c
@@ -16,6 +16,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <time.h>
#include <arpa/inet.h>
#include <netdb.h>
@@ -850,6 +851,260 @@ __public char *nvme_path_get_numa_nodes(nvme_path_t p)
return p->numa_nodes;
}
+/*
+ * Return stat sample slot @curr of path @p, or NULL when @curr is not a
+ * valid index (only slots 0 and 1 exist).
+ */
+static nvme_stat_t nvme_path_get_stat(nvme_path_t p, int curr)
+{
+	return (curr == 0 || curr == 1) ? &p->stat[curr] : NULL;
+}
+
+/* Clear both stat samples kept for path @p. */
+__public void nvme_path_reset_stat(nvme_path_t p)
+{
+	/* p->stat is a two-element array, so this wipes both slots at once */
+	memset(p->stat, 0, sizeof(p->stat));
+}
+
+/*
+ * Parse the text of a block-layer "stat" attribute into @stat and
+ * timestamp the sample.
+ *
+ * @sysfs_stat_path: text read from the "stat" attribute (despite the
+ *		     name, this is the attribute's content, not a path)
+ * @stat: sample to fill; it is zeroed up front and stays zeroed on failure
+ *
+ * Return: 0 on success, -1 if fewer than 17 fields could be parsed or the
+ * monotonic clock could not be read.
+ */
+static int nvme_update_stat(const char *sysfs_stat_path, nvme_stat_t stat)
+{
+	struct nvme_stat s;
+	struct timespec ts;
+	int n;
+
+	memset(&s, 0, sizeof(s));
+	memset(stat, 0, sizeof(*stat));
+
+	/*
+	 * Field order: read group, write group, then inflights, io_ticks
+	 * and tot_ticks, then the discard group and finally flush, which
+	 * carries only ios and ticks (its merges/sectors stay zero).
+	 */
+	n = sscanf(sysfs_stat_path,
+		   "%lu %lu %llu %u %lu %lu %llu %u %u %u %u %lu %lu %llu %u %lu %u",
+		   &s.group[READ].ios, &s.group[READ].merges,
+		   &s.group[READ].sectors, &s.group[READ].ticks,
+		   &s.group[WRITE].ios, &s.group[WRITE].merges,
+		   &s.group[WRITE].sectors, &s.group[WRITE].ticks,
+		   &s.inflights, &s.io_ticks, &s.tot_ticks,
+		   &s.group[DISCARD].ios, &s.group[DISCARD].merges,
+		   &s.group[DISCARD].sectors, &s.group[DISCARD].ticks,
+		   &s.group[FLUSH].ios, &s.group[FLUSH].ticks);
+	if (n < 17)
+		return -1;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
+		return -1;
+
+	/* convert before multiplying so tv_sec * 1000 cannot overflow time_t */
+	s.ts_ms = (double)ts.tv_sec * 1000.0 + (double)ts.tv_nsec / 1e6;
+
+	*stat = s;
+
+	return 0;
+}
+
+/*
+ * Read the "stat" attribute of path @p and store the parsed sample in
+ * slot @curr. Returns 0 on success and -1 when @curr is not a valid slot,
+ * the attribute cannot be read, or its content cannot be parsed.
+ */
+__public int nvme_path_update_stat(nvme_path_t p, int curr)
+{
+	_cleanup_free_ char *attr = NULL;
+	nvme_stat_t slot = nvme_path_get_stat(p, curr);
+
+	if (!slot)
+		return -1;
+
+	attr = nvme_get_path_attr(p, "stat");
+
+	return attr ? nvme_update_stat(attr, slot) : -1;
+}
+
+/*
+ * Return the in-flight I/O count recorded in sample @stat. The return
+ * type matches the unsigned field and the public wrapper.
+ */
+static unsigned int nvme_stat_get_inflights(nvme_stat_t stat)
+{
+	return stat->inflights;
+}
+
+/*
+ * Return the in-flight I/O count stored in sample slot @this of path @p,
+ * or 0 when @this is not a valid slot index.
+ */
+__public unsigned int nvme_path_get_inflights(nvme_path_t p, int this)
+{
+	nvme_stat_t sample = nvme_path_get_stat(p, this);
+
+	return sample ? nvme_stat_get_inflights(sample) : 0;
+}
+
+/*
+ * Return the io_ticks delta (in milliseconds) between samples @curr and
+ * @prev, or 0 when the counter in @curr is not larger than in @prev.
+ * The return type is unsigned to match the counter and the public wrapper.
+ */
+static unsigned int nvme_stat_get_io_ticks(nvme_stat_t curr, nvme_stat_t prev)
+{
+	unsigned int delta = 0;
+
+	if (curr->io_ticks > prev->io_ticks)
+		delta = curr->io_ticks - prev->io_ticks;
+
+	return delta;
+}
+
+/*
+ * Return the io_ticks delta for path @p, treating slot @this as the
+ * current sample and the other slot as the previous one. Yields 0 when
+ * either slot cannot be resolved.
+ */
+__public unsigned int nvme_path_get_io_ticks(nvme_path_t p, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	if (now && before)
+		return nvme_stat_get_io_ticks(now, before);
+
+	return 0;
+}
+
+/*
+ * Return the ticks delta (in milliseconds) of group @grp between samples
+ * @curr and @prev, or 0 when the counter did not increase.
+ */
+static unsigned int nvme_stat_get_ticks(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned int now = curr->group[grp].ticks;
+	unsigned int before = prev->group[grp].ticks;
+
+	return now > before ? now - before : 0;
+}
+
+/*
+ * Return the ticks delta of group @grp for path @p, treating slot @this
+ * as the current sample and the other slot as the previous one. Yields 0
+ * when either slot cannot be resolved.
+ */
+static unsigned int __nvme_path_get_ticks(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	if (now && before)
+		return nvme_stat_get_ticks(now, before, grp);
+
+	return 0;
+}
+
+/* Read-group ticks delta for path @p, with slot @curr as the current sample. */
+__public unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_ticks(p, READ, curr);
+}
+
+/* Write-group ticks delta for path @p, with slot @curr as the current sample. */
+__public unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_ticks(p, WRITE, curr);
+}
+
+/*
+ * Return the time, in milliseconds, between the timestamps of samples
+ * @prev and @curr, or 0.0 when @curr is not newer than @prev.
+ */
+static double nvme_stat_get_interval(nvme_stat_t curr, nvme_stat_t prev)
+{
+	double now = curr->ts_ms;
+	double before = prev->ts_ms;
+
+	return now > before ? now - before : 0.0;
+}
+
+/*
+ * Return the interval, in milliseconds, between the two samples of path
+ * @p, treating slot @this as the current sample and the other slot as the
+ * previous one. Yields 0 when either slot cannot be resolved.
+ */
+__public double nvme_path_get_stat_interval(nvme_path_t p, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	if (now && before)
+		return nvme_stat_get_interval(now, before);
+
+	return 0.0;
+}
+
+/*
+ * Return the number of I/Os of group @grp counted between samples @prev
+ * and @curr, or 0 when the counter did not increase.
+ */
+static unsigned long nvme_stat_get_ios(nvme_stat_t curr, nvme_stat_t prev,
+		enum stat_group grp)
+{
+	unsigned long now = curr->group[grp].ios;
+	unsigned long before = prev->group[grp].ios;
+
+	return now > before ? now - before : 0;
+}
+
+/*
+ * Return the I/O count delta of group @grp for path @p, treating slot
+ * @this as the current sample and the other slot as the previous one.
+ * Yields 0 when either slot cannot be resolved.
+ */
+static unsigned long __nvme_path_get_ios(nvme_path_t p, enum stat_group grp,
+		int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	if (now && before)
+		return nvme_stat_get_ios(now, before, grp);
+
+	return 0;
+}
+
+/* Read I/O count delta for path @p, with slot @curr as the current sample. */
+__public unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_ios(p, READ, curr);
+}
+
+/* Write I/O count delta for path @p, with slot @curr as the current sample. */
+__public unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_ios(p, WRITE, curr);
+}
+
+/*
+ * Return the number of sectors of group @grp counted between samples
+ * @prev and @curr, or 0 when the counter did not increase.
+ */
+static unsigned long long nvme_stat_get_sectors(nvme_stat_t curr,
+		nvme_stat_t prev, enum stat_group grp)
+{
+	unsigned long long now = curr->group[grp].sectors;
+	unsigned long long before = prev->group[grp].sectors;
+
+	return now > before ? now - before : 0;
+}
+
+/*
+ * Return the sector count delta of group @grp for path @p, treating slot
+ * @this as the current sample and the other slot as the previous one.
+ * Yields 0 when either slot cannot be resolved.
+ */
+static unsigned long long __nvme_path_get_sectors(nvme_path_t p,
+		enum stat_group grp, int this)
+{
+	nvme_stat_t now = nvme_path_get_stat(p, this);
+	nvme_stat_t before = nvme_path_get_stat(p, !this);
+
+	if (now && before)
+		return nvme_stat_get_sectors(now, before, grp);
+
+	return 0;
+}
+
+/* Read sector count delta for path @p, with slot @curr as the current sample. */
+__public unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_sectors(p, READ, curr);
+}
+
+/* Write sector count delta for path @p, with slot @curr as the current sample. */
+__public unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr)
+{
+ return __nvme_path_get_sectors(p, WRITE, curr);
+}
+
void nvme_free_path(struct nvme_path *p)
{
list_del_init(&p->entry);
diff --git a/libnvme/src/nvme/tree.h b/libnvme/src/nvme/tree.h
index 39d715508..3924e061a 100644
--- a/libnvme/src/nvme/tree.h
+++ b/libnvme/src/nvme/tree.h
@@ -23,6 +23,7 @@
typedef struct nvme_ns *nvme_ns_t;
typedef struct nvme_ns_head *nvme_ns_head_t;
typedef struct nvme_path *nvme_path_t;
+typedef struct nvme_stat *nvme_stat_t;
typedef struct nvme_ctrl *nvme_ctrl_t;
typedef struct nvme_subsystem *nvme_subsystem_t;
typedef struct nvme_host *nvme_host_t;
@@ -719,6 +720,107 @@ nvme_ctrl_t nvme_path_get_ctrl(nvme_path_t p);
*/
nvme_ns_t nvme_path_get_ns(nvme_path_t p);
+/**
+ * nvme_path_reset_stat() - Reset both stat samples of a namespace path
+ * @p: &nvme_path_t object
+ *
+ * Zeroes the two stat sample slots kept for @p.
+ */
+void nvme_path_reset_stat(nvme_path_t p);
+
+/**
+ * nvme_path_update_stat() - Update a stat sample of an nvme_path_t object
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the stat sample slot to update
+ *
+ * Reads the "stat" attribute of @p and stores the parsed values, together
+ * with a timestamp, in sample slot @curr.
+ *
+ * Return: 0 on success, -1 on error
+ */
+int nvme_path_update_stat(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ios() - Get read I/Os
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Num of read I/Os processed between two stat samples, or 0 if
+ * @curr is out of range or the counter did not increase
+ */
+unsigned long nvme_path_get_read_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ios() - Get write I/Os
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Num of write I/Os processed between two stat samples, or 0 if
+ * @curr is out of range or the counter did not increase
+ */
+unsigned long nvme_path_get_write_ios(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_ticks() - Get read I/O ticks
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Time, in milliseconds, spent processing read I/O requests
+ * between two stat samples, or 0 if @curr is out of range or the counter
+ * did not increase
+ */
+unsigned int nvme_path_get_read_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_read_sectors() - Get read I/O sectors
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Number of sectors read from the device between two stat samples,
+ * or 0 if @curr is out of range or the counter did not increase
+ */
+unsigned long long nvme_path_get_read_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_sectors() - Get write I/O sectors
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Num of sectors written to the device between two stat samples,
+ * or 0 if @curr is out of range or the counter did not increase
+ */
+unsigned long long nvme_path_get_write_sectors(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_write_ticks() - Get write I/O ticks
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Time, in milliseconds, spent processing write I/O requests
+ * between two stat samples, or 0 if @curr is out of range or the counter
+ * did not increase
+ */
+unsigned int nvme_path_get_write_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_stat_interval() - Get interval between two stat samples
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Interval, in milliseconds, between collection of two consecutive
+ * stat samples, or 0 if @curr is out of range or the current sample is
+ * not newer than the previous one
+ */
+double nvme_path_get_stat_interval(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_io_ticks() - Get I/O ticks
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the current stat sample; the other slot is
+ *	  used as the previous sample
+ *
+ * Return: Time consumed, in milliseconds, processing I/O requests between
+ * two stat samples, or 0 if @curr is out of range or the counter did not
+ * increase
+ */
+unsigned int nvme_path_get_io_ticks(nvme_path_t p, int curr);
+
+/**
+ * nvme_path_get_inflights() - Inflight IOs for nvme_path_t object
+ * @p: &nvme_path_t object
+ * @curr: Index (0 or 1) of the stat sample to read
+ *
+ * Return: Inflight number of IOs recorded in sample @curr, or 0 if @curr
+ * is out of range
+ */
+unsigned int nvme_path_get_inflights(nvme_path_t p, int curr);
+
/**
* nvme_ctrl_get_transport_handle() - Get associated transport handle
* @c: Controller instance
--
2.53.0
More information about the Linux-nvme
mailing list