[PATCH] multipath-tools: Add Huawei prioritizer for nvme

Zou Ming zouming.zouming at huawei.com
Fri Apr 21 03:27:58 PDT 2017


Prioritizer that makes use of an NVMe feature called HUAWEI Asymmetric Namespace Access.
It determines the ANA state of a device and returns a matching priority value to multipath.
---
 .../libmultipath/discovery.c                       |  31 ++-
 .../libmultipath/prioritizers/Makefile             |   1 +
 .../libmultipath/prioritizers/huawei_ana.c         | 222 +++++++++++++++++++++
 .../libmultipath/prioritizers/huawei_ana.h         | 191 ++++++++++++++++++
 .../libmultipath/structs.h                         |   6 +
 5 files changed, 449 insertions(+), 2 deletions(-)
 create mode 100644 multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
 create mode 100644 multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h

diff --git a/multipath-tools-HEAD-be1191b/libmultipath/discovery.c b/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
index 663c8ea..6d5acab 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
+++ b/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
@@ -1195,6 +1195,8 @@ static int
 nvme_sysfs_pathinfo (struct path * pp, vector hwtable)
 {
 	struct udev_device *parent, *nvme = NULL;
+    char value[16];
+    int ret;
 
 	parent = pp->udev;
 	while (parent) {
@@ -1209,13 +1211,38 @@ nvme_sysfs_pathinfo (struct path * pp, vector hwtable)
 	if (!nvme)
 		return 1;
 
+    ret = sysfs_attr_get_value(pp->udev, "nsid", value, 16);
+    if (ret <= 0) {
+		condlog(0, "%s: failed to read nsid value, "
+			"error %d",  pp->dev, -ret);
+		return 1;
+	}
+
+    ret = sscanf(value, "%u\n", &pp->nvme_id.nsid);
+	if (ret != 1) {
+		condlog(0, "%s: Cannot parse nsid attribute", pp->dev);
+		return 1;
+	}
+
+    ret = sysfs_attr_get_value(nvme, "cntlid", value, 16);
+    if (ret <= 0) {
+		condlog(0, "%s: failed to read cntlid value, "
+			"error %d",  pp->dev, -ret);
+		return 1;
+	}
+
+    ret = sscanf(value, "%d\n", &pp->nvme_id.cntl_id);
+	if (ret != 1) {
+		condlog(0, "%s: Cannot parse cntlid attribute", pp->dev);
+		return 1;
+	}
 	snprintf(pp->vendor_id, SCSI_VENDOR_SIZE, "NVME");
 	snprintf(pp->product_id, SCSI_PRODUCT_SIZE, "%s", udev_device_get_sysattr_value(nvme, "model"));
 	snprintf(pp->serial, SERIAL_SIZE, "%s", udev_device_get_sysattr_value(nvme, "serial"));
 	snprintf(pp->rev, SCSI_REV_SIZE, "%s", udev_device_get_sysattr_value(nvme, "firmware_rev"));
 
-	condlog(3, "%s: vendor:%s product:%s serial:%s rev:%s", pp->dev,
-		pp->vendor_id, pp->product_id, pp->serial, pp->rev);
+	condlog(3, "%s: ctrl id:%d,nsid:%d,vendor:%s product:%s serial:%s rev:%s", pp->dev,pp->nvme_id.cntl_id,
+		pp->nvme_id.nsid,pp->vendor_id, pp->product_id, pp->serial, pp->rev);
 	pp->hwe = find_hwe(hwtable, pp->vendor_id, pp->product_id, NULL);
 
 	return 0;
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
index 36b42e4..395a65b 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
@@ -17,6 +17,7 @@ LIBS = \
 	libprioontap.so \
 	libpriorandom.so \
 	libpriordac.so \
+	libpriohuawei_ana.so \
 	libprioweightedpath.so \
 	libpriosysfs.so
 
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
new file mode 100644
index 0000000..1d64da2
--- /dev/null
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
@@ -0,0 +1,222 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017   All Rights Reserved.
+ *
+ * huawei_ana.c
+ * Version 1.00
+ *
+ * Prioritizer that makes use of an NVMe feature called HUAWEI Asymmetric Namespace Access.
+ * It determines the ANA state of a device and returns a matching priority value.
+ *
+ * Author(s): Zou Ming <zouming.zouming at huawei.com>
+ *            Yang Feng <philip.yang at huawei.com>
+ *
+ * This file is released under the GPL.
+ */
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "debug.h"
+#include "prio.h"
+#include "structs.h"
+
+#include "huawei_ana.h"
+
+#define ANA_GETSUPPORT_FAILED       1
+#define ANA_NOT_SUPPORTED           2
+#define ANA_GETNSG_FAILED           3
+#define ANA_GETAAS_FAILED           4
+#define ANA_NO_INFORMATION		    5
+
+#define ANA_SUPPORT            0
+#define ANA_NOT_SUPPORT        1
+
+#define NVME_ANA_LOG_PAGE 0xc0 
+
+#define NVME_SUPPORT_ANA (1 << 3)
+
+#define min(x, y) ((x) > (y) ? (y) : (x))
+
+/* Printable names of the ANA states, indexed by ANAS_* value (index 0 is unused). */
+static const char * const anas_string[] = {
+	[ANAS_OPTIMIZED]	= "active/optimized",
+	[ANAS_NON_OPTIMIZED]	= "active/non-optimized",
+	[ANAS_INAVAILABLE]	= "inaccessible",
+	[ANAS_TRANSITIONING]	= "transitioning between states",
+	[ANAS_RESERVED]		= "ARRAY BUG: invalid namespace group state!",
+};
+
+/* Name of ANA state rc; 0 has no table entry (NULL), so it is reported as invalid. */
+static const char *aas_print_string(int rc)
+{
+	rc &= 0x7f;
+	if (rc < ANAS_OPTIMIZED || rc > ANAS_TRANSITIONING)
+		return anas_string[ANAS_RESERVED];
+	return anas_string[rc];
+}
+
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+	return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd);	/* ioctl() result is returned unchanged */
+}
+
+
+/* Get Log Page: NUMD is the zero-based dword count of data_len (CDW10/CDW11). */
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, __u32 data_len, void *data)
+{
+	__u32 numd = (data_len >> 2) - 1;	/* wraps if data_len < 4; rejected below */
+	struct nvme_admin_cmd cmd = {
+		.opcode		= nvme_admin_get_log_page,
+		.nsid		= nsid,
+		.addr		= (__u64)(uintptr_t) data,
+		.data_len	= data_len,
+		.cdw10		= log_id | ((numd & 0xffff) << 16), /* shift stays in __u32 */
+		.cdw11		= numd >> 16,
+	};
+	if (data_len < 4)	/* no whole dword to request */
+		return -1;
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+
+/* Identify: cdw10 is passed through as given; data must have room for 0x1000 bytes. */
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+	struct nvme_admin_cmd identify = { 0 };
+
+	identify.opcode = nvme_admin_identify;
+	identify.nsid = nsid;
+	identify.addr = (__u64)(uintptr_t) data;
+	identify.data_len = 0x1000;
+	identify.cdw10 = cdw10;
+	return nvme_submit_admin_passthru(fd, &identify);
+}
+
+/*
+ * Check whether the controller behind pp advertises ANA.
+ * Returns ANA_SUPPORT, ANA_NOT_SUPPORT, or -ANA_GETSUPPORT_FAILED.
+ */
+static int get_ana_support(struct path * pp)
+{
+	struct nvme_id_ctrl id_ctrl;
+
+	/* nsid 0, cdw10 1: the reply is read as a struct nvme_id_ctrl */
+	if (nvme_identify(pp->fd, 0, 1, &id_ctrl) != 0)
+		return -ANA_GETSUPPORT_FAILED;
+
+	/* NVME_SUPPORT_ANA is bit 3 of the CMIC byte */
+	return (id_ctrl.cmic & NVME_SUPPORT_ANA) ? ANA_SUPPORT : ANA_NOT_SUPPORT;
+}
+
+/* Store the namespace group id of pp's namespace in *nsg; 0 on success. */
+static int get_namespace_group(struct path * pp, __u32 *nsg)
+{
+	struct nvme_id_ns id_ns;
+
+	if (nvme_identify(pp->fd, pp->nvme_id.nsid, 0, &id_ns) != 0)
+		return -ANA_GETNSG_FAILED;
+
+	/* the field is little-endian in the identify data */
+	*nsg = le32_to_cpu(id_ns.nsg);
+	return 0;
+}
+
+/*
+ * Find nsg in the ANA log page; returns its state byte or -ANA_GETAAS_FAILED.
+ */
+static int get_asymmetric_access_state(int fd, __u32 nsg)
+{
+	struct nvme_ana_log ana_log;
+	struct namespace_group_desc *nsgd;
+	int i, nsg_num, nsg_size;
+
+	if (nvme_get_log(fd, 0xffffffff, NVME_ANA_LOG_PAGE, sizeof(ana_log), &ana_log))
+		return -ANA_GETAAS_FAILED;
+
+	/* nsgdsz is the descriptor stride; the check also keeps the divisor non-zero */
+	nsg_size = ana_log.nsgdsz;
+	if (nsg_size < (int)sizeof(*nsgd)) {
+		condlog(3, "namespace group desc size %d is too small", nsg_size);
+		return -ANA_GETAAS_FAILED;
+	}
+	nsg_num = min(le16_to_cpu(ana_log.nsgdn), (ANA_LOG_LEN - ANA_LOG_HEAD)/nsg_size);
+
+	for (i = 0; i < nsg_num; i++) {
+		nsgd = (struct namespace_group_desc *)(ana_log.nsgd + i * nsg_size);
+		if (nsg == le32_to_cpu(nsgd->nsgid))
+			return nsgd->anas;
+	}
+	return -ANA_GETAAS_FAILED;
+}
+
+/*
+ * Query the ANA state of pp: check controller support, look up the
+ * namespace group, then read that group's state from the ANA log.
+ * Returns the state (>= 0) or a negated ANA_* error code. timeout is unused.
+ */
+int get_ana_info(struct path * pp, unsigned int timeout)
+{
+	__u32 group;
+	int state;
+	int supported = get_ana_support(pp);
+
+	if (supported < 0)
+		return -ANA_GETSUPPORT_FAILED;
+	if (supported != ANA_SUPPORT)
+		return -ANA_NOT_SUPPORTED;
+	if (get_namespace_group(pp, &group) < 0)
+		return -ANA_GETNSG_FAILED;
+	condlog(3, "%s: reported namespace group is %u", pp->dev, group);
+
+	state = get_asymmetric_access_state(pp->fd, group);
+	if (state < 0)
+		return -ANA_GETAAS_FAILED;
+	condlog(3, "%s: aas = %02x [%s]", pp->dev, state, aas_print_string(state));
+	return state;
+}
+
+
+/*
+ * Prioritizer entry point: turn the ANA state of pp into a path priority.
+ * Returns 50/10/5/1 for optimized/non-optimized/transitioning/inaccessible,
+ * 0 for any other state, or a negated ANA_* code on failure. args is unused.
+ */
+int getprio (struct path * pp, char * args, unsigned int timeout)
+{
+	int state;
+
+	if (pp->fd < 0)
+		return -ANA_NO_INFORMATION;
+
+	state = get_ana_info(pp, timeout);
+	if (state < 0) {
+		switch (-state) {
+		case ANA_NOT_SUPPORTED:
+			condlog(0, "%s: ana not supported", pp->dev);
+			break;
+		case ANA_GETSUPPORT_FAILED:
+			condlog(0, "%s: couldn't get support ana", pp->dev);
+			break;
+		case ANA_GETNSG_FAILED:
+			condlog(0, "%s: couldn't get namespace group", pp->dev);
+			break;
+		case ANA_GETAAS_FAILED:
+			condlog(3, "%s: couldn't get  ana states", pp->dev);
+			break;
+		}
+		return state;
+	}
+
+	/* only the low nibble of the state selects the priority */
+	switch (state & 0x0f) {
+	case ANAS_OPTIMIZED:
+		return 50;
+	case ANAS_NON_OPTIMIZED:
+		return 10;
+	case ANAS_TRANSITIONING:
+		return 5;
+	case ANAS_INAVAILABLE:
+		return 1;
+	default:
+		return 0;
+	}
+}
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h
new file mode 100644
index 0000000..c66d5d5
--- /dev/null
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h
@@ -0,0 +1,191 @@
+#ifndef _ANA_HUAWEI_H
+#define _ANA_HUAWEI_H 
+
+#include <linux/types.h>
+
+#define ANAS_OPTIMIZED			0x01
+#define ANAS_NON_OPTIMIZED		0x02
+#define ANAS_INAVAILABLE        0x03
+#define ANAS_TRANSITIONING		0x04
+#define ANAS_RESERVED			0x05
+
+#define nvme_admin_get_log_page 0x02
+#define nvme_admin_identify		0x06
+
+#ifdef __CHECKER__
+#define __force       __attribute__((force))
+#else
+#define __force
+#endif
+
+#define le16_to_cpu(x) \
+	le16toh((__force __u16)(x))
+#define le32_to_cpu(x) \
+	le32toh((__force __u32)(x))
+#define le64_to_cpu(x) \
+	le64toh((__force __u64)(x))
+
+struct nvme_passthru_cmd {	/* argument block of the NVME_IOCTL_*_CMD ioctls defined in this header */
+	__u8	opcode;
+	__u8	flags;
+	__u16	rsvd1;
+	__u32	nsid;
+	__u32	cdw2;
+	__u32	cdw3;
+	__u64	metadata;
+	__u64	addr;	/* user buffer address, stored by callers as (__u64)(uintptr_t) ptr */
+	__u32	metadata_len;
+	__u32	data_len;	/* size of the buffer at addr, in bytes */
+	__u32	cdw10;
+	__u32	cdw11;
+	__u32	cdw12;
+	__u32	cdw13;
+	__u32	cdw14;
+	__u32	cdw15;
+	__u32	timeout_ms;
+	__u32	result;
+};
+
+struct nvme_id_power_state {	/* element type of nvme_id_ctrl.psd[]; 32 bytes */
+	__le16			max_power;	/* centiwatts */
+	__u8			rsvd2;
+	__u8			flags;
+	__le32			entry_lat;	/* microseconds */
+	__le32			exit_lat;	/* microseconds */
+	__u8			read_tput;
+	__u8			read_lat;
+	__u8			write_tput;
+	__u8			write_lat;
+	__le16			idle_power;
+	__u8			idle_scale;
+	__u8			rsvd19;
+	__le16			active_power;
+	__u8			active_work_scale;
+	__u8			rsvd23[9];
+};
+
+struct nvme_lbaf {	/* element type of nvme_id_ns.lbaf[]; 4 bytes */
+	__le16			ms;
+	__u8			ds;
+	__u8			rp;
+};
+
+struct nvme_id_ctrl {	/* 0x1000-byte buffer that get_ana_support() has nvme_identify() fill */
+	__le16			vid;
+	__le16			ssvid;
+	char			sn[20];
+	char			mn[40];
+	char			fr[8];
+	__u8			rab;
+	__u8			ieee[3];
+	__u8			cmic;	/* tested against NVME_SUPPORT_ANA (bit 3) to detect ANA */
+	__u8			mdts;
+	__le16			cntlid;
+	__le32			ver;
+	__le32			rtd3r;
+	__le32			rtd3e;
+	__le32			oaes;
+	__le32			ctratt;
+	__u8			rsvd100[156];
+	__le16			oacs;
+	__u8			acl;
+	__u8			aerl;
+	__u8			frmw;
+	__u8			lpa;
+	__u8			elpe;
+	__u8			npss;
+	__u8			avscc;
+	__u8			apsta;
+	__le16			wctemp;
+	__le16			cctemp;
+	__le16			mtfa;
+	__le32			hmpre;
+	__le32			hmmin;
+	__u8			tnvmcap[16];
+	__u8			unvmcap[16];
+	__le32			rpmbs;
+	__u8			rsvd316[4];
+	__le16			kas;
+	__u8			rsvd322[190];
+	__u8			sqes;
+	__u8			cqes;
+	__le16			maxcmd;
+	__le32			nn;
+	__le16			oncs;
+	__le16			fuses;
+	__u8			fna;
+	__u8			vwc;
+	__le16			awun;
+	__le16			awupf;
+	__u8			nvscc;
+	__u8			rsvd531;
+	__le16			acwu;
+	__u8			rsvd534[2];
+	__le32			sgls;
+	__u8			rsvd540[228];
+	char			subnqn[256];
+	__u8			rsvd1024[768];
+	__le32			ioccsz;
+	__le32			iorcsz;
+	__le16			icdoff;
+	__u8			ctrattr;
+	__u8			msdbd;
+	__u8			rsvd1804[244];
+	struct nvme_id_power_state	psd[32];
+	__u8			vs[1024];
+};
+
+struct nvme_id_ns {	/* 0x1000-byte buffer that get_namespace_group() has nvme_identify() fill */
+	__le64			nsze;
+	__le64			ncap;
+	__le64			nuse;
+	__u8			nsfeat;
+	__u8			nlbaf;
+	__u8			flbas;
+	__u8			mc;
+	__u8			dpc;
+	__u8			dps;
+	__u8			nmic;
+	__u8			rescap;
+	__u8			fpi;
+	__u8			rsvd33;
+	__le16			nawun;
+	__le16			nawupf;
+	__le16			nacwu;
+	__le16			nabsn;
+	__le16			nabo;
+	__le16			nabspf;
+	__u16			rsvd46;
+	__u8			nvmcap[16];
+	__u8			rsvd64[40];
+	__u8			nguid[16];
+	__u8			eui64[8];
+	struct nvme_lbaf	lbaf[16];
+    __le32          nsg;	/* namespace group id, the 4 bytes right after lbaf[] (byte offset 192) */
+	__u8			rsvd188[188];	/* 188 is the length here, not the offset: this starts at byte 196 */
+	__u8			vs[3712];
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd
+
+#define NVME_IOCTL_ID		    _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_IO_CMD	    _IOWR('N', 0x43, struct nvme_passthru_cmd)
+
+#define ANA_LOG_LEN 4096
+#define ANA_LOG_HEAD 16
+
+struct nvme_ana_log {	/* ANA_LOG_LEN-byte buffer read with log id NVME_ANA_LOG_PAGE */
+    __le64 change_count;
+    __le16 nsgdn;	/* number of descriptors present in nsgd[] */
+    __u8   nsgdsz;	/* size in bytes of one descriptor, used as the stride through nsgd[] */
+    __u8   trans_time;
+    __u8   rsvd4[4];	/* 4 reserved bytes; the header above nsgd[] totals ANA_LOG_HEAD bytes */
+    __u8   nsgd[ANA_LOG_LEN - ANA_LOG_HEAD];	/* descriptors, each read as a namespace_group_desc */
+};
+struct namespace_group_desc {	/* one entry of nvme_ana_log.nsgd[] */
+	__le32 nsgid;	/* little-endian on the wire; read it through le32_to_cpu() */
+	__u8   anas;	/* 0x01 optimized, 0x02 non-optimized, 0x03 inaccessible, 0x04 transitioning */
+};
+
+#endif
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/structs.h b/multipath-tools-HEAD-be1191b/libmultipath/structs.h
index 98e13e4..d1a7721 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/structs.h
+++ b/multipath-tools-HEAD-be1191b/libmultipath/structs.h
@@ -182,6 +182,11 @@ struct sg_id {
 	int transport_id;
 };
 
+struct nvme_id	/* NVMe identifiers of a path, filled in by nvme_sysfs_pathinfo() */
+{
+  int cntl_id;	/* parsed from the "cntlid" sysfs attribute of the nvme controller device */
+  unsigned int nsid;	/* parsed from the "nsid" sysfs attribute of the path's own device */
+};
 # ifndef HDIO_GETGEO
 #  define HDIO_GETGEO	0x0301	/* get device geometry */
 
@@ -198,6 +203,7 @@ struct path {
 	char dev_t[BLK_DEV_SIZE];
 	struct udev_device *udev;
 	struct sg_id sg_id;
+    struct nvme_id nvme_id;
 	struct hd_geometry geom;
 	char wwid[WWID_SIZE];
 	char vendor_id[SCSI_VENDOR_SIZE];
-- 
2.6.4.windows.1





More information about the Linux-nvme mailing list