[PATCH] multipath-tools: Add Huawei prioritizer for nvme
Zou Ming
zouming.zouming at huawei.com
Fri Apr 21 03:27:58 PDT 2017
Tool to make use of an NVMe feature called HUAWEI Asymmetric Namespace Access.
It determines the ANA state of a device and prints a priority value to stdout.
---
.../libmultipath/discovery.c | 31 ++-
.../libmultipath/prioritizers/Makefile | 1 +
.../libmultipath/prioritizers/huawei_ana.c | 222 +++++++++++++++++++++
.../libmultipath/prioritizers/huawei_ana.h | 191 ++++++++++++++++++
.../libmultipath/structs.h | 6 +
5 files changed, 449 insertions(+), 2 deletions(-)
create mode 100644 multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
create mode 100644 multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/discovery.c b/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
index 663c8ea..6d5acab 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
+++ b/multipath-tools-HEAD-be1191b/libmultipath/discovery.c
@@ -1195,6 +1195,8 @@ static int
nvme_sysfs_pathinfo (struct path * pp, vector hwtable)
{
struct udev_device *parent, *nvme = NULL;
+ char value[16];
+ int ret;
parent = pp->udev;
while (parent) {
@@ -1209,13 +1211,38 @@ nvme_sysfs_pathinfo (struct path * pp, vector hwtable)
if (!nvme)
return 1;
+ ret = sysfs_attr_get_value(pp->udev, "nsid", value, 16);
+ if (ret <= 0) {
+ condlog(0, "%s: failed to read nsid value, "
+ "error %d", pp->dev, -ret);
+ return 1;
+ }
+
+ ret = sscanf(value, "%u\n", &pp->nvme_id.nsid);
+ if (ret != 1) {
+ condlog(0, "%s: Cannot parse nsid attribute", pp->dev);
+ return 1;
+ }
+
+ ret = sysfs_attr_get_value(nvme, "cntlid", value, 16);
+ if (ret <= 0) {
+ condlog(0, "%s: failed to read cntlid value, "
+ "error %d", pp->dev, -ret);
+ return 1;
+ }
+
+ ret = sscanf(value, "%d\n", &pp->nvme_id.cntl_id);
+ if (ret != 1) {
+ condlog(0, "%s: Cannot parse cntlid attribute", pp->dev);
+ return 1;
+ }
snprintf(pp->vendor_id, SCSI_VENDOR_SIZE, "NVME");
snprintf(pp->product_id, SCSI_PRODUCT_SIZE, "%s", udev_device_get_sysattr_value(nvme, "model"));
snprintf(pp->serial, SERIAL_SIZE, "%s", udev_device_get_sysattr_value(nvme, "serial"));
snprintf(pp->rev, SCSI_REV_SIZE, "%s", udev_device_get_sysattr_value(nvme, "firmware_rev"));
- condlog(3, "%s: vendor:%s product:%s serial:%s rev:%s", pp->dev,
- pp->vendor_id, pp->product_id, pp->serial, pp->rev);
+ condlog(3, "%s: ctrl id:%d,nsid:%d,vendor:%s product:%s serial:%s rev:%s", pp->dev,pp->nvme_id.cntl_id,
+ pp->nvme_id.nsid,pp->vendor_id, pp->product_id, pp->serial, pp->rev);
pp->hwe = find_hwe(hwtable, pp->vendor_id, pp->product_id, NULL);
return 0;
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
index 36b42e4..395a65b 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/Makefile
@@ -17,6 +17,7 @@ LIBS = \
libprioontap.so \
libpriorandom.so \
libpriordac.so \
+ libpriohuawei_ana.so \
libprioweightedpath.so \
libpriosysfs.so
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
new file mode 100644
index 0000000..1d64da2
--- /dev/null
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.c
@@ -0,0 +1,222 @@
+/*
+ * (C) Copyright HUAWEI Technology Corp. 2017 All Rights Reserved.
+ *
+ * huawei_ana.c
+ * Version 1.00
+ *
+ * Tool to make use of an NVMe feature called HUAWEI Asymmetric Namespace Access.
+ * It determines the ANA state of a device and prints a priority value to stdout.
+ *
+ * Author(s): Zou Ming <zouming.zouming at huawei.com>
+ * Yang Feng <philip.yang at huawei.com>
+ *
+ * This file is released under the GPL.
+ */
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "debug.h"
+#include "prio.h"
+#include "structs.h"
+
+#include "huawei_ana.h"
+
+#define ANA_GETSUPPORT_FAILED 1 /* error codes, returned negated: the identify in get_ana_support() failed */
+#define ANA_NOT_SUPPORTED 2 /* get_ana_support() did not report ANA_SUPPORT */
+#define ANA_GETNSG_FAILED 3 /* the identify in get_namespace_group() failed */
+#define ANA_GETAAS_FAILED 4 /* ANA log unreadable, descriptor size too small, or no matching group */
+#define ANA_NO_INFORMATION 5 /* getprio() was called with pp->fd < 0 */
+/* Non-negative results of get_ana_support(). */
+#define ANA_SUPPORT 0
+#define ANA_NOT_SUPPORT 1
+/* Log identifier passed to nvme_get_log() when reading the ANA log. */
+#define NVME_ANA_LOG_PAGE 0xc0
+/* Bit tested in nvme_id_ctrl.cmic by get_ana_support(). */
+#define NVME_SUPPORT_ANA (1 << 3)
+/* NOTE: evaluates each argument twice; do not pass expressions with side effects. */
+#define min(x, y) ((x) > (y) ? (y) : (x))
+
+static const char * anas_string[] = { /* descriptions indexed by ANAS_* value; slot 0 has no initializer and is therefore NULL */
+ [ANAS_OPTIMIZED] = "active/optimized",
+ [ANAS_NON_OPTIMIZED] = "active/non-optimized",
+ [ANAS_INAVAILABLE] = "inaccessible",
+ [ANAS_TRANSITIONING] = "transitioning between states",
+ [ANAS_RESERVED] = "ARRAY BUG: invalid namespace group state!", /* returned by aas_print_string() for out-of-range states */
+};
+
+/* Describe ANA state rc (low 7 bits); values outside OPTIMIZED..TRANSITIONING, including 0, get the reserved text. */
+static const char *aas_print_string(int rc)
+{
+	rc &= 0x7f;
+	/* anas_string[0] has no initializer (NULL), so 0 must not be used as an index. */
+	if (rc < ANAS_OPTIMIZED || rc > ANAS_TRANSITIONING)
+		return anas_string[ANAS_RESERVED];
+	return anas_string[rc];
+}
+
+/* Submit cmd on fd through the NVME_IOCTL_ADMIN_CMD ioctl and return ioctl()'s result unchanged. */
+static int nvme_submit_admin_passthru(int fd, struct nvme_passthru_cmd *cmd)
+{
+ return ioctl(fd, NVME_IOCTL_ADMIN_CMD, cmd);
+}
+
+/* Read log page log_id for nsid into data (data_len bytes); returns the ioctl result, or -1 when data_len < 4. */
+int nvme_get_log(int fd, __u32 nsid, __u8 log_id, __u32 data_len, void *data)
+{
+	struct nvme_admin_cmd cmd = {
+		.opcode = nvme_admin_get_log_page,
+		.nsid = nsid,
+		.addr = (__u64)(uintptr_t) data,
+		.data_len = data_len,
+	};
+	__u32 numd;
+	if (data_len < 4)	/* numd is (dwords - 1); zero dwords would wrap to 0xffffffff */
+		return -1;
+	numd = (data_len >> 2) - 1;
+	cmd.cdw10 = log_id | ((numd & 0xffff) << 16);	/* shift in __u32, not in a promoted signed int */
+	cmd.cdw11 = numd >> 16;
+	return nvme_submit_admin_passthru(fd, &cmd);
+}
+
+/* Send an identify admin command (cdw10 as given) asking for 0x1000 bytes into data; returns the ioctl result. */
+int nvme_identify(int fd, __u32 nsid, __u32 cdw10, void *data)
+{
+	struct nvme_admin_cmd identify = { 0 };
+
+	identify.opcode = nvme_admin_identify;
+	identify.nsid = nsid;
+	identify.addr = (__u64)(uintptr_t) data;
+	identify.data_len = 0x1000;
+	identify.cdw10 = cdw10;
+	return nvme_submit_admin_passthru(fd, &identify);
+}
+
+/*
+ * Issue an identify with nsid 0 and cdw10 = 1 on pp->fd and test the
+ * NVME_SUPPORT_ANA bit of the returned cmic byte.
+ * Returns ANA_SUPPORT or ANA_NOT_SUPPORT, or -ANA_GETSUPPORT_FAILED when the
+ * identify command returns non-zero.
+ */
+static int get_ana_support(struct path * pp)
+{
+	struct nvme_id_ctrl id_ctrl;
+
+	if (nvme_identify(pp->fd, 0, 1, &id_ctrl) != 0)
+		return -ANA_GETSUPPORT_FAILED;
+
+	return (id_ctrl.cmic & NVME_SUPPORT_ANA) ? ANA_SUPPORT : ANA_NOT_SUPPORT;
+}
+
+/* Store the nsg field of namespace pp->nvme_id.nsid in *nsg, converted to host byte order. */
+/* Returns 0, or -ANA_GETNSG_FAILED when the identify command returns non-zero. */
+static int get_namespace_group(struct path * pp, __u32 *nsg)
+{
+	struct nvme_id_ns id_ns;
+
+	if (nvme_identify(pp->fd, pp->nvme_id.nsid, 0, &id_ns) != 0)
+		return -ANA_GETNSG_FAILED;
+
+	*nsg = le32_to_cpu(id_ns.nsg);
+	return 0;
+}
+
+/*
+ * Return the anas byte of the ANA log descriptor whose nsgid equals nsg, or
+ * -ANA_GETAAS_FAILED (log read failed, descriptor size too small, no match).
+ */
+static int get_asymmetric_access_state(int fd, __u32 nsg)
+{
+	struct nvme_ana_log ana_log;
+	struct namespace_group_desc *nsgd;
+	int i, nsg_num, nsg_size;
+
+	if (nvme_get_log(fd, 0xffffffff, NVME_ANA_LOG_PAGE, sizeof(ana_log), &ana_log))
+		return -ANA_GETAAS_FAILED;
+
+	nsg_size = ana_log.nsgdsz;
+	if (nsg_size < (int)sizeof(*nsgd)) {
+		condlog(3, "namespace group desc size %d is too small", nsg_size);
+		return -ANA_GETAAS_FAILED;
+	}
+	/* Cap the count so the walk stays inside the descriptor area of the log buffer. */
+	nsg_num = min(le16_to_cpu(ana_log.nsgdn), (ANA_LOG_LEN - ANA_LOG_HEAD)/nsg_size);
+	for (i = 0; i < nsg_num; i++) {
+		nsgd = (struct namespace_group_desc *)(ana_log.nsgd + i*nsg_size);
+		if (nsg == le32_to_cpu(nsgd->nsgid))
+			return nsgd->anas;
+	}
+	return -ANA_GETAAS_FAILED;
+}
+
+/*
+ * Determine the ANA state for pp: check controller support, read the
+ * namespace group, then look the group up in the ANA log. Returns the state
+ * (>= 0) or a negated ANA_* error code. timeout is not used.
+ */
+int get_ana_info(struct path * pp, unsigned int timeout)
+{
+	__u32 group;
+	int state;
+	int support = get_ana_support(pp);
+
+	if (support < 0)
+		return -ANA_GETSUPPORT_FAILED;
+	if (support != ANA_SUPPORT)
+		return -ANA_NOT_SUPPORTED;
+	if (get_namespace_group(pp, &group) < 0)
+		return -ANA_GETNSG_FAILED;
+	condlog(3, "%s: reported namespace group is %u", pp->dev, group);
+
+	state = get_asymmetric_access_state(pp->fd, group);
+	if (state < 0)
+		return -ANA_GETAAS_FAILED;
+	condlog(3, "%s: aas = %02x [%s]", pp->dev, state, aas_print_string(state));
+	return state;
+}
+
+
+/*
+ * Prioritizer entry point: turn the ANA state of pp into a priority
+ * (50 optimized, 10 non-optimized, 5 transitioning, 1 inaccessible, 0 for
+ * anything else). On failure a negated ANA_* code is returned. args is unused.
+ */
+int getprio (struct path * pp, char * args, unsigned int timeout)
+{
+	int state;
+
+	if (pp->fd < 0)
+		return -ANA_NO_INFORMATION;
+
+	state = get_ana_info(pp, timeout);
+	if (state < 0) {
+		/* Log why no state is available, then hand the error code back. */
+		switch(-state) {
+		case ANA_NOT_SUPPORTED:
+			condlog(0, "%s: ana not supported", pp->dev);
+			break;
+		case ANA_GETSUPPORT_FAILED:
+			condlog(0, "%s: couldn't get support ana", pp->dev);
+			break;
+		case ANA_GETNSG_FAILED:
+			condlog(0, "%s: couldn't get namespace group", pp->dev);
+			break;
+		case ANA_GETAAS_FAILED:
+			condlog(3, "%s: couldn't get ana states", pp->dev);
+			break;
+		}
+		return state;
+	}
+	/* Only the low nibble of the reported state selects the priority. */
+	switch(state & 0x0f) {
+	case ANAS_OPTIMIZED:
+		return 50;
+	case ANAS_NON_OPTIMIZED:
+		return 10;
+	case ANAS_TRANSITIONING:
+		return 5;
+	case ANAS_INAVAILABLE:
+		return 1;
+	default:
+		return 0;
+	}
+}
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h
new file mode 100644
index 0000000..c66d5d5
--- /dev/null
+++ b/multipath-tools-HEAD-be1191b/libmultipath/prioritizers/huawei_ana.h
@@ -0,0 +1,191 @@
+#ifndef _ANA_HUAWEI_H
+#define _ANA_HUAWEI_H
+
+#include <linux/types.h>
+
+#define ANAS_OPTIMIZED 0x01 /* ANA state values; also the indexes of anas_string[] */
+#define ANAS_NON_OPTIMIZED 0x02
+#define ANAS_INAVAILABLE 0x03 /* described as "inaccessible" in anas_string[] */
+#define ANAS_TRANSITIONING 0x04
+#define ANAS_RESERVED 0x05 /* slot of the fallback text for out-of-range states */
+/* Admin opcodes stored in nvme_passthru_cmd.opcode. */
+#define nvme_admin_get_log_page 0x02
+#define nvme_admin_identify 0x06
+/* __force expands to an attribute only when __CHECKER__ is defined; otherwise it is empty. */
+#ifdef __CHECKER__
+#define __force __attribute__((force))
+#else
+#define __force
+#endif
+/* Little-endian to host conversions built on le16toh/le32toh/le64toh. */
+#define le16_to_cpu(x) \
+ le16toh((__force __u16)(x))
+#define le32_to_cpu(x) \
+ le32toh((__force __u32)(x))
+#define le64_to_cpu(x) \
+ le64toh((__force __u64)(x))
+
+struct nvme_passthru_cmd { /* argument of NVME_IOCTL_ADMIN_CMD / NVME_IOCTL_IO_CMD; NOTE(review): layout presumably has to match the kernel's definition - confirm */
+ __u8 opcode; /* nvme_admin_identify or nvme_admin_get_log_page in huawei_ana.c */
+ __u8 flags;
+ __u16 rsvd1;
+ __u32 nsid; /* namespace id the command addresses */
+ __u32 cdw2;
+ __u32 cdw3;
+ __u64 metadata;
+ __u64 addr; /* address of the caller's data buffer, stored as an integer */
+ __u32 metadata_len;
+ __u32 data_len; /* size in bytes of the buffer at addr */
+ __u32 cdw10; /* command-specific; set by nvme_identify() and nvme_get_log() */
+ __u32 cdw11; /* command-specific; set by nvme_get_log() */
+ __u32 cdw12;
+ __u32 cdw13;
+ __u32 cdw14;
+ __u32 cdw15;
+ __u32 timeout_ms;
+ __u32 result;
+};
+
+struct nvme_id_power_state { /* element type of nvme_id_ctrl.psd[]; members add up to 32 bytes */
+ __le16 max_power; /* centiwatts */
+ __u8 rsvd2;
+ __u8 flags;
+ __le32 entry_lat; /* microseconds */
+ __le32 exit_lat; /* microseconds */
+ __u8 read_tput;
+ __u8 read_lat;
+ __u8 write_tput;
+ __u8 write_lat;
+ __le16 idle_power;
+ __u8 idle_scale;
+ __u8 rsvd19;
+ __le16 active_power;
+ __u8 active_work_scale;
+ __u8 rsvd23[9]; /* pads the entry out to 32 bytes */
+};
+
+struct nvme_lbaf { /* element type of nvme_id_ns.lbaf[]; 4 bytes per entry */
+ __le16 ms;
+ __u8 ds;
+ __u8 rp;
+};
+
+struct nvme_id_ctrl { /* buffer get_ana_support() hands to nvme_identify(); members add up to 4096 bytes, the data_len nvme_identify() requests */
+ __le16 vid;
+ __le16 ssvid;
+ char sn[20];
+ char mn[40];
+ char fr[8];
+ __u8 rab;
+ __u8 ieee[3];
+ __u8 cmic; /* get_ana_support() tests the NVME_SUPPORT_ANA bit of this byte */
+ __u8 mdts;
+ __le16 cntlid;
+ __le32 ver;
+ __le32 rtd3r;
+ __le32 rtd3e;
+ __le32 oaes;
+ __le32 ctratt;
+ __u8 rsvd100[156];
+ __le16 oacs;
+ __u8 acl;
+ __u8 aerl;
+ __u8 frmw;
+ __u8 lpa;
+ __u8 elpe;
+ __u8 npss;
+ __u8 avscc;
+ __u8 apsta;
+ __le16 wctemp;
+ __le16 cctemp;
+ __le16 mtfa;
+ __le32 hmpre;
+ __le32 hmmin;
+ __u8 tnvmcap[16];
+ __u8 unvmcap[16];
+ __le32 rpmbs;
+ __u8 rsvd316[4];
+ __le16 kas;
+ __u8 rsvd322[190];
+ __u8 sqes;
+ __u8 cqes;
+ __le16 maxcmd;
+ __le32 nn;
+ __le16 oncs;
+ __le16 fuses;
+ __u8 fna;
+ __u8 vwc;
+ __le16 awun;
+ __le16 awupf;
+ __u8 nvscc;
+ __u8 rsvd531;
+ __le16 acwu;
+ __u8 rsvd534[2];
+ __le32 sgls;
+ __u8 rsvd540[228];
+ char subnqn[256];
+ __u8 rsvd1024[768];
+ __le32 ioccsz;
+ __le32 iorcsz;
+ __le16 icdoff;
+ __u8 ctrattr;
+ __u8 msdbd;
+ __u8 rsvd1804[244];
+ struct nvme_id_power_state psd[32]; /* 32 entries of 32 bytes each */
+ __u8 vs[1024];
+};
+
+struct nvme_id_ns { /* buffer get_namespace_group() hands to nvme_identify(); members add up to 4096 bytes */
+ __le64 nsze;
+ __le64 ncap;
+ __le64 nuse;
+ __u8 nsfeat;
+ __u8 nlbaf;
+ __u8 flbas;
+ __u8 mc;
+ __u8 dpc;
+ __u8 dps;
+ __u8 nmic;
+ __u8 rescap;
+ __u8 fpi;
+ __u8 rsvd33;
+ __le16 nawun;
+ __le16 nawupf;
+ __le16 nacwu;
+ __le16 nabsn;
+ __le16 nabo;
+ __le16 nabspf;
+ __u16 rsvd46;
+ __u8 nvmcap[16];
+ __u8 rsvd64[40];
+ __u8 nguid[16];
+ __u8 eui64[8];
+ struct nvme_lbaf lbaf[16];
+ __le32 nsg; /* namespace group; get_namespace_group() converts it with le32_to_cpu() */
+ __u8 rsvd188[188]; /* NOTE(review): the name suggests offset 188, but the members above add up to 196 bytes */
+ __u8 vs[3712];
+};
+
+#define nvme_admin_cmd nvme_passthru_cmd /* makes struct nvme_admin_cmd another name for struct nvme_passthru_cmd */
+/* ioctl request codes; huawei_ana.c only issues NVME_IOCTL_ADMIN_CMD. */
+#define NVME_IOCTL_ID _IO('N', 0x40)
+#define NVME_IOCTL_ADMIN_CMD _IOWR('N', 0x41, struct nvme_admin_cmd)
+#define NVME_IOCTL_IO_CMD _IOWR('N', 0x43, struct nvme_passthru_cmd)
+/* Total size of struct nvme_ana_log and the size of its fixed fields before nsgd[]. */
+#define ANA_LOG_LEN 4096
+#define ANA_LOG_HEAD 16
+
+struct nvme_ana_log { /* buffer get_asymmetric_access_state() reads the ANA log into; ANA_LOG_LEN bytes in total */
+ __le64 change_count;
+ __le16 nsgdn; /* descriptor count; converted with le16_to_cpu() and capped by the reader */
+ __u8 nsgdsz; /* stride in bytes between consecutive descriptors in nsgd[] */
+ __u8 trans_time;
+ __u8 rsvd4[4]; /* NOTE(review): the name suggests offset 4, but the members above add up to 12 bytes */
+ __u8 nsgd[ANA_LOG_LEN - ANA_LOG_HEAD]; /* descriptor area, walked as struct namespace_group_desc */
+};
+struct namespace_group_desc { /* one descriptor in nvme_ana_log.nsgd[]; NOTE(review): padded to 8 bytes, and smaller nsgdsz values are rejected - confirm against the device format */
+	__le32 nsgid; /* little-endian like the other __le fields; readers apply le32_to_cpu() */
+	__u8 anas;/*0x01->Optimized, 0x02->Non-Optimized, 0x03->Inaccessible, 0x4->Transitioning*/
+};
+
+#endif
diff --git a/multipath-tools-HEAD-be1191b/libmultipath/structs.h b/multipath-tools-HEAD-be1191b/libmultipath/structs.h
index 98e13e4..d1a7721 100644
--- a/multipath-tools-HEAD-be1191b/libmultipath/structs.h
+++ b/multipath-tools-HEAD-be1191b/libmultipath/structs.h
@@ -182,6 +182,11 @@ struct sg_id {
int transport_id;
};
+struct nvme_id /* NVMe identifiers of a path, filled in by nvme_sysfs_pathinfo() */
+{
+ int cntl_id; /* parsed with %d from the "cntlid" sysfs attribute */
+ unsigned int nsid; /* parsed with %u from the "nsid" sysfs attribute */
+};
# ifndef HDIO_GETGEO
# define HDIO_GETGEO 0x0301 /* get device geometry */
@@ -198,6 +203,7 @@ struct path {
char dev_t[BLK_DEV_SIZE];
struct udev_device *udev;
struct sg_id sg_id;
+ struct nvme_id nvme_id;
struct hd_geometry geom;
char wwid[WWID_SIZE];
char vendor_id[SCSI_VENDOR_SIZE];
--
2.6.4.windows.1
More information about the Linux-nvme
mailing list