[PATCH 1/2] NVMe-CLI : WDC-Plugin Updated Smart Log Commands
jeffreyalien
jeff.lien at wdc.com
Wed Nov 8 08:49:47 PST 2017
From: Jeff Lien <jlien at ddtest-jeff.hgst.com>
Renamed the wdc smart-log-add command to vs-smart-add-log-c1.
This command returns and formats the vendor-specific data
from the 0xC1 smart log page.
Added a new command, vs-smart-add-log, which returns and formats
the vendor-specific data from the 0xCA smart log page.
Signed-off-by: Jeff Lien <jlien at ddtest-jeff.hgst.com>
---
wdc-nvme.c | 301 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
wdc-nvme.h | 15 +--
2 files changed, 307 insertions(+), 9 deletions(-)
diff --git a/wdc-nvme.c b/wdc-nvme.c
index b981298..2ad085e 100644
--- a/wdc-nvme.c
+++ b/wdc-nvme.c
@@ -108,6 +108,15 @@
#define WDC_GET_LOG_PAGE_SSD_PERFORMANCE 0x37
#define WDC_NVME_GET_STAT_PERF_INTERVAL_LIFETIME 0x0F
+/* C2 Log Page: read by wdc_nvme_check_supported_log_page() to find the list of supported log page IDs */
+#define WDC_NVME_GET_AVAILABLE_LOG_PAGES_OPCODE 0xC2 /* passed as the log identifier argument of nvme_get_log(), despite the _OPCODE suffix */
+#define WDC_C2_LOG_BUF_LEN 0x1000 /* size of the buffer allocated for the C2 page; also the largest header length accepted */
+#define WDC_C2_LOG_PAGES_SUPPORTED_ID 0x08 /* entry_id of the subpage that carries the supported log page IDs */
+
+/* CA Log Page: vendor-specific statistics parsed as struct wdc_ssd_ca_perf_stats */
+#define WDC_NVME_GET_DEVICE_INFO_LOG_OPCODE 0xCA /* passed as the log identifier argument of nvme_get_log(), despite the _OPCODE suffix */
+#define WDC_CA_LOG_BUF_LEN 0x80 /* bytes requested for the CA page; equals the 0x80-byte packed struct layout */
+
static int wdc_get_serial_name(int fd, char *file, size_t len, char *suffix);
static int wdc_create_log_file(char *file, __u8 *drive_log_data,
__u32 drive_log_length);
@@ -126,6 +135,7 @@ static int wdc_purge(int argc, char **argv,
struct command *command, struct plugin *plugin);
static int wdc_purge_monitor(int argc, char **argv,
struct command *command, struct plugin *plugin);
+static int wdc_nvme_check_supported_log_page(int fd, __u8 log_id);
/* Drive log data size */
struct wdc_log_size {
@@ -178,6 +188,47 @@ struct wdc_ssd_perf_stats {
__le64 nrbw; /* NAND Read Before Write */
};
+/* Additional C2 Log Page: header located at the very start of the C2 log page buffer */
+struct wdc_c2_log_page_header {
+ __le32 length; /* total page length in bytes; used as the read size and as the bound of the subpage walk */
+ __le32 version; /* not referenced by the code in this patch */
+};
+
+struct wdc_c2_log_subpage_header { /* one entry inside the C2 log page; entries follow the page header back to back */
+ __le32 length; /* added to the running byte offset to reach the next entry */
+ __le32 entry_id; /* entry type; compared against WDC_C2_LOG_PAGES_SUPPORTED_ID */
+ __le32 data; /* first payload word; its address is reinterpreted as struct wdc_c2_cbs_data */
+};
+
+struct wdc_c2_cbs_data { /* payload overlay applied to the supported-log-pages entry */
+ __le32 length; /* number of bytes of data[] that are scanned for a matching ID */
+ __u8 data[]; /* list of log page IDs, one per byte */
+};
+
+struct __attribute__((__packed__)) wdc_ssd_ca_perf_stats { /* overlay for the 0xCA log page buffer; packed because the 64-bit fields from 0x4C onward are not 8-byte aligned */
+ __le64 nand_bytes_wr_lo; /* 0x00 - NAND Bytes Written lo */
+ __le64 nand_bytes_wr_hi; /* 0x08 - NAND Bytes Written hi */
+ __le64 nand_bytes_rd_lo; /* 0x10 - NAND Bytes Read lo */
+ __le64 nand_bytes_rd_hi; /* 0x18 - NAND Bytes Read hi */
+ __le64 nand_bad_block; /* 0x20 - NAND Bad Block Count (printed as low 16 bits normalized, upper 48 bits raw) */
+ __le64 uncorr_read_count; /* 0x28 - Uncorrectable Read Count */
+ __le64 ecc_error_count; /* 0x30 - Soft ECC Error Count */
+ __le32 ssd_detect_count; /* 0x38 - SSD End to End Detection Count */
+ __le32 ssd_correct_count; /* 0x3C - SSD End to End Correction Count */
+ __le32 data_percent_used; /* 0x40 - System Data Percent Used */
+ __le32 data_erase_max; /* 0x44 - User Data Erase Counts Max */
+ __le32 data_erase_min; /* 0x48 - User Data Erase Counts Min */
+ __le64 refresh_count; /* 0x4c - Refresh Count */
+ __le64 program_fail; /* 0x54 - Program Fail Count (printed as low 16 bits normalized, upper 48 bits raw) */
+ __le64 user_erase_fail; /* 0x5C - User Data Erase Fail Count (printed as low 16 bits normalized, upper 48 bits raw) */
+ __le64 system_erase_fail; /* 0x64 - System Area Erase Fail Count (printed as low 16 bits normalized, upper 48 bits raw) */
+ __le16 thermal_throttle_status; /* 0x6C - Thermal Throttling Status */
+ __le16 thermal_throttle_count; /* 0x6E - Thermal Throttling Count */
+ __le64 pcie_corr_error; /* 0x70 - pcie Correctable Error Count */
+ __le32 rsvd1; /* 0x78 - Reserved */
+ __le32 rsvd2; /* 0x7C - Reserved */
+};
+
static double safe_div_fp(double numerator, double denominator)
{
return denominator ? numerator / denominator : 0;
@@ -280,6 +331,75 @@ static int wdc_create_log_file(char *file, __u8 *drive_log_data,
return 0;
}
+/*
+ * Check whether the drive lists log page log_id in the "supported log pages"
+ * entry of its 0xC2 log page.
+ *
+ * The C2 page is read twice: first with the whole buffer so the page header
+ * can be inspected, then again with exactly the length the header reported.
+ * The entries that follow the header are then walked until the one tagged
+ * WDC_C2_LOG_PAGES_SUPPORTED_ID is found, and its ID list is searched.
+ *
+ * All on-wire fields are little-endian and are converted before use. Every
+ * length taken from the device is checked against the buffer so a malformed
+ * page cannot cause an out-of-bounds read or an endless loop.
+ *
+ * fd     - open NVMe device file descriptor
+ * log_id - log page identifier to look for
+ *
+ * Returns 0 when log_id is listed. Returns -1 when it is not listed, when the
+ * supported-pages entry is missing, when the page is malformed, or when the
+ * buffer cannot be allocated. Returns the non-zero nvme_get_log() result when
+ * one of the two reads fails.
+ */
+static int wdc_nvme_check_supported_log_page(int fd, __u8 log_id)
+{
+	int ret = -1;
+	__u8 *data;
+	__u32 i;
+	__u32 offset;
+	__u32 page_len;
+	__u32 sub_len;
+	__u32 id_count;
+	__u32 id_max;
+	struct wdc_c2_cbs_data *cbs_data;
+	struct wdc_c2_log_page_header *hdr_ptr;
+	struct wdc_c2_log_subpage_header *sph;
+
+	data = malloc(WDC_C2_LOG_BUF_LEN);
+	if (data == NULL) {
+		fprintf(stderr, "ERROR : WDC : malloc : %s\n", strerror(errno));
+		return ret;
+	}
+	memset(data, 0, WDC_C2_LOG_BUF_LEN);
+
+	/* get the log page length */
+	ret = nvme_get_log(fd, 0xFFFFFFFF, WDC_NVME_GET_AVAILABLE_LOG_PAGES_OPCODE, WDC_C2_LOG_BUF_LEN, data);
+	if (ret) {
+		fprintf(stderr, "ERROR : WDC : Unable to get C2 Log Page length, ret = %d\n", ret);
+		goto out;
+	}
+
+	hdr_ptr = (struct wdc_c2_log_page_header *)data;
+	page_len = le32_to_cpu(hdr_ptr->length);
+
+	if (page_len > WDC_C2_LOG_BUF_LEN) {
+		fprintf(stderr, "ERROR : WDC : data length > buffer size : 0x%x\n", page_len);
+		/* ret is still 0 from the successful read; do not report "supported" */
+		ret = -1;
+		goto out;
+	}
+
+	if (page_len < sizeof(struct wdc_c2_log_page_header)) {
+		fprintf(stderr, "ERROR : WDC : C2 Log Page length too small : 0x%x\n", page_len);
+		ret = -1;
+		goto out;
+	}
+
+	ret = nvme_get_log(fd, 0xFFFFFFFF, WDC_NVME_GET_AVAILABLE_LOG_PAGES_OPCODE, page_len, data);
+	if (ret) {
+		fprintf(stderr, "ERROR : WDC : Unable to read C2 Log Page data, ret = %d\n", ret);
+		goto out;
+	}
+
+	/* from here on only an actual match may turn the result into success */
+	ret = -1;
+
+	/* parse the data until the List of log page ID's is found */
+	offset = sizeof(struct wdc_c2_log_page_header);
+	while (offset + sizeof(*sph) <= page_len) {
+		sph = (struct wdc_c2_log_subpage_header *)(data + offset);
+		sub_len = le32_to_cpu(sph->length);
+
+		if (le32_to_cpu(sph->entry_id) == WDC_C2_LOG_PAGES_SUPPORTED_ID) {
+			cbs_data = (struct wdc_c2_cbs_data *)&sph->data;
+
+			/* never scan past the end of the page that was read */
+			id_count = le32_to_cpu(cbs_data->length);
+			id_max = page_len - offset - sizeof(*sph);
+			if (id_count > id_max)
+				id_count = id_max;
+
+			for (i = 0; i < id_count; i++) {
+				if (log_id == cbs_data->data[i]) {
+					ret = 0;
+					break;
+				}
+			}
+
+			if (ret) {
+				fprintf(stderr, "ERROR : WDC : Log Page 0x%x not supported\n", log_id);
+				fprintf(stderr, "WDC : Supported Log Pages:\n");
+				/* print the supported pages (entry payload after the 12-byte header) */
+				if (sub_len > sizeof(*sph) && sub_len <= page_len - offset)
+					d(cbs_data->data, (int)(sub_len - sizeof(*sph)), 16, 1);
+			}
+			goto out;
+		}
+
+		/* a zero or oversized entry length would loop forever or leave the buffer */
+		if (sub_len == 0 || sub_len > page_len - offset) {
+			fprintf(stderr, "ERROR : WDC : malformed C2 Log Page entry at offset 0x%x\n", offset);
+			goto out;
+		}
+		offset += sub_len;
+	}
+
+	fprintf(stderr, "ERROR : WDC : supported log page list not found in C2 Log Page\n");
+out:
+	free(data);
+	return ret;
+}
+
static int wdc_do_clear_dump(int fd, __u8 opcode, __u32 cdw12)
{
int ret;
@@ -837,7 +957,119 @@ static int wdc_print_log(struct wdc_ssd_perf_stats *perf, int fmt)
return 0;
}
-static int wdc_smart_log_add(int argc, char **argv, struct command *command,
+/*
+ * Print the 0xCA log page statistics as human-readable text on stdout.
+ *
+ * perf - CA log page contents in device (little-endian) byte order
+ *
+ * Four of the 64-bit counters pack two values: the low 16 bits hold a
+ * normalized value and the upper 48 bits hold the raw count. Each field is
+ * converted to CPU byte order first and split afterwards; masking the
+ * little-endian value before conversion picks the wrong bytes on big-endian
+ * hosts.
+ */
+static void wdc_print_ca_log_normal(struct wdc_ssd_ca_perf_stats *perf)
+{
+	uint64_t bad_block = le64_to_cpu(perf->nand_bad_block);
+	uint64_t program_fail = le64_to_cpu(perf->program_fail);
+	uint64_t user_erase_fail = le64_to_cpu(perf->user_erase_fail);
+	uint64_t system_erase_fail = le64_to_cpu(perf->system_erase_fail);
+
+	printf(" CA Log Page Performance Statistics :- \n");
+	printf(" NAND Bytes Written %20"PRIu64 "%20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->nand_bytes_wr_hi), (uint64_t)le64_to_cpu(perf->nand_bytes_wr_lo));
+	printf(" NAND Bytes Read %20"PRIu64 "%20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->nand_bytes_rd_hi), (uint64_t)le64_to_cpu(perf->nand_bytes_rd_lo));
+	printf(" NAND Bad Block Count (Normalized) %20"PRIu16"\n",
+		(uint16_t)(bad_block & 0xFFFF));
+	printf(" NAND Bad Block Count (Raw) %20"PRIu64"\n",
+		(uint64_t)(bad_block >> 16));
+	printf(" Uncorrectable Read Count %20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->uncorr_read_count));
+	printf(" Soft ECC Error Count %20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->ecc_error_count));
+	printf(" SSD End to End Detected Correction Count %20"PRIu32"\n",
+		(uint32_t)le32_to_cpu(perf->ssd_detect_count));
+	printf(" SSD End to End Corrected Correction Count %20"PRIu32"\n",
+		(uint32_t)le32_to_cpu(perf->ssd_correct_count));
+	printf(" System Data Percent Used %20"PRIu32"%%\n",
+		(uint32_t)le32_to_cpu(perf->data_percent_used));
+	printf(" User Data Erase Counts Max %20"PRIu32"\n",
+		(uint32_t)le32_to_cpu(perf->data_erase_max));
+	printf(" User Data Erase Counts Min %20"PRIu32"\n",
+		(uint32_t)le32_to_cpu(perf->data_erase_min));
+	printf(" Refresh Count %20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->refresh_count));
+	printf(" Program Fail Count (Normalized) %20"PRIu16"\n",
+		(uint16_t)(program_fail & 0xFFFF));
+	printf(" Program Fail Count (Raw) %20"PRIu64"\n",
+		(uint64_t)(program_fail >> 16));
+	printf(" User Data Erase Fail Count (Normalized) %20"PRIu16"\n",
+		(uint16_t)(user_erase_fail & 0xFFFF));
+	printf(" User Data Erase Fail Count (Raw) %20"PRIu64"\n",
+		(uint64_t)(user_erase_fail >> 16));
+	printf(" System Area Erase Fail Count (Normalized) %20"PRIu16"\n",
+		(uint16_t)(system_erase_fail & 0xFFFF));
+	printf(" System Area Erase Fail Count (Raw) %20"PRIu64"\n",
+		(uint64_t)(system_erase_fail >> 16));
+	printf(" Thermal Throttling Status %20"PRIu16"\n",
+		(uint16_t)le16_to_cpu(perf->thermal_throttle_status));
+	printf(" Thermal Throttling Count %20"PRIu16"\n",
+		(uint16_t)le16_to_cpu(perf->thermal_throttle_count));
+	printf(" PCIe Correctable Error Count %20"PRIu64"\n",
+		(uint64_t)le64_to_cpu(perf->pcie_corr_error));
+}
+
+/*
+ * Print the 0xCA log page statistics as a single JSON object on stdout.
+ *
+ * perf - CA log page contents in device (little-endian) byte order
+ *
+ * Four of the 64-bit counters pack a normalized value in the low 16 bits and
+ * a raw count in the upper 48 bits. Each field is converted to CPU byte order
+ * before it is split, so the result is the same on little- and big-endian
+ * hosts.
+ */
+static void wdc_print_ca_log_json(struct wdc_ssd_ca_perf_stats *perf)
+{
+	struct json_object *root;
+	uint64_t bad_block = le64_to_cpu(perf->nand_bad_block);
+	uint64_t program_fail = le64_to_cpu(perf->program_fail);
+	uint64_t user_erase_fail = le64_to_cpu(perf->user_erase_fail);
+	uint64_t system_erase_fail = le64_to_cpu(perf->system_erase_fail);
+
+	root = json_create_object();
+	json_object_add_value_int(root, "NAND Bytes Written Hi", le64_to_cpu(perf->nand_bytes_wr_hi));
+	json_object_add_value_int(root, "NAND Bytes Written Lo", le64_to_cpu(perf->nand_bytes_wr_lo));
+	json_object_add_value_int(root, "NAND Bytes Read Hi", le64_to_cpu(perf->nand_bytes_rd_hi));
+	json_object_add_value_int(root, "NAND Bytes Read Lo", le64_to_cpu(perf->nand_bytes_rd_lo));
+	json_object_add_value_int(root, "NAND Bad Block Count (Normalized)",
+		bad_block & 0xFFFF);
+	json_object_add_value_int(root, "NAND Bad Block Count (Raw)",
+		bad_block >> 16);
+	json_object_add_value_int(root, "Uncorrectable Read Count", le64_to_cpu(perf->uncorr_read_count));
+	json_object_add_value_int(root, "Soft ECC Error Count", le64_to_cpu(perf->ecc_error_count));
+	json_object_add_value_int(root, "SSD End to End Detected Correction Count",
+		le32_to_cpu(perf->ssd_detect_count));
+	json_object_add_value_int(root, "SSD End to End Corrected Correction Count",
+		le32_to_cpu(perf->ssd_correct_count));
+	json_object_add_value_int(root, "System Data Percent Used",
+		le32_to_cpu(perf->data_percent_used));
+	json_object_add_value_int(root, "User Data Erase Counts Max",
+		le32_to_cpu(perf->data_erase_max));
+	json_object_add_value_int(root, "User Data Erase Counts Min",
+		le32_to_cpu(perf->data_erase_min));
+	json_object_add_value_int(root, "Refresh Count", le64_to_cpu(perf->refresh_count));
+	json_object_add_value_int(root, "Program Fail Count (Normalized)",
+		program_fail & 0xFFFF);
+	json_object_add_value_int(root, "Program Fail Count (Raw)",
+		program_fail >> 16);
+	json_object_add_value_int(root, "User Data Erase Fail Count (Normalized)",
+		user_erase_fail & 0xFFFF);
+	json_object_add_value_int(root, "User Data Erase Fail Count (Raw)",
+		user_erase_fail >> 16);
+	json_object_add_value_int(root, "System Area Erase Fail Count (Normalized)",
+		system_erase_fail & 0xFFFF);
+	json_object_add_value_int(root, "System Area Erase Fail Count (Raw)",
+		system_erase_fail >> 16);
+	json_object_add_value_int(root, "Thermal Throttling Status",
+		le16_to_cpu(perf->thermal_throttle_status));
+	json_object_add_value_int(root, "Thermal Throttling Count",
+		le16_to_cpu(perf->thermal_throttle_count));
+	json_object_add_value_int(root, "PCIe Correctable Error", le64_to_cpu(perf->pcie_corr_error));
+	json_print_object(root, NULL);
+	printf("\n");
+	json_free_object(root);
+}
+
+/*
+ * Dispatch the CA log page statistics to the printer selected by fmt.
+ *
+ * perf - parsed CA log page; must not be NULL
+ * fmt  - NORMAL for text output, JSON for a JSON object; any other value
+ *        prints nothing
+ *
+ * Returns -1 when perf is NULL, otherwise 0.
+ */
+static int wdc_print_ca_log(struct wdc_ssd_ca_perf_stats *perf, int fmt)
+{
+	if (perf == NULL) {
+		fprintf(stderr, "ERROR : WDC : Invalid buffer to read perf stats\n");
+		return -1;
+	}
+
+	if (fmt == NORMAL)
+		wdc_print_ca_log_normal(perf);
+	else if (fmt == JSON)
+		wdc_print_ca_log_json(perf);
+
+	return 0;
+}
+
+static int wdc_smart_add_log_c1(int argc, char **argv, struct command *command,
struct plugin *plugin)
{
char *desc = "Retrieve additional performance statistics.";
@@ -894,7 +1126,8 @@ static int wdc_smart_log_add(int argc, char **argv, struct command *command,
memset(data, 0, sizeof (__u8) * WDC_ADD_LOG_BUF_LEN);
ret = nvme_get_log(fd, 0x01, WDC_NVME_ADD_LOG_OPCODE, WDC_ADD_LOG_BUF_LEN, data);
- fprintf(stderr, "NVMe Status:%s(%x)\n", nvme_status_to_string(ret), ret);
+ if (strcmp(cfg.output_format, "json"))
+ fprintf(stderr, "NVMe Status:%s(%x)\n", nvme_status_to_string(ret), ret);
if (ret == 0) {
l = (struct wdc_log_page_header*)data;
total_subpages = l->num_subpages + WDC_NVME_GET_STAT_PERF_INTERVAL_LIFETIME - 1;
@@ -916,3 +1149,67 @@ static int wdc_smart_log_add(int argc, char **argv, struct command *command,
free(data);
return ret;
}
+
+/*
+ * Handler for the "vs-smart-add-log" command: reads the vendor-specific 0xCA
+ * log page and prints it in the requested output format.
+ *
+ * Steps: parse the command line and open the device, validate the output
+ * format, confirm through the C2 page that 0xCA is supported, read
+ * WDC_CA_LOG_BUF_LEN bytes of the CA page and hand them to
+ * wdc_print_ca_log().
+ *
+ * Returns the negative value from parse_and_open() or
+ * validate_output_format() when those fail, -1 when the page is unsupported,
+ * the buffer cannot be allocated or the read fails, and otherwise the result
+ * of wdc_print_ca_log().
+ */
+static int wdc_smart_add_log(int argc, char **argv, struct command *command,
+		struct plugin *plugin)
+{
+	struct config {
+		int vendor_specific;
+		char *output_format;
+	};
+
+	struct config cfg = {
+		.output_format = "normal",
+	};
+
+	const struct argconfig_commandline_options command_line_options[] = {
+		{"output-format", 'o', "FMT", CFG_STRING, &cfg.output_format, required_argument, "Output Format: normal|json" },
+		{NULL}
+	};
+
+	char *desc = "Retrieve additional performance statistics.";
+	__u8 *log_buf;
+	int status;
+	int fmt;
+	int fd;
+
+	fd = parse_and_open(argc, argv, desc, command_line_options, NULL, 0);
+	if (fd < 0)
+		return fd;
+
+	wdc_check_device(fd);
+
+	fmt = validate_output_format(cfg.output_format);
+	if (fmt < 0) {
+		fprintf(stderr, "ERROR : WDC : invalid output format\n");
+		return fmt;
+	}
+
+	/* verify the 0xCA log page is supported */
+	if (wdc_nvme_check_supported_log_page(fd, WDC_NVME_GET_DEVICE_INFO_LOG_OPCODE) != 0) {
+		fprintf(stderr, "ERROR : WDC : 0xCA Log Page not supported\n");
+		return -1;
+	}
+
+	log_buf = (__u8 *)malloc(WDC_CA_LOG_BUF_LEN * sizeof(__u8));
+	if (log_buf == NULL) {
+		fprintf(stderr, "ERROR : WDC : malloc : %s\n", strerror(errno));
+		return -1;
+	}
+	memset(log_buf, 0, WDC_CA_LOG_BUF_LEN * sizeof(__u8));
+
+	status = nvme_get_log(fd, 0xFFFFFFFF, WDC_NVME_GET_DEVICE_INFO_LOG_OPCODE, WDC_CA_LOG_BUF_LEN, log_buf);
+
+	/* the status line is suppressed only when JSON output was requested */
+	if (strcmp(cfg.output_format, "json") != 0)
+		fprintf(stderr, "NVMe Status:%s(%x)\n", nvme_status_to_string(status), status);
+
+	if (status != 0) {
+		fprintf(stderr, "ERROR : WDC : Unable to read CA Log Page data\n");
+		status = -1;
+		goto done;
+	}
+
+	/* parse the data */
+	status = wdc_print_ca_log((struct wdc_ssd_ca_perf_stats *)log_buf, fmt);
+
+done:
+	free(log_buf);
+	return status;
+}
diff --git a/wdc-nvme.h b/wdc-nvme.h
index a3574c1..13496ae 100644
--- a/wdc-nvme.h
+++ b/wdc-nvme.h
@@ -8,13 +8,14 @@
PLUGIN(NAME("wdc", "Western Digital vendor specific extensions"),
COMMAND_LIST(
- ENTRY("cap-diag", "WDC Capture-Diagnostics", wdc_cap_diag)
- ENTRY("drive-log", "WDC Drive Log", wdc_drive_log)
- ENTRY("get-crash-dump", "WDC Crash Dump", wdc_get_crash_dump)
- ENTRY("id-ctrl", "WDC identify controller", wdc_id_ctrl)
- ENTRY("purge", "WDC Purge", wdc_purge)
- ENTRY("purge-monitor", "WDC Purge Monitor", wdc_purge_monitor)
- ENTRY("smart-log-add", "WDC Additional Smart Log", wdc_smart_log_add)
+ ENTRY("cap-diag", " WDC Capture-Diagnostics", wdc_cap_diag)
+ ENTRY("drive-log", " WDC Drive Log", wdc_drive_log)
+ ENTRY("get-crash-dump", " WDC Crash Dump", wdc_get_crash_dump)
+ ENTRY("id-ctrl", " WDC identify controller", wdc_id_ctrl)
+ ENTRY("purge", " WDC Purge", wdc_purge)
+ ENTRY("purge-monitor", " WDC Purge Monitor", wdc_purge_monitor)
+ ENTRY("vs-smart-add-log", " WDC Additional Smart Log", wdc_smart_add_log)
+ ENTRY("vs-smart-add-log-c1", " WDC Additional Smart Log for C1 Log page", wdc_smart_add_log_c1)
)
);
--
2.14.2.746.g8fb8a94
More information about the Linux-nvme
mailing list