[RFC v2 2/3] nvme: add error logging opt-in

alan.adamson at oracle.com alan.adamson at oracle.com
Tue Mar 28 13:45:48 PDT 2023


On 3/27/23 5:57 PM, Christoph Hellwig wrote:
> On Thu, Mar 23, 2023 at 04:03:15PM -0700, Alan Adamson wrote:
>> Commit d7ac8dca938c ("nvme: quiet user passthrough command errors") disabled error
>> logging for user passthrough commands.  This commit adds the ability to opt-in
>> to passthrough error logging.
>>
>> To enable passthrough error logging:
>>          echo 1 > /sys/kernel/debug/nvme0/error-logging
>>
>> To disable passthrough error logging:
>>          echo 0 > /sys/kernel/debug/nvme0/error-logging
>>
>> By default, passthrough error logging will remain disabled.
>>
>> CONFIG_NVME_ERROR_LOGGING_DEBUG_FS needs to be enabled to
>> enable passthrough error logging.
> Any reason to do this in debugfs vs sysfs, which is something
> that we can handle much more easily?
>
> Also why do we need a config option for a trivial amount of code?
>
Putting it in sysfs may make more sense and not require debugfs to be 
configured.  See below.

Alan


diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 53ef028596c6..24b4dcbfe819 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -337,6 +337,49 @@ static void nvme_log_error(struct request *req)
                 nr->status & NVME_SC_DNR  ? "DNR "  : "");
  }

+static void nvme_log_error_passthrough(struct request *req)
+{
+    struct nvme_ns *ns = req->q->queuedata;
+    struct nvme_request *nr = nvme_req(req);
+
+    if (ns) {
+        pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s: "
+               "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n",
+               ns->disk ? ns->disk->disk_name : "?",
+               nvme_get_opcode_str(nr->cmd->common.opcode),
+               nr->cmd->common.opcode,
+               nvme_get_error_status_str(nr->status),
+               nr->status >> 8 & 7,    /* Status Code Type */
+               nr->status & 0xff,    /* Status Code */
+               nr->status & NVME_SC_MORE ? "MORE " : "",
+               nr->status & NVME_SC_DNR  ? "DNR "  : "",
+               nr->cmd->common.cdw10,
+               nr->cmd->common.cdw11,
+               nr->cmd->common.cdw12,
+               nr->cmd->common.cdw13,
+               nr->cmd->common.cdw14,
+               nr->cmd->common.cdw15);
+        return;
+    }
+
+    pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s: "
+               "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x cdw15=0x%x\n",
+               dev_name(nr->ctrl->device),
+ nvme_get_admin_opcode_str(nr->cmd->common.opcode),
+               nr->cmd->common.opcode,
+               nvme_get_error_status_str(nr->status),
+               nr->status >> 8 & 7,    /* Status Code Type */
+               nr->status & 0xff,    /* Status Code */
+               nr->status & NVME_SC_MORE ? "MORE " : "",
+               nr->status & NVME_SC_DNR  ? "DNR "  : "",
+                   nr->cmd->common.cdw10,
+                   nr->cmd->common.cdw11,
+                   nr->cmd->common.cdw12,
+                   nr->cmd->common.cdw13,
+                   nr->cmd->common.cdw14,
+                   nr->cmd->common.cdw15);
+}
+
  enum nvme_disposition {
      COMPLETE,
      RETRY,
@@ -381,8 +424,12 @@ static inline void nvme_end_req(struct request *req)
  {
      blk_status_t status = nvme_error_status(nvme_req(req)->status);

-    if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
-        nvme_log_error(req);
+    if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET))) {
+        if (blk_rq_is_passthrough(req))
+            nvme_log_error_passthrough(req);
+        else
+            nvme_log_error(req);
+    }
      nvme_end_req_zoned(req);
      nvme_trace_bio_complete(req);
      if (req->cmd_flags & REQ_NVME_MPATH)
@@ -666,6 +713,8 @@ static inline void nvme_clear_nvme_request(struct request *req)
  /* initialize a passthrough request */
  void nvme_init_request(struct request *req, struct nvme_command *cmd)
  {
+    struct nvme_request *nr = nvme_req(req);
+
      if (req->q->queuedata)
          req->timeout = NVME_IO_TIMEOUT;
      else /* no queuedata implies admin queue */
@@ -678,8 +727,10 @@ void nvme_init_request(struct request *req, struct nvme_command *cmd)
      if (req->mq_hctx->type == HCTX_TYPE_POLL)
          req->cmd_flags |= REQ_POLLED;
      nvme_clear_nvme_request(req);
-    req->rq_flags |= RQF_QUIET;
-    memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
+    if (!nr->ctrl->error_logging)
+        req->rq_flags |= RQF_QUIET;
+
+    memcpy(nr->cmd, cmd, sizeof(*cmd));
  }
  EXPORT_SYMBOL_GPL(nvme_init_request);

@@ -3418,6 +3469,37 @@ static ssize_t nvme_sysfs_rescan(struct device *dev,
  }
  static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL, nvme_sysfs_rescan);

+static ssize_t nvme_passthrough_show(struct device *dev,
+        struct device_attribute *attr, char *buf)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+    if (ctrl->error_logging)
+        return sysfs_emit(buf, "on\n");
+    else
+        return sysfs_emit(buf, "off\n");
+}
+
+static ssize_t nvme_passthrough_store(struct device *dev,
+        struct device_attribute *attr, const char *buf, size_t count)
+{
+    struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+    int passthrough_enable, err;
+
+    err = kstrtoint(buf, 10, &passthrough_enable);
+    if (err)
+        return -EINVAL;
+
+    if (passthrough_enable)
+        ctrl->error_logging = true;
+    else
+        ctrl->error_logging = false;
+
+    return count;
+}
+
+static DEVICE_ATTR(passthrough_logging, S_IRUGO | S_IWUSR, nvme_passthrough_show, nvme_passthrough_store);
+
  static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
  {
      struct gendisk *disk = dev_to_disk(dev);
@@ -3926,6 +4008,7 @@ static struct attribute *nvme_dev_attrs[] = {
      &dev_attr_dhchap_secret.attr,
      &dev_attr_dhchap_ctrl_secret.attr,
  #endif
+    &dev_attr_passthrough_logging.attr,
      NULL
  };

@@ -5125,6 +5208,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
      int ret;

      ctrl->state = NVME_CTRL_NEW;
+    ctrl->error_logging = false;
      clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
      spin_lock_init(&ctrl->lock);
      mutex_init(&ctrl->scan_lock);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index bf46f122e9e1..dce5e6f7260c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -248,6 +248,7 @@ struct nvme_ctrl {
      bool comp_seen;
      enum nvme_ctrl_state state;
      bool identified;
+    bool error_logging;
      spinlock_t lock;
      struct mutex scan_lock;
      const struct nvme_ctrl_ops *ops;
-- 
2.31.1






More information about the Linux-nvme mailing list