[RFC v2 2/3] nvme: add error logging opt-in
Sagi Grimberg
sagi at grimberg.me
Thu Mar 30 06:38:34 PDT 2023
>>>>> Commit d7ac8dca938c ("nvme: quiet user passthrough command errors")
>>>>> disabled error
>>>>> logging for user passthrough commands. This commit adds the
>>>>> ability to opt-in
>>>>> to passthrough error logging.
>>>>>
>>>>> To enable passthrough error logging:
>>>>> echo 1 > /sys/kernel/debug/nvme0/error-logging
>>>>>
>>>>> To disable passthrough error logging:
>>>>> echo 0 > /sys/kernel/debug/nvme0/error-logging
>>>>>
>>>>> By default, passthrough error logging will remain disabled.
>>>>>
>>>>> CONFIG_NVME_ERROR_LOGGING_DEBUG_FS needs to be enabled
>>>>> to enable passthrough error logging.
>>>> Any reason to do this in debugfs vs sysfs, which is something
>>>> that we can handle much more easily?
>>>>
>>>> Also why do we need a config option for a trivial amount of code?
>>>>
>>> Putting it in sysfs may make more sense and not require debugfs to be
>>> configured. See below.
>>>
>>> Alan
>>>
>>>
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 53ef028596c6..24b4dcbfe819 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -337,6 +337,49 @@ static void nvme_log_error(struct request *req)
>>> nr->status & NVME_SC_DNR ? "DNR " : "");
>>> }
>>>
>>> +static void nvme_log_error_passthrough(struct request *req)
>>> +{
>>> + struct nvme_ns *ns = req->q->queuedata;
>>> + struct nvme_request *nr = nvme_req(req);
>>> +
>>> + if (ns) {
>>> + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x)
>>> %s%s: "
>>> + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x
>>> cdw14=0x%x cdw15=0x%x\n",
>>> + ns->disk ? ns->disk->disk_name : "?",
>>> + nvme_get_opcode_str(nr->cmd->common.opcode),
>>> + nr->cmd->common.opcode,
>>> + nvme_get_error_status_str(nr->status),
>>> + nr->status >> 8 & 7, /* Status Code Type */
>>> + nr->status & 0xff, /* Status Code */
>>> + nr->status & NVME_SC_MORE ? "MORE " : "",
>>> + nr->status & NVME_SC_DNR ? "DNR " : "",
>>> + nr->cmd->common.cdw10,
>>> + nr->cmd->common.cdw11,
>>> + nr->cmd->common.cdw12,
>>> + nr->cmd->common.cdw13,
>>> + nr->cmd->common.cdw14,
>>> + nr->cmd->common.cdw15);
>>> + return;
>>> + }
>>> +
>>> + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s: "
>>> + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x
>>> cdw14=0x%x cdw15=0x%x\n",
>>> + dev_name(nr->ctrl->device),
>>> + nvme_get_admin_opcode_str(nr->cmd->common.opcode),
>>> + nr->cmd->common.opcode,
>>> + nvme_get_error_status_str(nr->status),
>>> + nr->status >> 8 & 7, /* Status Code Type */
>>> + nr->status & 0xff, /* Status Code */
>>> + nr->status & NVME_SC_MORE ? "MORE " : "",
>>> + nr->status & NVME_SC_DNR ? "DNR " : "",
>>> + nr->cmd->common.cdw10,
>>> + nr->cmd->common.cdw11,
>>> + nr->cmd->common.cdw12,
>>> + nr->cmd->common.cdw13,
>>> + nr->cmd->common.cdw14,
>>> + nr->cmd->common.cdw15);
>>> +}
>>> +
>>> enum nvme_disposition {
>>> COMPLETE,
>>> RETRY,
>>> @@ -381,8 +424,12 @@ static inline void nvme_end_req(struct request
>>> *req)
>>> {
>>> blk_status_t status = nvme_error_status(nvme_req(req)->status);
>>>
>>> - if (unlikely(nvme_req(req)->status && !(req->rq_flags &
>>> RQF_QUIET)))
>>> - nvme_log_error(req);
>>> + if (unlikely(nvme_req(req)->status && !(req->rq_flags &
>>> RQF_QUIET))) {
>>> + if (blk_rq_is_passthrough(req))
>>> + nvme_log_error_passthrough(req);
>>> + else
>>> + nvme_log_error(req);
>>> + }
>>> nvme_end_req_zoned(req);
>>> nvme_trace_bio_complete(req);
>>> if (req->cmd_flags & REQ_NVME_MPATH)
>>> @@ -666,6 +713,8 @@ static inline void nvme_clear_nvme_request(struct
>>> request *req)
>>> /* initialize a passthrough request */
>>> void nvme_init_request(struct request *req, struct nvme_command *cmd)
>>> {
>>> + struct nvme_request *nr = nvme_req(req);
>>> +
>>> if (req->q->queuedata)
>>> req->timeout = NVME_IO_TIMEOUT;
>>> else /* no queuedata implies admin queue */
>>> @@ -678,8 +727,10 @@ void nvme_init_request(struct request *req,
>>> struct nvme_command *cmd)
>>> if (req->mq_hctx->type == HCTX_TYPE_POLL)
>>> req->cmd_flags |= REQ_POLLED;
>>> nvme_clear_nvme_request(req);
>>> - req->rq_flags |= RQF_QUIET;
>>> - memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
>>> + if (!nr->ctrl->error_logging)
>>> + req->rq_flags |= RQF_QUIET;
>>> +
>>> + memcpy(nr->cmd, cmd, sizeof(*cmd));
>>> }
>>> EXPORT_SYMBOL_GPL(nvme_init_request);
>>>
>>> @@ -3418,6 +3469,37 @@ static ssize_t nvme_sysfs_rescan(struct device
>>> *dev,
>>> }
>>> static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL,
>>> nvme_sysfs_rescan);
>>>
>>> +static ssize_t nvme_passthrough_show(struct device *dev,
>>> + struct device_attribute *attr, char *buf)
>>> +{
>>> + struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
>>> +
>>> + if (ctrl->error_logging)
>>> + return sysfs_emit(buf, "on\n");
>>> + else
>>> + return sysfs_emit(buf, "off\n");
>>> +}
>>> +
>>> +static ssize_t nvme_passthrough_store(struct device *dev,
>>> + struct device_attribute *attr, const char *buf, size_t count)
>>> +{
>>> + struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
>>> + int passthrough_enable, err;
>>> +
>>> + err = kstrtoint(buf, 10, &passthrough_enable);
>>> + if (err)
>>> + return -EINVAL;
>>> +
>>> + if (passthrough_enable)
>>> + ctrl->error_logging = true;
>>> + else
>>> + ctrl->error_logging = false;
>>> +
>>> + return count;
>>> +}
>>> +
>>> +static DEVICE_ATTR(passthrough_logging, S_IRUGO | S_IWUSR,
>>> nvme_passthrough_show, nvme_passthrough_store);
>>
>> Is this something that we need per ctrl? My assumption is that
>> if someone wants this, one would enable it for all controllers.
>> Maybe this should be a modparam instead?
>
> Maybe both? Have the ability to set the system-wide default value via
> modparam and sysfs to change it per ctrl?
Definitely not both. If a per-controller setting is needed then let's do
that, and if not, let's do a global modparam.
More information about the Linux-nvme
mailing list