[RFC v3 1/1] nvme: add passthrough error logging opt-in

alan.adamson at oracle.com alan.adamson at oracle.com
Mon Apr 3 15:20:14 PDT 2023


On 4/3/23 3:17 AM, Sagi Grimberg wrote:
>
>
> On 4/1/23 01:18, Alan Adamson wrote:
>> Commit d7ac8dca938c ("nvme: quiet user passthrough command errors")
>> disabled error logging for user passthrough commands.  This commit
>> adds the ability to opt in to passthrough error logging.
>>
>> To enable passthrough error logging:
>>          echo 1 > /sys/class/nvme/nvme0/passthrough_logging
>>
>> To disable passthrough error logging:
>>          echo 0 > /sys/class/nvme/nvme0/passthrough_logging
>>
>> By default, passthrough error logging will remain disabled.
>>
>> Signed-off-by: Alan Adamson <alan.adamson at oracle.com>
>> ---
>>   drivers/nvme/host/core.c | 93 ++++++++++++++++++++++++++++++++++++++--
>>   drivers/nvme/host/nvme.h |  1 +
>>   2 files changed, 90 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 53ef028596c6..82d4f8235a8f 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -337,6 +337,49 @@ static void nvme_log_error(struct request *req)
>>                  nr->status & NVME_SC_DNR  ? "DNR "  : "");
>>   }
>>   +static void nvme_log_error_passthrough(struct request *req)
>> +{
>> +    struct nvme_ns *ns = req->q->queuedata;
>> +    struct nvme_request *nr = nvme_req(req);
>> +
>> +    if (ns) {
>> +        pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
>> +            "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x 
>> cdw15=0x%x\n",
>> +            ns->disk ? ns->disk->disk_name : "?",
>> +            nvme_get_opcode_str(nr->cmd->common.opcode),
>> +            nr->cmd->common.opcode,
>> +            nvme_get_error_status_str(nr->status),
>> +            nr->status >> 8 & 7,    /* Status Code Type */
>> +            nr->status & 0xff,    /* Status Code */
>> +            nr->status & NVME_SC_MORE ? "MORE " : "",
>> +            nr->status & NVME_SC_DNR  ? "DNR "  : "",
>> +            nr->cmd->common.cdw10,
>> +            nr->cmd->common.cdw11,
>> +            nr->cmd->common.cdw12,
>> +            nr->cmd->common.cdw13,
>> +            nr->cmd->common.cdw14,
>> +            nr->cmd->common.cdw14);
>> +        return;
>> +    }
>> +
>> +    pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
>> +            "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x 
>> cdw15=0x%x\n",
>> +            dev_name(nr->ctrl->device),
>> + nvme_get_admin_opcode_str(nr->cmd->common.opcode),
>> +            nr->cmd->common.opcode,
>> +            nvme_get_error_status_str(nr->status),
>> +            nr->status >> 8 & 7,    /* Status Code Type */
>> +            nr->status & 0xff,    /* Status Code */
>> +            nr->status & NVME_SC_MORE ? "MORE " : "",
>> +            nr->status & NVME_SC_DNR  ? "DNR "  : "",
>> +            nr->cmd->common.cdw10,
>> +            nr->cmd->common.cdw11,
>> +            nr->cmd->common.cdw12,
>> +            nr->cmd->common.cdw13,
>> +            nr->cmd->common.cdw14,
>> +            nr->cmd->common.cdw14);
>> +}
>> +
>>   enum nvme_disposition {
>>       COMPLETE,
>>       RETRY,
>> @@ -381,8 +424,12 @@ static inline void nvme_end_req(struct request 
>> *req)
>>   {
>>       blk_status_t status = nvme_error_status(nvme_req(req)->status);
>>   -    if (unlikely(nvme_req(req)->status && !(req->rq_flags & 
>> RQF_QUIET)))
>> -        nvme_log_error(req);
>> +    if (unlikely(nvme_req(req)->status && !(req->rq_flags & 
>> RQF_QUIET))) {
>> +        if (blk_rq_is_passthrough(req))
>> +            nvme_log_error_passthrough(req);
>> +        else
>> +            nvme_log_error(req);
>> +    }
>>       nvme_end_req_zoned(req);
>>       nvme_trace_bio_complete(req);
>>       if (req->cmd_flags & REQ_NVME_MPATH)
>> @@ -666,6 +713,8 @@ static inline void nvme_clear_nvme_request(struct 
>> request *req)
>>   /* initialize a passthrough request */
>>   void nvme_init_request(struct request *req, struct nvme_command *cmd)
>>   {
>> +    struct nvme_request *nr = nvme_req(req);
>> +
>>       if (req->q->queuedata)
>>           req->timeout = NVME_IO_TIMEOUT;
>>       else /* no queuedata implies admin queue */
>> @@ -678,8 +727,10 @@ void nvme_init_request(struct request *req, 
>> struct nvme_command *cmd)
>>       if (req->mq_hctx->type == HCTX_TYPE_POLL)
>>           req->cmd_flags |= REQ_POLLED;
>>       nvme_clear_nvme_request(req);
>> -    req->rq_flags |= RQF_QUIET;
>> -    memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
>> +    if (!nr->ctrl->error_logging)
>> +        req->rq_flags |= RQF_QUIET;
>> +
>> +    memcpy(nr->cmd, cmd, sizeof(*cmd));
>
> Question: if we are already introducing granularity to this setting,
> why not make it per-ns? Why only per controller? I'd think it makes
> more sense to do this per ns. It would also remove the access to ctrl
> in the hot path...


Do you mean we should have both of the following attributes?

/sys/class/nvme/nvme0/passthrough_logging

/sys/class/nvme/nvme0/nvme0n1/passthrough_logging


Alan






More information about the Linux-nvme mailing list