[PATCH V5 1/1] nvme: allow passthru cmd error logging
Chaitanya Kulkarni
chaitanyak at nvidia.com
Mon May 22 17:08:58 PDT 2023
On 5/22/2023 3:58 PM, alan.adamson at oracle.com wrote:
>
> On 4/9/23 2:25 PM, Chaitanya Kulkarni wrote:
>> From: Alan Adamson <alan.adamson at oracle.com>
>>
>> Commit d7ac8dca938c ("nvme: quiet user passthrough command errors")
>> disabled error logging for user passthrough commands. This commit
>> adds the ability to opt-in to passthrough admin error logging. IO
>> commands initiated as passthrough will always be logged.
>>
>> The logging output for passthrough commands (Admin and IO) has been
>> changed to include CDWXX fields.
>>
>> nvme0n1: Read(0x2), LBA Out of Range (sct 0x0 / sc 0x80) DNR cdw10=0x0 cdw11=0x1
>> cdw12=0x70000 cdw13=0x0 cdw14=0x0 cdw15=0x0
>>
>> Add a helper function nvme_log_err_passthru() which allows us to log
>> errors for passthru commands along with the cdw10-cdw15 values of the
>> nvme command.
>>
>> Add a new sysfs attr passthru_err_log that allows the user to conditionally
>> enable passthru command logging; it is disabled by default.
>>
>> To enable passthrough admin error logging:
>> echo 1 > /sys/class/nvme/nvme0/passthru_err_log
>>
>> To disable passthrough admin error logging:
>> echo 0 > /sys/class/nvme/nvme0/passthru_err_log
>>
>> Signed-off-by: Alan Adamson <alan.adamson at oracle.com>
>> [kch] fix several nits and trim down code, details in cover-letter.
>> Signed-off-by: Chaitanya Kulkarni <kch at nvidia.com>
>> ---
>> drivers/nvme/host/core.c | 83 +++++++++++++++++++++++++++++++++++++---
>> drivers/nvme/host/nvme.h | 1 +
>> 2 files changed, 79 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 954641a45e55..e32265adc034 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -337,6 +337,30 @@ static void nvme_log_error(struct request *req)
>> nr->status & NVME_SC_DNR ? "DNR " : "");
>> }
>> +static void nvme_log_err_passthru(struct request *req)
>> +{
>> + struct nvme_ns *ns = req->q->queuedata;
>> + struct nvme_request *nr = nvme_req(req);
>> +
>> + pr_err_ratelimited("%s: %s(0x%x), %s (sct 0x%x / sc 0x%x) %s%s"
>> + "cdw10=0x%x cdw11=0x%x cdw12=0x%x cdw13=0x%x cdw14=0x%x
>> cdw15=0x%x\n",
>> + ns ? ns->disk->disk_name : dev_name(nr->ctrl->device),
>> + ns ? nvme_get_opcode_str(nr->cmd->common.opcode) :
>> + nvme_get_admin_opcode_str(nr->cmd->common.opcode),
>> + nr->cmd->common.opcode,
>> + nvme_get_error_status_str(nr->status),
>> + nr->status >> 8 & 7, /* Status Code Type */
>> + nr->status & 0xff, /* Status Code */
>> + nr->status & NVME_SC_MORE ? "MORE " : "",
>> + nr->status & NVME_SC_DNR ? "DNR " : "",
>> + nr->cmd->common.cdw10,
>> + nr->cmd->common.cdw11,
>> + nr->cmd->common.cdw12,
>> + nr->cmd->common.cdw13,
>> + nr->cmd->common.cdw14,
>> + nr->cmd->common.cdw14);
>> +}
>> +
>> enum nvme_disposition {
>> COMPLETE,
>> RETRY,
>> @@ -381,8 +405,12 @@ static inline void nvme_end_req(struct request *req)
>> {
>> blk_status_t status = nvme_error_status(nvme_req(req)->status);
>> - if (unlikely(nvme_req(req)->status && !(req->rq_flags & RQF_QUIET)))
>> - nvme_log_error(req);
>> + if (unlikely(nvme_req(req)->status && !(req->rq_flags &
>> RQF_QUIET))) {
>> + if (blk_rq_is_passthrough(req))
>> + nvme_log_err_passthru(req);
>> + else
>> + nvme_log_error(req);
>> + }
>> nvme_end_req_zoned(req);
>> nvme_trace_bio_complete(req);
>> if (req->cmd_flags & REQ_NVME_MPATH)
>> @@ -666,10 +694,15 @@ static inline void
>> nvme_clear_nvme_request(struct request *req)
>> /* initialize a passthrough request */
>> void nvme_init_request(struct request *req, struct nvme_command *cmd)
>> {
>> + struct nvme_request *nr = nvme_req(req);
>> +
>> if (req->q->queuedata)
>> req->timeout = NVME_IO_TIMEOUT;
>> - else /* no queuedata implies admin queue */
>> + else { /* no queuedata implies admin queue */
>> req->timeout = NVME_ADMIN_TIMEOUT;
>> + if (!nr->ctrl->passthru_log_err)
>> + req->rq_flags |= RQF_QUIET;
>> + }
>> /* passthru commands should let the driver set the SGL flags */
>> cmd->common.flags &= ~NVME_CMD_SGL_ALL;
>> @@ -678,8 +711,8 @@ void nvme_init_request(struct request *req, struct
>> nvme_command *cmd)
>> if (req->mq_hctx->type == HCTX_TYPE_POLL)
>> req->cmd_flags |= REQ_POLLED;
>> nvme_clear_nvme_request(req);
>> - req->rq_flags |= RQF_QUIET;
>> - memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
>> +
>> + memcpy(nr->cmd, cmd, sizeof(*cmd));
>> }
>> EXPORT_SYMBOL_GPL(nvme_init_request);
>> @@ -3417,6 +3450,44 @@ static ssize_t nvme_sysfs_rescan(struct device
>> *dev,
>> }
>> static DEVICE_ATTR(rescan_controller, S_IWUSR, NULL,
>> nvme_sysfs_rescan);
>> +static ssize_t nvme_passthru_err_log_show(struct device *dev,
>> + struct device_attribute *attr, char *buf)
>> +{
>> + struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
>> +
>> + if (ctrl->passthru_log_err)
>> + return sysfs_emit(buf, "on\n");
>> +
>> + return sysfs_emit(buf, "off\n");
>> +}
>> +
>> +static ssize_t nvme_passthru_err_log_store(struct device *dev,
>> + struct device_attribute *attr, const char *buf, size_t count)
>> +{
>> + struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
>> + int passthru_enable, err;
>> +
>> + err = kstrtoint(buf, 10, &passthru_enable);
>> + if (err)
>> + return -EINVAL;
>> +
>> + switch (passthru_enable) {
>> + case true:
>> + case false:
>> + ctrl->passthru_log_err = passthru_enable;
>> + break;
>> + default:
>> + pr_err("invlid value %d for admin error logging [on:1 off:0]\n",
>> + passthru_enable);
>> + break;
>> + }
>> + return count;
>> +}
>> +
>> +static DEVICE_ATTR(passthru_err_log, S_IRUGO | S_IWUSR,
>> + nvme_passthru_err_log_show,
>> + nvme_passthru_err_log_store);
>> +
>> static inline struct nvme_ns_head *dev_to_ns_head(struct device *dev)
>> {
>> struct gendisk *disk = dev_to_disk(dev);
>> @@ -3925,6 +3996,7 @@ static struct attribute *nvme_dev_attrs[] = {
>> &dev_attr_dhchap_secret.attr,
>> &dev_attr_dhchap_ctrl_secret.attr,
>> #endif
>> + &dev_attr_passthru_err_log.attr,
>> NULL
>> };
>> @@ -5124,6 +5196,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl,
>> struct device *dev,
>> int ret;
>> ctrl->state = NVME_CTRL_NEW;
>> + ctrl->passthru_log_err = false;
>> clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
>> spin_lock_init(&ctrl->lock);
>> mutex_init(&ctrl->scan_lock);
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index bf46f122e9e1..f5721ad8264c 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -248,6 +248,7 @@ struct nvme_ctrl {
>> bool comp_seen;
>> enum nvme_ctrl_state state;
>> bool identified;
>> + bool passthru_log_err;
>> spinlock_t lock;
>> struct mutex scan_lock;
>> const struct nvme_ctrl_ops *ops;
>
> This v5 version of the patch has been tested with the latest upstream.
> Any objections?
>
> Alan
>
>
Sagi/Christoph/Keith,
any objections ?
-ck
More information about the Linux-nvme
mailing list