[PATCH] nvme: add support for TP4084 - Time-to-Ready Enhancements
Keith Busch
kbusch at kernel.org
Wed May 18 08:09:46 PDT 2022
On Wed, May 18, 2022 at 05:00:44PM +0200, Christoph Hellwig wrote:
> Yeah. So we could do something like this instead, which even avoids
> the new member in struct nvme_ctrl:
Nice, the below patch looks good.
Reviewed-by: Keith Busch <kbusch at kernel.org>
> ---
> From 032b8a803bf5bec4c4f206d9cc5a4b1160e0dc69 Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch at lst.de>
> Date: Mon, 16 May 2022 15:09:21 +0200
> Subject: nvme: add support for TP4084 - Time-to-Ready Enhancements
>
> Add support for using longer timeouts during controller initialization
> and letting the controller come up with namespaces that are not ready
> for I/O yet. We skip these not ready namespaces during scanning and
> only bring them online once anoter scan is kicked off by the AEN that
> is set when the NRDY bit gets set in the I/O Command Set Independent
> Identify Namespace Data Structure. This asynchronous probing avoids
> blocking the kernel boot when controllers take a very long time to
> recover after unclean shutdowns (up to minutes).
>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/nvme/host/constants.c | 1 +
> drivers/nvme/host/core.c | 76 ++++++++++++++++++++++++++++++++---
> include/linux/nvme.h | 31 ++++++++++++++
> 3 files changed, 102 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/nvme/host/constants.c b/drivers/nvme/host/constants.c
> index 1dd1d78de2956..4910543f00ff9 100644
> --- a/drivers/nvme/host/constants.c
> +++ b/drivers/nvme/host/constants.c
> @@ -91,6 +91,7 @@ static const char * const nvme_statuses[] = {
> [NVME_SC_NS_WRITE_PROTECTED] = "Namespace is Write Protected",
> [NVME_SC_CMD_INTERRUPTED] = "Command Interrupted",
> [NVME_SC_TRANSIENT_TR_ERR] = "Transient Transport Error",
> + [NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY] = "Admin Command Media Not Ready",
> [NVME_SC_INVALID_IO_CMD_SET] = "Invalid IO Command Set",
> [NVME_SC_LBA_RANGE] = "LBA Out of Range",
> [NVME_SC_CAP_EXCEEDED] = "Capacity Exceeded",
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 42f9772abc4d0..faeb032719134 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -1427,6 +1427,32 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
> return error;
> }
>
> +static int nvme_identify_ns_cs_indep(struct nvme_ctrl *ctrl, unsigned nsid,
> + struct nvme_id_ns_cs_indep **id)
> +{
> + struct nvme_command c = {
> + .identify.opcode = nvme_admin_identify,
> + .identify.nsid = cpu_to_le32(nsid),
> + .identify.cns = NVME_ID_CNS_NS_CS_INDEP,
> + };
> + int ret;
> +
> + *id = kmalloc(sizeof(**id), GFP_KERNEL);
> + if (!*id)
> + return -ENOMEM;
> +
> + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, *id, sizeof(**id));
> + if (ret) {
> + dev_warn(ctrl->device,
> + "Identify namespace (CS independent) failed (%d)\n",
> + ret);
> + kfree(*id);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> static int nvme_features(struct nvme_ctrl *dev, u8 op, unsigned int fid,
> unsigned int dword11, void *buffer, size_t buflen, u32 *result)
> {
> @@ -2103,10 +2129,9 @@ static const struct block_device_operations nvme_bdev_ops = {
> .pr_ops = &nvme_pr_ops,
> };
>
> -static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
> +static int nvme_wait_ready(struct nvme_ctrl *ctrl, u32 timeout, bool enabled)
> {
> - unsigned long timeout =
> - ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
> + unsigned long timeout_jiffies = ((timeout + 1) * HZ / 2) + jiffies;
> u32 csts, bit = enabled ? NVME_CSTS_RDY : 0;
> int ret;
>
> @@ -2119,7 +2144,7 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
> usleep_range(1000, 2000);
> if (fatal_signal_pending(current))
> return -EINTR;
> - if (time_after(jiffies, timeout)) {
> + if (time_after(jiffies, timeout_jiffies)) {
> dev_err(ctrl->device,
> "Device not ready; aborting %s, CSTS=0x%x\n",
> enabled ? "initialisation" : "reset", csts);
> @@ -2150,13 +2175,14 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl)
> if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
> msleep(NVME_QUIRK_DELAY_AMOUNT);
>
> - return nvme_wait_ready(ctrl, ctrl->cap, false);
> + return nvme_wait_ready(ctrl, NVME_CAP_TIMEOUT(ctrl->cap), false);
> }
> EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
>
> int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> {
> unsigned dev_page_min;
> + u32 timeout;
> int ret;
>
> ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
> @@ -2177,6 +2203,27 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> ctrl->ctrl_config = NVME_CC_CSS_CSI;
> else
> ctrl->ctrl_config = NVME_CC_CSS_NVM;
> +
> + if (ctrl->cap & NVME_CAP_CRMS_CRWMS) {
> + u32 crto;
> +
> + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CRTO, &crto);
> + if (ret) {
> + dev_err(ctrl->device, "Reading CRTO failed (%d)\n",
> + ret);
> + return ret;
> + }
> +
> + if (ctrl->cap & NVME_CAP_CRMS_CRIMS) {
> + ctrl->ctrl_config |= NVME_CC_CRIME;
> + timeout = NVME_CRTO_CRIMT(crto);
> + } else {
> + timeout = NVME_CRTO_CRWMT(crto);
> + }
> + } else {
> + timeout = NVME_CAP_TIMEOUT(ctrl->cap);
> + }
> +
> ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
> ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
> ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
> @@ -2185,7 +2232,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> ret = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
> if (ret)
> return ret;
> - return nvme_wait_ready(ctrl, ctrl->cap, true);
> + return nvme_wait_ready(ctrl, timeout, true);
> }
> EXPORT_SYMBOL_GPL(nvme_enable_ctrl);
>
> @@ -4092,11 +4139,26 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
> static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
> {
> struct nvme_ns_ids ids = { };
> + struct nvme_id_ns_cs_indep *id;
> struct nvme_ns *ns;
> + bool ready = true;
>
> if (nvme_identify_ns_descs(ctrl, nsid, &ids))
> return;
>
> + /*
> + * Check if the namespace is ready. If not ignore it, we will get an
> + * AEN once it becomes ready and restart the scan.
> + */
> + if ((ctrl->cap & NVME_CAP_CRMS_CRIMS) &&
> + !nvme_identify_ns_cs_indep(ctrl, nsid, &id)) {
> + ready = id->nstat & NVME_NSTAT_NRDY;
> + kfree(id);
> + }
> +
> + if (!ready)
> + return;
> +
> ns = nvme_find_get_ns(ctrl, nsid);
> if (ns) {
> nvme_validate_ns(ns, &ids);
> @@ -4841,6 +4903,8 @@ static inline void _nvme_check_size(void)
> BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
> BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
> + BUILD_BUG_ON(sizeof(struct nvme_id_ns_cs_indep) !=
> + NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ns_nvm) != NVME_IDENTIFY_DATA_SIZE);
> BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != NVME_IDENTIFY_DATA_SIZE);
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 5f6d432fa06a6..29ec3e3481ff6 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -137,6 +137,7 @@ enum {
> NVME_REG_CMBMSC = 0x0050, /* Controller Memory Buffer Memory
> * Space Control
> */
> + NVME_REG_CRTO = 0x0068, /* Controller Ready Timeouts */
> NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */
> NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */
> NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */
> @@ -161,6 +162,9 @@ enum {
> #define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
> #define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
>
> +#define NVME_CRTO_CRIMT(crto) ((crto) >> 16)
> +#define NVME_CRTO_CRWMT(crto) ((crto) & 0xffff)
> +
> enum {
> NVME_CMBSZ_SQS = 1 << 0,
> NVME_CMBSZ_CQS = 1 << 1,
> @@ -204,6 +208,7 @@ enum {
> NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT,
> NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT,
> NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT,
> + NVME_CC_CRIME = 1 << 24,
> };
>
> enum {
> @@ -227,6 +232,11 @@ enum {
> NVME_CAP_CSS_CSI = 1 << 6,
> };
>
> +enum {
> + NVME_CAP_CRMS_CRIMS = 1ULL << 59,
> + NVME_CAP_CRMS_CRWMS = 1ULL << 60,
> +};
> +
> struct nvme_id_power_state {
> __le16 max_power; /* centiwatts */
> __u8 rsvd2;
> @@ -414,6 +424,21 @@ struct nvme_id_ns {
> __u8 vs[3712];
> };
>
> +/* I/O Command Set Independent Identify Namespace Data Structure */
> +struct nvme_id_ns_cs_indep {
> + __u8 nsfeat;
> + __u8 nmic;
> + __u8 rescap;
> + __u8 fpi;
> + __le32 anagrpid;
> + __u8 nsattr;
> + __u8 rsvd9;
> + __le16 nvmsetid;
> + __le16 endgid;
> + __u8 nstat;
> + __u8 rsvd15[4081];
> +};
> +
> struct nvme_zns_lbafe {
> __le64 zsze;
> __u8 zdes;
> @@ -478,6 +503,7 @@ enum {
> NVME_ID_CNS_NS_DESC_LIST = 0x03,
> NVME_ID_CNS_CS_NS = 0x05,
> NVME_ID_CNS_CS_CTRL = 0x06,
> + NVME_ID_CNS_NS_CS_INDEP = 0x08,
> NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
> NVME_ID_CNS_NS_PRESENT = 0x11,
> NVME_ID_CNS_CTRL_NS_LIST = 0x12,
> @@ -531,6 +557,10 @@ enum {
> NVME_NS_DPS_PI_TYPE3 = 3,
> };
>
> +enum {
> + NVME_NSTAT_NRDY = 1 << 0,
> +};
> +
> enum {
> NVME_NVM_NS_16B_GUARD = 0,
> NVME_NVM_NS_32B_GUARD = 1,
> @@ -1592,6 +1622,7 @@ enum {
> NVME_SC_NS_WRITE_PROTECTED = 0x20,
> NVME_SC_CMD_INTERRUPTED = 0x21,
> NVME_SC_TRANSIENT_TR_ERR = 0x22,
> + NVME_SC_ADMIN_COMMAND_MEDIA_NOT_READY = 0x24,
> NVME_SC_INVALID_IO_CMD_SET = 0x2C,
>
> NVME_SC_LBA_RANGE = 0x80,
> --
> 2.30.2
>
More information about the Linux-nvme
mailing list