[PATCH v2 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields
Hannes Reinecke
hare at suse.de
Mon Feb 2 19:03:22 PST 2026
On 1/30/26 23:34, Mohamed Khalfella wrote:
> TP8028 Rapid Path Failure Recovery defined new fields in controller
> identify response. The newly defined fields are:
>
> - CIU (Controller Instance UNIQUIFIER): is an 8bit non-zero value that
> is assigned a random value when controller first created. The value is
> expected to be incremented when RDY bit in CSTS register is asserted
> - CIRN (Controller Instance Random Number): is 64bit random value that
> gets generated when controller is crated. CIRN is regenerated everytime
> RDY bit is CSTS register is asserted.
> - CCRL (Cross-Controller Reset Limit) is an 8bit value that defines the
> maximum number of in-progress controller reset operations. CCRL is
> hardcoded to 4 as recommended by TP8028.
>
> TP4129 KATO Corrections and Clarifications defined CQT (Command Quiesce
> Time) which is used along with KATO (Keep Alive Timeout) to set an upper
> time limit for attempting Cross-Controller Recovery. For NVME subsystem
> CQT is set to 0 by default to keep the current behavior. The value can
> be set from configfs if needed.
>
> Make the new fields available for IO controllers only since TP8028 is
> not very useful for discovery controllers.
>
> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
> ---
> drivers/nvme/target/admin-cmd.c | 6 ++++++
> drivers/nvme/target/configfs.c | 31 +++++++++++++++++++++++++++++++
> drivers/nvme/target/core.c | 12 ++++++++++++
> drivers/nvme/target/nvmet.h | 4 ++++
> include/linux/nvme.h | 15 ++++++++++++---
> 5 files changed, 65 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index 3da31bb1183e..ade1145df72d 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -696,6 +696,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
>
> id->cntlid = cpu_to_le16(ctrl->cntlid);
> id->ver = cpu_to_le32(ctrl->subsys->ver);
> + if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> + id->cqt = cpu_to_le16(ctrl->cqt);
> + id->ciu = ctrl->ciu;
> + id->cirn = cpu_to_le64(ctrl->cirn);
> + id->ccrl = NVMF_CCR_LIMIT;
> + }
>
> /* XXX: figure out what to do about RTD3R/RTD3 */
> id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index e44ef69dffc2..035f6e75a818 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -1636,6 +1636,36 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
> CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
> #endif
>
> +static ssize_t nvmet_subsys_attr_cqt_show(struct config_item *item,
> + char *page)
> +{
> + return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cqt);
> +}
> +
> +static ssize_t nvmet_subsys_attr_cqt_store(struct config_item *item,
> + const char *page, size_t cnt)
> +{
> + struct nvmet_subsys *subsys = to_subsys(item);
> + struct nvmet_ctrl *ctrl;
> + u16 cqt;
> +
> + if (sscanf(page, "%hu\n", &cqt) != 1)
> + return -EINVAL;
> +
> + down_write(&nvmet_config_sem);
> + if (subsys->cqt == cqt)
> + goto out;
> +
> + subsys->cqt = cqt;
> + /* Force reconnect */
> + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
> + ctrl->ops->delete_ctrl(ctrl);
> +out:
> + up_write(&nvmet_config_sem);
> + return cnt;
> +}
> +CONFIGFS_ATTR(nvmet_subsys_, attr_cqt);
> +
> static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
> char *page)
> {
> @@ -1676,6 +1706,7 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
> &nvmet_subsys_attr_attr_vendor_id,
> &nvmet_subsys_attr_attr_subsys_vendor_id,
> &nvmet_subsys_attr_attr_model,
> + &nvmet_subsys_attr_attr_cqt,
> &nvmet_subsys_attr_attr_qid_max,
> &nvmet_subsys_attr_attr_ieee_oui,
> &nvmet_subsys_attr_attr_firmware,
I do think that TP8028 (ie the CQT defintions) are somewhat independent
on CCR. So I'm not sure if they should be integrated in this patchset;
personally I would prefer to have it moved to another patchset.
> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index cc88e5a28c8a..0d2a1206e08f 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -1393,6 +1393,10 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
> return;
> }
>
> + if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> + ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
> + ctrl->cirn = get_random_u64();
> + }
> ctrl->csts = NVME_CSTS_RDY;
>
> /*
> @@ -1661,6 +1665,12 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
> }
> ctrl->cntlid = ret;
>
> + if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> + ctrl->cqt = subsys->cqt;
> + ctrl->ciu = get_random_u8() ? : 1;
> + ctrl->cirn = get_random_u64();
> + }
> +
> /*
> * Discovery controllers may use some arbitrary high value
> * in order to cleanup stale discovery sessions
> @@ -1853,10 +1863,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
>
> switch (type) {
> case NVME_NQN_NVME:
> + subsys->cqt = NVMF_CQT_MS;
> subsys->max_qid = NVMET_NR_QUEUES;
> break;
And I would not set the CQT default here.
Thing is, implementing CQT to the letter would inflict a CQT delay
during failover for _every_ installation, thereby resulting in a
regression to previous implementations where we would fail over
with _no_ delay.
So again, we should make it a different patchset.
> case NVME_NQN_DISC:
> case NVME_NQN_CURR:
> + subsys->cqt = 0;
> subsys->max_qid = 0;
> break;
> default:
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index b664b584fdc8..f5d9a01ec60c 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -264,7 +264,10 @@ struct nvmet_ctrl {
>
> uuid_t hostid;
> u16 cntlid;
> + u16 cqt;
> + u8 ciu;
> u32 kato;
> + u64 cirn;
>
> struct nvmet_port *port;
>
> @@ -331,6 +334,7 @@ struct nvmet_subsys {
> #ifdef CONFIG_NVME_TARGET_DEBUGFS
> struct dentry *debugfs_dir;
> #endif
> + u16 cqt;
> u16 max_qid;
>
> u64 ver;
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 655d194f8e72..5135cdc3c120 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -21,6 +21,9 @@
> #define NVMF_TRADDR_SIZE 256
> #define NVMF_TSAS_SIZE 256
>
> +#define NVMF_CQT_MS 0
> +#define NVMF_CCR_LIMIT 4
> +
> #define NVME_DISC_SUBSYS_NAME "nqn.2014-08.org.nvmexpress.discovery"
>
> #define NVME_NSID_ALL 0xffffffff
> @@ -328,7 +331,10 @@ struct nvme_id_ctrl {
> __le16 crdt1;
> __le16 crdt2;
> __le16 crdt3;
> - __u8 rsvd134[122];
> + __u8 rsvd134[1];
> + __u8 ciu;
> + __le64 cirn;
> + __u8 rsvd144[112];
> __le16 oacs;
> __u8 acl;
> __u8 aerl;
> @@ -362,7 +368,9 @@ struct nvme_id_ctrl {
> __u8 anacap;
> __le32 anagrpmax;
> __le32 nanagrpid;
> - __u8 rsvd352[160];
> + __u8 rsvd352[34];
> + __le16 cqt;
> + __u8 rsvd388[124];
> __u8 sqes;
> __u8 cqes;
> __le16 maxcmd;
> @@ -389,7 +397,8 @@ struct nvme_id_ctrl {
> __u8 msdbd;
> __u8 rsvd1804[2];
> __u8 dctype;
> - __u8 rsvd1807[241];
> + __u8 ccrl;
> + __u8 rsvd1808[240];
> struct nvme_id_power_state psd[32];
> __u8 vs[1024];
> };
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
More information about the Linux-nvme
mailing list