[PATCH v2 01/14] nvmet: Rapid Path Failure Recovery set controller identify fields

Hannes Reinecke hare at suse.de
Mon Feb 2 19:03:22 PST 2026


On 1/30/26 23:34, Mohamed Khalfella wrote:
> TP8028 Rapid Path Failure Recovery defined new fields in controller
> identify response. The newly defined fields are:
> 
> - CIU (Controller Instance UNIQUIFIER): is an 8bit non-zero value that
> is assigned a random value when the controller is first created. The value
> is expected to be incremented when RDY bit in CSTS register is asserted.
> - CIRN (Controller Instance Random Number): is a 64bit random value that
> gets generated when the controller is created. CIRN is regenerated every
> time RDY bit in CSTS register is asserted.
> - CCRL (Cross-Controller Reset Limit) is an 8bit value that defines the
> maximum number of in-progress controller reset operations. CCRL is
> hardcoded to 4 as recommended by TP8028.
> 
> TP4129 KATO Corrections and Clarifications defined CQT (Command Quiesce
> Time) which is used along with KATO (Keep Alive Timeout) to set an upper
> time limit for attempting Cross-Controller Recovery. For NVME subsystem
> CQT is set to 0 by default to keep the current behavior. The value can
> be set from configfs if needed.
> 
> Make the new fields available for IO controllers only since TP8028 is
> not very useful for discovery controllers.
> 
> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
> ---
>   drivers/nvme/target/admin-cmd.c |  6 ++++++
>   drivers/nvme/target/configfs.c  | 31 +++++++++++++++++++++++++++++++
>   drivers/nvme/target/core.c      | 12 ++++++++++++
>   drivers/nvme/target/nvmet.h     |  4 ++++
>   include/linux/nvme.h            | 15 ++++++++++++---
>   5 files changed, 65 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
> index 3da31bb1183e..ade1145df72d 100644
> --- a/drivers/nvme/target/admin-cmd.c
> +++ b/drivers/nvme/target/admin-cmd.c
> @@ -696,6 +696,12 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
>   
>   	id->cntlid = cpu_to_le16(ctrl->cntlid);
>   	id->ver = cpu_to_le32(ctrl->subsys->ver);
> +	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> +		id->cqt = cpu_to_le16(ctrl->cqt);
> +		id->ciu = ctrl->ciu;
> +		id->cirn = cpu_to_le64(ctrl->cirn);
> +		id->ccrl = NVMF_CCR_LIMIT;
> +	}
>   
>   	/* XXX: figure out what to do about RTD3R/RTD3 */
>   	id->oaes = cpu_to_le32(NVMET_AEN_CFG_OPTIONAL);
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index e44ef69dffc2..035f6e75a818 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -1636,6 +1636,36 @@ static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
>   CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
>   #endif
>   
> +static ssize_t nvmet_subsys_attr_cqt_show(struct config_item *item,
> +					  char *page)
> +{
> +	return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cqt);
> +}
> +
> +static ssize_t nvmet_subsys_attr_cqt_store(struct config_item *item,
> +					   const char *page, size_t cnt)
> +{
> +	struct nvmet_subsys *subsys = to_subsys(item);
> +	struct nvmet_ctrl *ctrl;
> +	u16 cqt;
> +
> +	if (sscanf(page, "%hu\n", &cqt) != 1)
> +		return -EINVAL;
> +
> +	down_write(&nvmet_config_sem);
> +	if (subsys->cqt == cqt)
> +		goto out;
> +
> +	subsys->cqt = cqt;
> +	/* Force reconnect */
> +	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
> +		ctrl->ops->delete_ctrl(ctrl);
> +out:
> +	up_write(&nvmet_config_sem);
> +	return cnt;
> +}
> +CONFIGFS_ATTR(nvmet_subsys_, attr_cqt);
> +
>   static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
>   					      char *page)
>   {
> @@ -1676,6 +1706,7 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
>   	&nvmet_subsys_attr_attr_vendor_id,
>   	&nvmet_subsys_attr_attr_subsys_vendor_id,
>   	&nvmet_subsys_attr_attr_model,
> +	&nvmet_subsys_attr_attr_cqt,
>   	&nvmet_subsys_attr_attr_qid_max,
>   	&nvmet_subsys_attr_attr_ieee_oui,
>   	&nvmet_subsys_attr_attr_firmware,

I do think that TP4129 (i.e. the CQT definitions) is somewhat independent
of CCR. So I'm not sure if it should be integrated in this patchset;
personally I would prefer to have it moved to another patchset.

> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
> index cc88e5a28c8a..0d2a1206e08f 100644
> --- a/drivers/nvme/target/core.c
> +++ b/drivers/nvme/target/core.c
> @@ -1393,6 +1393,10 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
>   		return;
>   	}
>   
> +	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> +		ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
> +		ctrl->cirn = get_random_u64();
> +	}
>   	ctrl->csts = NVME_CSTS_RDY;
>   
>   	/*
> @@ -1661,6 +1665,12 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
>   	}
>   	ctrl->cntlid = ret;
>   
> +	if (!nvmet_is_disc_subsys(ctrl->subsys)) {
> +		ctrl->cqt = subsys->cqt;
> +		ctrl->ciu = get_random_u8() ? : 1;
> +		ctrl->cirn = get_random_u64();
> +	}
> +
>   	/*
>   	 * Discovery controllers may use some arbitrary high value
>   	 * in order to cleanup stale discovery sessions
> @@ -1853,10 +1863,12 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
>   
>   	switch (type) {
>   	case NVME_NQN_NVME:
> +		subsys->cqt = NVMF_CQT_MS;
>   		subsys->max_qid = NVMET_NR_QUEUES;
>   		break;

And I would not set the CQT default here.
The thing is, implementing CQT to the letter would inflict a CQT delay
during failover on _every_ installation, thereby resulting in a
regression compared to previous implementations, where we would fail
over with _no_ delay.
So again, we should make it a different patchset.

>   	case NVME_NQN_DISC:
>   	case NVME_NQN_CURR:
> +		subsys->cqt = 0;
>   		subsys->max_qid = 0;
>   		break;
>   	default:
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index b664b584fdc8..f5d9a01ec60c 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -264,7 +264,10 @@ struct nvmet_ctrl {
>   
>   	uuid_t			hostid;
>   	u16			cntlid;
> +	u16			cqt;
> +	u8			ciu;
>   	u32			kato;
> +	u64			cirn;
>   
>   	struct nvmet_port	*port;
>   
> @@ -331,6 +334,7 @@ struct nvmet_subsys {
>   #ifdef CONFIG_NVME_TARGET_DEBUGFS
>   	struct dentry		*debugfs_dir;
>   #endif
> +	u16			cqt;
>   	u16			max_qid;
>   
>   	u64			ver;
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h
> index 655d194f8e72..5135cdc3c120 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -21,6 +21,9 @@
>   #define NVMF_TRADDR_SIZE	256
>   #define NVMF_TSAS_SIZE		256
>   
> +#define NVMF_CQT_MS		0
> +#define NVMF_CCR_LIMIT		4
> +
>   #define NVME_DISC_SUBSYS_NAME	"nqn.2014-08.org.nvmexpress.discovery"
>   
>   #define NVME_NSID_ALL		0xffffffff
> @@ -328,7 +331,10 @@ struct nvme_id_ctrl {
>   	__le16			crdt1;
>   	__le16			crdt2;
>   	__le16			crdt3;
> -	__u8			rsvd134[122];
> +	__u8			rsvd134[1];
> +	__u8			ciu;
> +	__le64			cirn;
> +	__u8			rsvd144[112];
>   	__le16			oacs;
>   	__u8			acl;
>   	__u8			aerl;
> @@ -362,7 +368,9 @@ struct nvme_id_ctrl {
>   	__u8			anacap;
>   	__le32			anagrpmax;
>   	__le32			nanagrpid;
> -	__u8			rsvd352[160];
> +	__u8			rsvd352[34];
> +	__le16			cqt;
> +	__u8			rsvd388[124];
>   	__u8			sqes;
>   	__u8			cqes;
>   	__le16			maxcmd;
> @@ -389,7 +397,8 @@ struct nvme_id_ctrl {
>   	__u8			msdbd;
>   	__u8			rsvd1804[2];
>   	__u8			dctype;
> -	__u8			rsvd1807[241];
> +	__u8			ccrl;
> +	__u8			rsvd1808[240];
>   	struct nvme_id_power_state	psd[32];
>   	__u8			vs[1024];
>   };

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare at suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich



More information about the Linux-nvme mailing list