[PATCH v2 07/14] nvme: Introduce FENCING and FENCED controller states

Hannes Reinecke hare at suse.de
Mon Feb 2 21:07:35 PST 2026


On 1/30/26 23:34, Mohamed Khalfella wrote:
> FENCING is a new controller state that a LIVE controller enters when an
> error is encountered. While in FENCING state inflight IOs that timeout
> are not canceled because they should be held until either CCR succeeds
> or time-based recovery completes. While the queues remain alive requests
> are not allowed to be sent in this state and the controller cannot be
> reset or deleted. This is intentional because resetting or deleting the
> controller results in canceling inflight IOs.
> 
> FENCED is a short-term state the controller enters before it is reset.
> It exists only to prevent manual resets from happening while the
> controller is in FENCING state.
> 
> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
> ---
>   drivers/nvme/host/core.c  | 25 +++++++++++++++++++++++--
>   drivers/nvme/host/nvme.h  |  4 ++++
>   drivers/nvme/host/sysfs.c |  2 ++
>   3 files changed, 29 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 8961d612ccb0..3e1e02822dd4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -574,10 +574,29 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
>   			break;
>   		}
>   		break;
> +	case NVME_CTRL_FENCING:
> +		switch (old_state) {
> +		case NVME_CTRL_LIVE:
> +			changed = true;
> +			fallthrough;
> +		default:
> +			break;
> +		}
> +		break;
> +	case NVME_CTRL_FENCED:
> +		switch (old_state) {
> +		case NVME_CTRL_FENCING:
> +			changed = true;
> +			fallthrough;
> +		default:
> +			break;
> +		}
> +		break;
>   	case NVME_CTRL_RESETTING:
>   		switch (old_state) {
>   		case NVME_CTRL_NEW:
>   		case NVME_CTRL_LIVE:
> +		case NVME_CTRL_FENCED:
>   			changed = true;
>   			fallthrough;
>   		default:
> @@ -760,6 +779,7 @@ blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
>   
>   	if (state != NVME_CTRL_DELETING_NOIO &&
>   	    state != NVME_CTRL_DELETING &&
> +	    state != NVME_CTRL_FENCING &&

Shouldn't 'FENCED' be in here, too?

>   	    state != NVME_CTRL_DEAD &&
>   	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
>   	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
> @@ -802,10 +822,11 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
>   			     req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive))
>   				return true;
>   			break;
> -		default:
> -			break;
> +		case NVME_CTRL_FENCING:

Similar here.

>   		case NVME_CTRL_DEAD:
>   			return false;
> +		default:
> +			break;
>   		}
>   	}
>   
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index 9dd9f179ad88..00866bbc66f3 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -251,6 +251,8 @@ static inline u16 nvme_req_qid(struct request *req)
>   enum nvme_ctrl_state {
>   	NVME_CTRL_NEW,
>   	NVME_CTRL_LIVE,
> +	NVME_CTRL_FENCING,
> +	NVME_CTRL_FENCED,
>   	NVME_CTRL_RESETTING,
>   	NVME_CTRL_CONNECTING,
>   	NVME_CTRL_DELETING,
> @@ -777,6 +779,8 @@ static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
>   	switch (nvme_ctrl_state(ctrl)) {
>   	case NVME_CTRL_NEW:
>   	case NVME_CTRL_LIVE:
> +	case NVME_CTRL_FENCING:
> +	case NVME_CTRL_FENCED:
>   	case NVME_CTRL_RESETTING:
>   	case NVME_CTRL_CONNECTING:
>   		return false;
> diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
> index f81bbb6ec768..4ec9dfeb736e 100644
> --- a/drivers/nvme/host/sysfs.c
> +++ b/drivers/nvme/host/sysfs.c
> @@ -443,6 +443,8 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
>   	static const char *const state_name[] = {
>   		[NVME_CTRL_NEW]		= "new",
>   		[NVME_CTRL_LIVE]	= "live",
> +		[NVME_CTRL_FENCING]	= "fencing",
> +		[NVME_CTRL_FENCED]	= "fenced",
>   		[NVME_CTRL_RESETTING]	= "resetting",
>   		[NVME_CTRL_CONNECTING]	= "connecting",
>   		[NVME_CTRL_DELETING]	= "deleting",

You need to modify nvme-tcp.c:nvme_tcp_timeout() too, as this checks
'just' for 'LIVE' state and will abort/terminate commands when in
FENCING. Similar argument for nvme-rdma.c. And nvme-fc.c also needs an
audit to ensure it works correctly.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare at suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich



More information about the Linux-nvme mailing list