[PATCH v4 03/15] nvmet: Implement CCR nvme command
Hannes Reinecke
hare at suse.de
Mon Apr 6 22:40:23 PDT 2026
On 3/31/26 18:38, Mohamed Khalfella wrote:
> On Mon 2026-03-30 12:45:57 +0200, Hannes Reinecke wrote:
>> On 3/28/26 01:43, Mohamed Khalfella wrote:
>>> Defined by TP8028 Rapid Path Failure Recovery, the CCR (Cross-Controller
>>> Reset) command is an NVMe command issued by the initiator to the source
>>> controller to reset an impacted controller. Implement the CCR command
>>> for the Linux NVMe target.
>>>
>>> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
>>> ---
>>> drivers/nvme/target/admin-cmd.c | 74 ++++++++++++++++++++++++++++++++
>>> drivers/nvme/target/core.c | 76 +++++++++++++++++++++++++++++++++
>>> drivers/nvme/target/nvmet.h | 13 ++++++
>>> include/linux/nvme.h | 23 ++++++++++
>>> 4 files changed, 186 insertions(+)
>>>
>>> diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
>>> index ec09e30eca18..0a37c0eeebb5 100644
>>> --- a/drivers/nvme/target/admin-cmd.c
>>> +++ b/drivers/nvme/target/admin-cmd.c
>>> @@ -376,7 +376,9 @@ static void nvmet_get_cmd_effects_admin(struct nvmet_ctrl *ctrl,
>>> log->acs[nvme_admin_get_features] =
>>> log->acs[nvme_admin_async_event] =
>>> log->acs[nvme_admin_keep_alive] =
>>> + log->acs[nvme_admin_cross_ctrl_reset] =
>>> cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
>>> +
>>> }
>>>
>>> static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
>>> @@ -1613,6 +1615,75 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
>>> nvmet_req_complete(req, status);
>>> }
>>>
>>> +void nvmet_execute_cross_ctrl_reset(struct nvmet_req *req)
>>> +{
>>> + struct nvmet_ctrl *ictrl, *sctrl = req->sq->ctrl;
>>> + struct nvme_command *cmd = req->cmd;
>>> + struct nvmet_ccr *ccr, *new_ccr;
>>> + int ccr_active, ccr_total;
>>> + u16 cntlid, status = NVME_SC_SUCCESS;
>>> +
>>> + cntlid = le16_to_cpu(cmd->ccr.icid);
>>> + if (sctrl->cntlid == cntlid) {
>>> + req->error_loc =
>>> + offsetof(struct nvme_cross_ctrl_reset_cmd, icid);
>>> + status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
>>> + goto out;
>>> + }
>>> +
>>> + /* Find and get impacted controller */
>>> + ictrl = nvmet_ctrl_find_get_ccr(sctrl->subsys, sctrl->hostnqn,
>>> + cmd->ccr.ciu, cntlid,
>>> + le64_to_cpu(cmd->ccr.cirn));
>>> + if (!ictrl) {
>>> + /* Immediate Reset Successful */
>>> + nvmet_set_result(req, 1);
>>> + status = NVME_SC_SUCCESS;
>>> + goto out;
>>> + }
>>> +
>>> + ccr_total = ccr_active = 0;
>>> + mutex_lock(&sctrl->lock);
>>> + list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
>>> + if (ccr->ctrl == ictrl) {
>>> + status = NVME_SC_CCR_IN_PROGRESS | NVME_STATUS_DNR;
>>> + goto out_unlock;
>>> + }
>>> +
>>> + ccr_total++;
>>> + if (ccr->ctrl)
>>> + ccr_active++;
>>> + }
>>> +
>>> + if (ccr_active >= NVMF_CCR_LIMIT) {
>>> + status = NVME_SC_CCR_LIMIT_EXCEEDED;
>>> + goto out_unlock;
>>> + }
>>> + if (ccr_total >= NVMF_CCR_PER_PAGE) {
>>> + status = NVME_SC_CCR_LOGPAGE_FULL;
>>> + goto out_unlock;
>>> + }
>>> +
>>> + new_ccr = kmalloc_obj(*new_ccr, GFP_KERNEL);
>>> + if (!new_ccr) {
>>> + status = NVME_SC_INTERNAL;
>>> + goto out_unlock;
>>> + }
>>> +
>>> + new_ccr->ciu = cmd->ccr.ciu;
>>> + new_ccr->icid = cntlid;
>>> + new_ccr->ctrl = ictrl;
>>> + list_add_tail(&new_ccr->entry, &sctrl->ccr_list);
>>> +
>>> +out_unlock:
>>> + mutex_unlock(&sctrl->lock);
>>> + if (status == NVME_SC_SUCCESS)
>>> + nvmet_ctrl_fatal_error(ictrl);
>>> + nvmet_ctrl_put(ictrl);
>>> +out:
>>> + nvmet_req_complete(req, status);
>>> +}
>>> +
>>> u32 nvmet_admin_cmd_data_len(struct nvmet_req *req)
>>> {
>>> struct nvme_command *cmd = req->cmd;
>>> @@ -1690,6 +1761,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
>>> case nvme_admin_keep_alive:
>>> req->execute = nvmet_execute_keep_alive;
>>> return 0;
>>> + case nvme_admin_cross_ctrl_reset:
>>> + req->execute = nvmet_execute_cross_ctrl_reset;
>>> + return 0;
>>> default:
>>> return nvmet_report_invalid_opcode(req);
>>> }
>>> diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
>>> index e8b945a01f35..2e0c31d82bad 100644
>>> --- a/drivers/nvme/target/core.c
>>> +++ b/drivers/nvme/target/core.c
>>> @@ -117,6 +117,20 @@ u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
>>> return 0;
>>> }
>>>
>>> +void nvmet_ctrl_cleanup_ccrs(struct nvmet_ctrl *ctrl, bool all)
>>> +{
>>> + struct nvmet_ccr *ccr, *tmp;
>>> +
>>> + lockdep_assert_held(&ctrl->lock);
>>> +
>>> + list_for_each_entry_safe(ccr, tmp, &ctrl->ccr_list, entry) {
>>> + if (all || ccr->ctrl == NULL) {
>>> + list_del(&ccr->entry);
>>> + kfree(ccr);
>>> + }
>>> + }
>>> +}
>>> +
>>> static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
>>> {
>>> struct nvmet_ns *cur;
>>> @@ -1399,6 +1413,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
>>> if (!nvmet_is_disc_subsys(ctrl->subsys)) {
>>> ctrl->ciu = ((u8)(ctrl->ciu + 1)) ? : 1;
>>> ctrl->cirn = get_random_u64();
>>> + nvmet_ctrl_cleanup_ccrs(ctrl, false);
>>> }
>>> ctrl->csts = NVME_CSTS_RDY;
>>>
>>> @@ -1504,6 +1519,35 @@ struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
>>> return ctrl;
>>> }
>>>
>>> +struct nvmet_ctrl *nvmet_ctrl_find_get_ccr(struct nvmet_subsys *subsys,
>>> + const char *hostnqn, u8 ciu,
>>> + u16 cntlid, u64 cirn)
>>> +{
>>> + struct nvmet_ctrl *ctrl, *ictrl = NULL;
>>> + bool found = false;
>>> +
>>> + mutex_lock(&subsys->lock);
>>> + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
>>> + if (ctrl->cntlid != cntlid)
>>> + continue;
>>> +
>>> + /* Avoid racing with a controller that is becoming ready */
>>> + mutex_lock(&ctrl->lock);
>>> + if (ctrl->ciu == ciu && ctrl->cirn == cirn)
>>> + found = true;
>>> + mutex_unlock(&ctrl->lock);
>>> +
>>> + if (found) {
>>> + if (kref_get_unless_zero(&ctrl->ref))
>>> + ictrl = ctrl;
>>> + break;
>>> + }
>>> + };
>>> + mutex_unlock(&subsys->lock);
>>> +
>>> + return ictrl;
>>> +}
>>> +
>>> u16 nvmet_check_ctrl_status(struct nvmet_req *req)
>>> {
>>> if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
>>> @@ -1629,6 +1673,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
>>> subsys->clear_ids = 1;
>>> #endif
>>>
>>> + INIT_LIST_HEAD(&ctrl->ccr_list);
>>> INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
>>> INIT_LIST_HEAD(&ctrl->async_events);
>>> INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
>>> @@ -1739,12 +1784,43 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args)
>>> }
>>> EXPORT_SYMBOL_GPL(nvmet_alloc_ctrl);
>>>
>>> +static void nvmet_ctrl_complete_pending_ccr(struct nvmet_ctrl *ctrl)
>>> +{
>>> + struct nvmet_subsys *subsys = ctrl->subsys;
>>> + struct nvmet_ctrl *sctrl;
>>> + struct nvmet_ccr *ccr;
>>> +
>>> + lockdep_assert_held(&subsys->lock);
>>> +
>>> + /* Cleanup all CCRs issued by ctrl as source controller */
>>> + mutex_lock(&ctrl->lock);
>>> + nvmet_ctrl_cleanup_ccrs(ctrl, true);
>>> + mutex_unlock(&ctrl->lock);
>>> +
>>> + /*
>>> + * Find all CCRs targeting ctrl as impacted controller and
>>> + * set ccr->ctrl to NULL. This tells the source controller
>>> + * that CCR completed successfully.
>>> + */
>>> + list_for_each_entry(sctrl, &subsys->ctrls, subsys_entry) {
>>> + mutex_lock(&sctrl->lock);
>>> + list_for_each_entry(ccr, &sctrl->ccr_list, entry) {
>>> + if (ccr->ctrl == ctrl) {
>>> + ccr->ctrl = NULL;
>>> + break;
>>> + }
>>> + }
>>> + mutex_unlock(&sctrl->lock);
>>> + }
>>> +}
>>> +
>>
>> Do I see this correctly that with this implementation a CCR is only
>> complete once the controller resets? IOW the CCR has to wait for
>> the controller to be reset, but it does not invoke a controller reset
>> itself?
>>
>> Is that intended?
>
> nvmet_execute_cross_ctrl_reset() calls nvmet_ctrl_fatal_error() to cause
> the impacted controller to fail. The CCR is completed when the impacted
> controller exits.
Thanks for the explanation.
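Just to restate my understanding for the archive: the CCR itself only arms
the reset via nvmet_ctrl_fatal_error(), and the CCR is reported as complete
once the impacted controller has actually gone away, i.e. when
nvmet_ctrl_complete_pending_ccr() sets ccr->ctrl to NULL on the source
controller's list. A rough sketch of how I would expect that to hook into
controller teardown; the exact call site is not in the hunks quoted above,
so placing it in nvmet_ctrl_free() is my assumption:

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	/*
	 * The impacted controller is going away: mark CCRs that targeted
	 * it as completed (ccr->ctrl = NULL) and drop any CCRs it had
	 * issued itself as source controller.
	 */
	nvmet_ctrl_complete_pending_ccr(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	/* remainder of the existing controller teardown runs as before */
}

That way the source controller never has to wait synchronously in the admin
command; the outcome can be reported asynchronously, presumably via the CCR
log page that the NVME_SC_CCR_LOGPAGE_FULL / NVMF_CCR_PER_PAGE handling
above refers to.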
Reviewed-by: Hannes Reinecke <hare at suse.de>
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare at suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich