[PATCH v4 09/15] nvme: Implement cross-controller reset completion

Mon Apr 6 22:48:50 PDT 2026

On 3/31/26 18:55, Mohamed Khalfella wrote:
> On Mon 2026-03-30 12:53:07 +0200, Hannes Reinecke wrote:
>> On 3/28/26 01:43, Mohamed Khalfella wrote:
>>> An nvme source controller that issues CCR command expects to receive an
>>> NVME_AER_NOTICE_CCR_COMPLETED when pending CCR succeeds or fails. Add
>>> sctrl->ccr_work to read NVME_LOG_CCR logpage and wakeup any thread
>>> waiting on CCR completion.
>>>
>>> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
>>> ---
>>>    drivers/nvme/host/core.c | 49 +++++++++++++++++++++++++++++++++++++++-
>>>    drivers/nvme/host/nvme.h |  1 +
>>>    2 files changed, 49 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 5603ae36444f..793f203bfc38 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -1920,7 +1920,8 @@ EXPORT_SYMBOL_GPL(nvme_set_queue_count);
>>>    
>>>    #define NVME_AEN_SUPPORTED \
>>>    	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
>>> -	 NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
>>> +	 NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_CCR_COMPLETE | \
>>> +	 NVME_AEN_CFG_DISC_CHANGE)
>>>    
>>>    static void nvme_enable_aen(struct nvme_ctrl *ctrl)
>>>    {
>>> @@ -4873,6 +4874,47 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
>>>    	kfree(log);
>>>    }
>>>    
>>> +static void nvme_ccr_work(struct work_struct *work)
>>> +{
>>> +	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ccr_work);
>>> +	struct nvme_ccr_entry *ccr;
>>> +	struct nvme_ccr_log_entry *entry;
>>> +	struct nvme_ccr_log *log;
>>> +	unsigned long flags;
>>> +	int ret, i;
>>> +
>>> +	log = kmalloc(sizeof(*log), GFP_KERNEL);
>>> +	if (!log)
>>> +		return;
>>> +
>>> +	ret = nvme_get_log(ctrl, 0, NVME_LOG_CCR, 0x01,
>>> +			   0x00, log, sizeof(*log), 0);
>>> +	if (ret)
>>> +		goto out;
>>> +
>>> +	spin_lock_irqsave(&ctrl->lock, flags);
>>> +	for (i = 0; i < le16_to_cpu(log->ne); i++) {
>>> +		entry = &log->entries[i];
>>> +		if (entry->ccrs == NVME_CCR_STATUS_IN_PROGRESS)
>>> +			continue;
>>> +
>>> +		list_for_each_entry(ccr, &ctrl->ccr_list, list) {
>>> +			struct nvme_ctrl *ictrl = ccr->ictrl;
>>> +
>>> +			if (ictrl->cntlid != le16_to_cpu(entry->icid) ||
>>> +			    ictrl->ciu != entry->ciu)
>>> +				continue;
>>> +
>>> +			/* Complete matching entry */
>>> +			ccr->ccrs = entry->ccrs;
>>> +			complete(&ccr->complete);
>>> +		}
>>> +	}
>>> +	spin_unlock_irqrestore(&ctrl->lock, flags);
>>> +out:
>>> +	kfree(log);
>>> +}
>>> +
>>>    static void nvme_fw_act_work(struct work_struct *work)
>>>    {
>>>    	struct nvme_ctrl *ctrl = container_of(work,
>>> @@ -4949,6 +4991,9 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
>>>    	case NVME_AER_NOTICE_DISC_CHANGED:
>>>    		ctrl->aen_result = result;
>>>    		break;
>>> +	case NVME_AER_NOTICE_CCR_COMPLETED:
>>> +		queue_work(nvme_wq, &ctrl->ccr_work);
>>> +		break;
>>>    	default:
>>>    		dev_warn(ctrl->device, "async event result %08x\n", result);
>>>    	}
>>> @@ -5144,6 +5189,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
>>>    	nvme_stop_failfast_work(ctrl);
>>>    	flush_work(&ctrl->async_event_work);
>>>    	cancel_work_sync(&ctrl->fw_act_work);
>>> +	cancel_work_sync(&ctrl->ccr_work);
>>>    	if (ctrl->ops->stop_ctrl)
>>>    		ctrl->ops->stop_ctrl(ctrl);
>>>    }
>>> @@ -5267,6 +5313,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>>>    	ctrl->quirks = quirks;
>>>    	ctrl->numa_node = NUMA_NO_NODE;
>>>    	INIT_WORK(&ctrl->scan_work, nvme_scan_work);
>>> +	INIT_WORK(&ctrl->ccr_work, nvme_ccr_work);
>>>    	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
>>>    	INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
>>>    	INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
>>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>>> index f2bcff9ccd25..776ee8aa5a93 100644
>>> --- a/drivers/nvme/host/nvme.h
>>> +++ b/drivers/nvme/host/nvme.h
>>> @@ -419,6 +419,7 @@ struct nvme_ctrl {
>>>    	struct nvme_effects_log *effects;
>>>    	struct xarray cels;
>>>    	struct work_struct scan_work;
>>> +	struct work_struct ccr_work;
>>>    	struct work_struct async_event_work;
>>>    	struct delayed_work ka_work;
>>>    	struct delayed_work failfast_work;
>>
>> Hmm. The 'nvme_fence_ctrl' operation introduced in the previous patch
>> is synchronous, yet in this patch we're looking at a log page to figure
>> out if the cross-controller reset is complete.
>> Which is slightly irritating.
>> Wouldn't it be better to make the 'nvme_fence_ctrl' operation
>> asynchronous, and then have a separate function to wait for the fence
>> operation to complete (which then could look at log pages etc)?
> 
> True, nvme_fence_ctrl() is synchronous, but it runs from ctrl->fencing_work.
> What is it that you find irritating about nvme_fence_ctrl()?
> 

The thing is, in order to make nvme_fence_ctrl() synchronous we have to
wait for the operation itself (which is asynchronous) to complete.
And that wait in itself is implemented by a wait queue.
So we're having a wait queue calling nvme_fence_ctrl(), which calls
another wait queue waiting for a completion.
And then (if the IRS bit is not set) calling another waitqueue for
checking the log page.

I think we could simplify this by simply making nvme_fence_ctrl() 
asynchronous, which could do away with all the workqueue handling.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare at suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich