[PATCH v4 09/15] nvme: Implement cross-controller reset completion

Randy Jennings randyj at purestorage.com
Thu May 14 19:49:25 PDT 2026


On Tue, Apr 7, 2026 at 12:09 PM Mohamed Khalfella
<mkhalfella at purestorage.com> wrote:
>
> On Tue 2026-04-07 07:48:50 +0200, Hannes Reinecke wrote:
> > On 3/31/26 18:55, Mohamed Khalfella wrote:
> > > On Mon 2026-03-30 12:53:07 +0200, Hannes Reinecke wrote:
> > >> On 3/28/26 01:43, Mohamed Khalfella wrote:
> > >>> An nvme source controller that issues CCR command expects to receive an
> > >>> NVME_AER_NOTICE_CCR_COMPLETED when pending CCR succeeds or fails. Add
> > >>> sctrl->ccr_work to read NVME_LOG_CCR logpage and wakeup any thread
> > >>> waiting on CCR completion.
> > >>>
> > >>> Signed-off-by: Mohamed Khalfella <mkhalfella at purestorage.com>
> > >>> ---
> > >>>    drivers/nvme/host/core.c | 49 +++++++++++++++++++++++++++++++++++++++-
> > >>>    drivers/nvme/host/nvme.h |  1 +
> > >>>    2 files changed, 49 insertions(+), 1 deletion(-)
> > >>>
> > >>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > >>> index 5603ae36444f..793f203bfc38 100644
> > >>> --- a/drivers/nvme/host/core.c
> > >>> +++ b/drivers/nvme/host/core.c
> > >>> @@ -1920,7 +1920,8 @@ EXPORT_SYMBOL_GPL(nvme_set_queue_count);
> > >>>
> > >>>    #define NVME_AEN_SUPPORTED \
> > >>>           (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | \
> > >>> -  NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_DISC_CHANGE)
> > >>> +  NVME_AEN_CFG_ANA_CHANGE | NVME_AEN_CFG_CCR_COMPLETE | \
> > >>> +  NVME_AEN_CFG_DISC_CHANGE)
> > >>>
> > >>>    static void nvme_enable_aen(struct nvme_ctrl *ctrl)
> > >>>    {
> > >>> @@ -4873,6 +4874,47 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
> > >>>           kfree(log);
> > >>>    }
> > >>>
> > >>> +static void nvme_ccr_work(struct work_struct *work)
> > >>> +{
> > >>> + struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ccr_work);
> > >>> + struct nvme_ccr_entry *ccr;
> > >>> + struct nvme_ccr_log_entry *entry;
> > >>> + struct nvme_ccr_log *log;
> > >>> + unsigned long flags;
> > >>> + int ret, i;
> > >>> +
> > >>> + log = kmalloc(sizeof(*log), GFP_KERNEL);
> > >>> + if (!log)
> > >>> +         return;
> > >>> +
> > >>> + ret = nvme_get_log(ctrl, 0, NVME_LOG_CCR, 0x01,
> > >>> +                    0x00, log, sizeof(*log), 0);
> > >>> + if (ret)
> > >>> +         goto out;
> > >>> +
> > >>> + spin_lock_irqsave(&ctrl->lock, flags);
> > >>> + for (i = 0; i < le16_to_cpu(log->ne); i++) {
> > >>> +         entry = &log->entries[i];
> > >>> +         if (entry->ccrs == NVME_CCR_STATUS_IN_PROGRESS)
> > >>> +                 continue;
> > >>> +
> > >>> +         list_for_each_entry(ccr, &ctrl->ccr_list, list) {
> > >>> +                 struct nvme_ctrl *ictrl = ccr->ictrl;
> > >>> +
> > >>> +                 if (ictrl->cntlid != le16_to_cpu(entry->icid) ||
> > >>> +                     ictrl->ciu != entry->ciu)
> > >>> +                         continue;
> > >>> +
> > >>> +                 /* Complete matching entry */
> > >>> +                 ccr->ccrs = entry->ccrs;
> > >>> +                 complete(&ccr->complete);
> > >>> +         }
> > >>> + }
> > >>> + spin_unlock_irqrestore(&ctrl->lock, flags);
> > >>> +out:
> > >>> + kfree(log);
> > >>> +}
> > >>> +
> > >>>    static void nvme_fw_act_work(struct work_struct *work)
> > >>>    {
> > >>>           struct nvme_ctrl *ctrl = container_of(work,
> > >>> @@ -4949,6 +4991,9 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
> > >>>           case NVME_AER_NOTICE_DISC_CHANGED:
> > >>>                   ctrl->aen_result = result;
> > >>>                   break;
> > >>> + case NVME_AER_NOTICE_CCR_COMPLETED:
> > >>> +         queue_work(nvme_wq, &ctrl->ccr_work);
> > >>> +         break;
> > >>>           default:
> > >>>                   dev_warn(ctrl->device, "async event result %08x\n", result);
> > >>>           }
> > >>> @@ -5144,6 +5189,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
> > >>>           nvme_stop_failfast_work(ctrl);
> > >>>           flush_work(&ctrl->async_event_work);
> > >>>           cancel_work_sync(&ctrl->fw_act_work);
> > >>> + cancel_work_sync(&ctrl->ccr_work);
> > >>>           if (ctrl->ops->stop_ctrl)
> > >>>                   ctrl->ops->stop_ctrl(ctrl);
> > >>>    }
> > >>> @@ -5267,6 +5313,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> > >>>           ctrl->quirks = quirks;
> > >>>           ctrl->numa_node = NUMA_NO_NODE;
> > >>>           INIT_WORK(&ctrl->scan_work, nvme_scan_work);
> > >>> + INIT_WORK(&ctrl->ccr_work, nvme_ccr_work);
> > >>>           INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
> > >>>           INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
> > >>>           INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
> > >>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > >>> index f2bcff9ccd25..776ee8aa5a93 100644
> > >>> --- a/drivers/nvme/host/nvme.h
> > >>> +++ b/drivers/nvme/host/nvme.h
> > >>> @@ -419,6 +419,7 @@ struct nvme_ctrl {
> > >>>           struct nvme_effects_log *effects;
> > >>>           struct xarray cels;
> > >>>           struct work_struct scan_work;
> > >>> + struct work_struct ccr_work;
> > >>>           struct work_struct async_event_work;
> > >>>           struct delayed_work ka_work;
> > >>>           struct delayed_work failfast_work;
> > >>
> > >> Hmm. The 'nvme_fence_ctrl' operation introduced in the previous patch
> > > >> is synchronous, yet in this patch we're looking at a log page to figure
> > >> out if the cross-controller reset is complete.
> > >> Which is slightly irritating.
> > >> Wouldn't it be better to make the 'nvme_fence_ctrl' operation
> > >> asynchronous, and then have a separate function to wait for the fence
> > >> operation to complete (which then could look at log pages etc)?
> > >
> > > > True, nvme_fence_ctrl() is synchronous, but it runs from ctrl->fencing_work.
> > > What is it that you find irritating about nvme_fence_ctrl()?
> > >
> >
> > Thing is, in order to make nvme_fence_ctrl() synchronous we have to
> > wait for the operation itself (which is asynchronous) to complete.
> > And that wait in itself is implemented by a wait queue.
> > So we're having a wait queue calling nvme_fence_ctrl(), which calls
> > another wait queue waiting for a completion.
> > And then (if the IRS bit is not set) calling another waitqueue for
> > checking the log page.
>
> There is no point in checking the CCR logpage before getting AEN. Sure
> we can implement some sort of polling, but I do not think this is the
> right approach.
>
> >
> > I think we could simplify this by simply making nvme_fence_ctrl()
> > asynchronous, which could do away with all the workqueue handling.
>
> I am not sure I understand exactly how nvme_fence_ctrl() can be made
> asynchronous. Can you provide example code?

I was able to convince Hannes at LSF to withdraw the request for an
asynchronous structure for the code.

Sincerely,
Randy Jennings



More information about the Linux-nvme mailing list