[PATCH 4/4] nvme: Add two-pass shutdown support
Sagi Grimberg
sagi at grimberg.me
Mon Dec 25 01:58:04 PST 2023
On 12/21/23 19:22, Jeremy Allison wrote:
> This works with the two-pass shutdown mechanism set up for the PCI
> drivers and participates in it by providing the shutdown_wait
> method at the pci_driver structure level.
>
> Adds the new NVME_DISABLE_SHUTDOWN_ASYNC to enum shutdown_type.
> Changes the nvme shutdown() method to set the
> NVME_CC_SHN_NORMAL bit and then return to the caller when
> requested by NVME_DISABLE_SHUTDOWN_ASYNC.
>
> nvme_shutdown_wait() is added to call an internal
> nvme_wait_for_shutdown_cmpl() function to synchronously
> wait for the device to wait for the NVME_CSTS_SHST_CMPLT bit.
>
> This change speeds up the shutdown in a system which hosts
> many controllers.
>
> Signed-off-by: Jeremy Allison <jallison at ciq.com>
> Signed-off-by: Tanjore Suresh <tansuresh at google.com>
> ---
> drivers/nvme/host/core.c | 29 +++++++++++++++++++++++++++--
> drivers/nvme/host/nvme.h | 4 +++-
> drivers/nvme/host/pci.c | 24 ++++++++++++++++++++++--
> 3 files changed, 52 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index bc7040da8e74..2ebcd40106b7 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -2216,7 +2216,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, enum shutdown_type shutdown_type)
> int ret;
>
> ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
> - if (shutdown_type == NVME_DISABLE_SHUTDOWN_SYNC)
> + if (shutdown_type != NVME_DISABLE_RESET)
> ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
> else
> ctrl->ctrl_config &= ~NVME_CC_ENABLE;
> @@ -2225,10 +2225,24 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, enum shutdown_type shutdown_type)
> if (ret)
> return ret;
>
> - if (shutdown_type == NVME_DISABLE_SHUTDOWN_SYNC) {
> + switch (shutdown_type) {
> + case NVME_DISABLE_SHUTDOWN_ASYNC:
> + /*
> + * nvme_wait_for_shutdown_cmpl() will read the reply for this.
> + */
> + return ret;
> + case NVME_DISABLE_SHUTDOWN_SYNC:
> + /*
> + * Spin on the read of the control register.
> + */
> return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
> NVME_CSTS_SHST_CMPLT,
> ctrl->shutdown_timeout, "shutdown");
> + case NVME_DISABLE_RESET:
> + /*
> + * Doing a reset here. Handle below.
> + */
> + break;
> }
> if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
> msleep(NVME_QUIRK_DELAY_AMOUNT);
> @@ -2237,6 +2251,17 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, enum shutdown_type shutdown_type)
> }
> EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
>
> +int nvme_wait_for_shutdown_cmpl(struct nvme_ctrl *ctrl)
> +{
> + ctrl->ctrl_config &= ~NVME_CC_SHN_MASK;
> + ctrl->ctrl_config |= NVME_CC_SHN_NORMAL;
Why is ctrl_config being set again?
> +
> + return nvme_wait_ready(ctrl, NVME_CSTS_SHST_MASK,
> + NVME_CSTS_SHST_CMPLT,
> + ctrl->shutdown_timeout, "shutdown");
> +}
> +EXPORT_SYMBOL_GPL(nvme_wait_for_shutdown_cmpl);
Why export the symbol?
> +
> int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
> {
> unsigned dev_page_min;
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index d880f1ee08d4..adbff23532de 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -189,7 +189,8 @@ enum {
>
> enum shutdown_type {
> NVME_DISABLE_RESET = 0,
> - NVME_DISABLE_SHUTDOWN_SYNC = 1
> + NVME_DISABLE_SHUTDOWN_SYNC = 1,
> + NVME_DISABLE_SHUTDOWN_ASYNC = 2
> };
>
> static inline struct nvme_request *nvme_req(struct request *req)
> @@ -756,6 +757,7 @@ void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl);
> bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
> enum nvme_ctrl_state new_state);
> int nvme_disable_ctrl(struct nvme_ctrl *ctrl, enum shutdown_type shutdown_type);
> +int nvme_wait_for_shutdown_cmpl(struct nvme_ctrl *ctrl);
> int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
> int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> const struct nvme_ctrl_ops *ops, unsigned long quirks);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 77b015affb0b..9cb4436710dd 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -2586,7 +2586,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, enum shutdown_type shutdown_t
> * Give the controller a chance to complete all entered requests
> * if doing a safe shutdown.
> */
> - if (!dead && (shutdown_type == NVME_DISABLE_SHUTDOWN_SYNC))
> + if (!dead && (shutdown_type != NVME_DISABLE_RESET))
> nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);
> }
>
> @@ -3100,7 +3100,26 @@ static void nvme_shutdown(struct pci_dev *pdev)
> {
> struct nvme_dev *dev = pci_get_drvdata(pdev);
>
> - nvme_disable_prepare_reset(dev, NVME_DISABLE_SHUTDOWN_SYNC);
> + nvme_disable_prepare_reset(dev, NVME_DISABLE_SHUTDOWN_ASYNC);
> +}
> +
> +static void nvme_shutdown_wait(struct pci_dev *pdev)
> +{
> + struct nvme_dev *dev = pci_get_drvdata(pdev);
> +
> + mutex_lock(&dev->shutdown_lock);
> + nvme_wait_for_shutdown_cmpl(&dev->ctrl);
If this is the only call-site, why not just open-code it here?
> +
> + /*
> + * The driver will not be starting up queues again if shutting down so
> + * must flush all entered requests to their failed completion to avoid
> + * deadlocking blk-mq hot-cpu notifier.
> + */
> + nvme_unquiesce_io_queues(&dev->ctrl);
> + if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
> + nvme_unquiesce_admin_queue(&dev->ctrl);
> +
> + mutex_unlock(&dev->shutdown_lock);
> }
>
> /*
> @@ -3492,6 +3511,7 @@ static struct pci_driver nvme_driver = {
> .probe = nvme_probe,
> .remove = nvme_remove,
> .shutdown = nvme_shutdown,
> + .shutdown_wait = nvme_shutdown_wait,
> .driver = {
> .probe_type = PROBE_PREFER_ASYNCHRONOUS,
> #ifdef CONFIG_PM_SLEEP
More information about the Linux-nvme
mailing list