[PATCH 11/12] nvme-pci: split the initial probe from the rest path
Sagi Grimberg
sagi at grimberg.me
Tue Nov 8 19:14:02 PST 2022
Nice
On 11/8/22 17:02, Christoph Hellwig wrote:
> nvme_reset_work is a little fragile as it needs to handle both resetting
> a live controller and initializing one during probe. Split out the initial
> probe and open code it in nvme_probe and leave nvme_reset_work to just do
> the live controller reset.
>
> This fixes a recently introduced bug where nvme_dev_disable causes a NULL
> pointer dereferences in blk_mq_quiesce_tagset because the tagset pointer
> is not set when the reset state is entered directly from the new state.
> The separate probe code can skip the reset state and probe directly and
> fixes this.
>
> To make sure the system isn't single threaded on enabling nvme
> controllers, set the PROBE_PREFER_ASYNCHRONOUS flag in the device_driver
> structure so that the driver core probes in parallel.
>
> Fixes: 98d81f0df70c ("nvme: use blk_mq_[un]quiesce_tagset")
> Reported-by: Gerd Bayer <gbayer at linux.ibm.com>
> Signed-off-by: Christoph Hellwig <hch at lst.de>
> ---
> drivers/nvme/host/pci.c | 139 ++++++++++++++++++++++++----------------
> 1 file changed, 83 insertions(+), 56 deletions(-)
>
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 1b3c96a4b7c90..1c8c70767cb8a 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -2821,15 +2821,7 @@ static void nvme_reset_work(struct work_struct *work)
> result = nvme_pci_enable(dev);
> if (result)
> goto out_unlock;
> -
> - if (!dev->ctrl.admin_q) {
> - result = nvme_pci_alloc_admin_tag_set(dev);
> - if (result)
> - goto out_unlock;
> - } else {
> - nvme_start_admin_queue(&dev->ctrl);
> - }
> -
> + nvme_start_admin_queue(&dev->ctrl);
> mutex_unlock(&dev->shutdown_lock);
>
> /*
> @@ -2854,9 +2846,6 @@ static void nvme_reset_work(struct work_struct *work)
> */
> memset(dev->dbbuf_dbs, 0, nvme_dbbuf_size(dev));
> memset(dev->dbbuf_eis, 0, nvme_dbbuf_size(dev));
> - } else {
> - if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP)
> - nvme_dbbuf_dma_alloc(dev);
> }
>
> if (dev->ctrl.hmpre) {
> @@ -2869,37 +2858,23 @@ static void nvme_reset_work(struct work_struct *work)
> if (result)
> goto out;
>
> - if (dev->ctrl.tagset) {
> - /*
> - * This is a controller reset and we already have a tagset.
> - * Freeze and update the number of I/O queues as thos might have
> - * changed. If there are no I/O queues left after this reset,
> - * keep the controller around but remove all namespaces.
> - */
> - if (dev->online_queues > 1) {
> - nvme_start_queues(&dev->ctrl);
> - nvme_wait_freeze(&dev->ctrl);
> - nvme_pci_update_nr_queues(dev);
> - nvme_dbbuf_set(dev);
> - nvme_unfreeze(&dev->ctrl);
> - } else {
> - dev_warn(dev->ctrl.device, "IO queues lost\n");
> - nvme_mark_namespaces_dead(&dev->ctrl);
> - nvme_start_queues(&dev->ctrl);
> - nvme_remove_namespaces(&dev->ctrl);
> - nvme_free_tagset(dev);
> - }
> + /*
> + * Freeze and update the number of I/O queues as thos might have
> + * changed. If there are no I/O queues left after this reset, keep the
> + * controller around but remove all namespaces.
> + */
> + if (dev->online_queues > 1) {
> + nvme_start_queues(&dev->ctrl);
> + nvme_wait_freeze(&dev->ctrl);
> + nvme_pci_update_nr_queues(dev);
> + nvme_dbbuf_set(dev);
> + nvme_unfreeze(&dev->ctrl);
> } else {
> - /*
> - * First probe. Still allow the controller to show up even if
> - * there are no namespaces.
> - */
> - if (dev->online_queues > 1) {
> - nvme_pci_alloc_tag_set(dev);
> - nvme_dbbuf_set(dev);
> - } else {
> - dev_warn(dev->ctrl.device, "IO queues not created\n");
> - }
> + dev_warn(dev->ctrl.device, "IO queues lost\n");
> + nvme_mark_namespaces_dead(&dev->ctrl);
> + nvme_start_queues(&dev->ctrl);
> + nvme_remove_namespaces(&dev->ctrl);
Is this needed? isn't nvme_remove coming soon?
In fact, shouldn't all these calls be in nvme_remove?
More information about the Linux-nvme
mailing list