[PATCH] NVMe: Async probe
Indraneel Mukherjee
indraneel.m at samsung.com
Thu Feb 12 06:52:02 PST 2015
> -----Original Message-----
> From: Linux-nvme [mailto:linux-nvme-bounces at lists.infradead.org] On Behalf
> Of Keith Busch
> Sent: Thursday, February 12, 2015 6:41 AM
> To: linux-nvme at lists.infradead.org; willy at linux.intel.com
> Cc: Keith Busch
> Subject: [PATCH] NVMe: Async probe
>
> This performs the longest parts of nvme device probe asynchronously. This
> speeds up probe significantly when multiple devices are in use.
>
> Just to drive how important this is for many distros, 'systemd' sends a
fatal signal
> to the modprobe routine during boot if you have a lot of NVMe drives; it
might
> take a while to initailize them and exceed some arbitrary timeout that no
one
> knows how to change. The result is only a subset of your drives are
discovered
> after boot since nvme probe bails on devices when fatal_signal_pending is
set.
>
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> ---
> drivers/block/nvme-core.c | 55
++++++++++++++++++++++++---------------------
> include/linux/nvme.h | 2 ++
> 2 files changed, 32 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index
> b3cb67d..89cb4c5 100644
> --- a/drivers/block/nvme-core.c
> +++ b/drivers/block/nvme-core.c
> @@ -2799,6 +2799,8 @@ static void nvme_reset_workfn(struct work_struct
> *work)
> dev->reset_workfn(work);
> }
>
> +static void nvme_async_probe(void *data, async_cookie_t cookie);
> +
> static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id
*id) {
> int node, result = -ENOMEM;
> @@ -2833,48 +2835,50 @@ static int nvme_probe(struct pci_dev *pdev, const
> struct pci_device_id *id)
> if (result)
> goto release;
>
> + dev->probe_cookie = async_schedule(nvme_async_probe, dev);
> + return result;
> +
> + release:
> + nvme_release_instance(dev);
> + put_pci:
> + pci_dev_put(dev->pci_dev);
> + free:
> + kfree(dev->queues);
> + kfree(dev->entry);
> + kfree(dev);
> + return result;
> +}
> +
> +static void nvme_async_probe(void *data, async_cookie_t cookie) {
> + struct nvme_dev *dev = data;
> + int result;
> +
> kref_init(&dev->kref);
> result = nvme_dev_start(dev);
> if (result)
> - goto release_pools;
> + goto reset;
>
> if (dev->online_queues > 1)
> result = nvme_dev_add(dev);
> if (result)
> - goto shutdown;
> + goto reset;
>
> scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
> dev->miscdev.minor = MISC_DYNAMIC_MINOR;
> - dev->miscdev.parent = &pdev->dev;
> + dev->miscdev.parent = &dev->pci_dev->dev;
> dev->miscdev.name = dev->name;
> dev->miscdev.fops = &nvme_dev_fops;
> result = misc_register(&dev->miscdev);
> if (result)
> - goto remove;
> + goto reset;
>
> nvme_set_irq_hints(dev);
> -
> dev->initialized = 1;
> - return 0;
> -
> - remove:
> - nvme_dev_remove(dev);
> - nvme_dev_remove_admin(dev);
> - nvme_free_namespaces(dev);
> - shutdown:
> - nvme_dev_shutdown(dev);
> - release_pools:
> - nvme_free_queues(dev, 0);
> - nvme_release_prp_pools(dev);
> - release:
> - nvme_release_instance(dev);
> - put_pci:
> - pci_dev_put(dev->pci_dev);
> - free:
> - kfree(dev->queues);
> - kfree(dev->entry);
> - kfree(dev);
> - return result;
> + return;
> + reset:
> + dev->reset_workfn = nvme_reset_failed_dev;
> + queue_work(nvme_workq, &dev->reset_work);
> }
>
> static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@
-2902,6
> +2906,7 @@ static void nvme_remove(struct pci_dev *pdev)
> spin_unlock(&dev_list_lock);
>
> pci_set_drvdata(pdev, NULL);
> + async_synchronize_cookie(dev->probe_cookie);
Keith, I think this may not work. All asynchronous functions called prior to
the one identified by cookie
are guaranteed to have completed. The current one is not.
And I just remembered Matthew's feedback on one of the previous attempts at
asynchronous probe.
We should be able to rmmod the driver during probe if suppose there are a
million namespaces.
Is there any plan to address such scenarios?
> flush_work(&dev->reset_work);
> misc_deregister(&dev->miscdev);
> nvme_dev_shutdown(dev);
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h index
19a5d4b..ebfbfa8
> 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -16,6 +16,7 @@
> #define _LINUX_NVME_H
>
> #include <uapi/linux/nvme.h>
> +#include <linux/async.h>
> #include <linux/pci.h>
> #include <linux/miscdevice.h>
> #include <linux/kref.h>
> @@ -94,6 +95,7 @@ struct nvme_dev {
> struct miscdevice miscdev;
> work_func_t reset_workfn;
> struct work_struct reset_work;
> + async_cookie_t probe_cookie;
> char name[12];
> char serial[20];
> char model[40];
> --
> 1.7.10.4
>
>
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme
More information about the Linux-nvme
mailing list