[PATCHv2 5/5] nvme: split device add from initialization

Yi Zhang yi.zhang at redhat.com
Thu Jun 6 05:13:27 PDT 2024


Verified the kmemleak reported was fixed by this patch, thanks.

Tested-by: Yi Zhang <yi.zhang at redhat.com>

On Wed, Jun 5, 2024 at 3:01 AM Keith Busch <kbusch at meta.com> wrote:
>
> From: Keith Busch <kbusch at kernel.org>
>
> Combining both creates an ambiguous cleanup scenario for the caller if
> an error is returned: does the device reference need to be dropped or
> did the error occur before the device was initialized? If an error
> occurs after the device is added, then the existing cleanup routines
> will leak memory.
>
> Furthermore, the nvme core is taking it upon itself to free the device's
> kobj.name under certain conditions rather than go the the core device
> API. We shouldn't be peaking into these implementation details.
>
> Split the device initialization from the addition to make it easier to
> know the error handling actions, fix the existing memory leaks, and stop
> the device layering violations.
>
> Link: https://lore.kernel.org/linux-nvme/c4050a37-ecc9-462c-9772-65e25166f439@grimberg.me/
> Signed-off-by: Keith Busch <kbusch at kernel.org>
> ---
>  drivers/nvme/host/apple.c  |  5 ++++
>  drivers/nvme/host/core.c   | 58 +++++++++++++++++++++++---------------
>  drivers/nvme/host/fc.c     |  5 ++++
>  drivers/nvme/host/nvme.h   |  1 +
>  drivers/nvme/host/pci.c    |  5 ++++
>  drivers/nvme/host/rdma.c   |  5 ++++
>  drivers/nvme/host/tcp.c    |  5 ++++
>  drivers/nvme/target/loop.c |  5 ++++
>  8 files changed, 66 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
> index d3384aecc0d39..9760c97bc8f19 100644
> --- a/drivers/nvme/host/apple.c
> +++ b/drivers/nvme/host/apple.c
> @@ -1531,6 +1531,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
>         if (IS_ERR(anv))
>                 return PTR_ERR(anv);
>
> +       result = nvme_add_ctrl(&anv->ctrl);
> +       if (result)
> +               goto out_put_ctrl;
> +
>         anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
>         if (IS_ERR(anv->ctrl.admin_q)) {
>                 ret = -ENOMEM;
> @@ -1545,6 +1549,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&dev->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&dev->ctrl);
>         return ret;
>  }
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index f5d150c62955d..070cc4d76b6a4 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4611,6 +4611,9 @@ static void nvme_free_ctrl(struct device *dev)
>   * Initialize a NVMe controller structures.  This needs to be called during
>   * earliest initialization so that we have the initialized structured around
>   * during probing.
> + *
> + * On success, the caller must use the nvme_put_ctrl() to release this when
> + * needed, which also invokes the ops->free_ctrl() callback.
>   */
>  int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 const struct nvme_ctrl_ops *ops, unsigned long quirks)
> @@ -4659,6 +4662,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 goto out;
>         ctrl->instance = ret;
>
> +       ret = nvme_auth_init_ctrl(ctrl);
> +       if (ret)
> +               goto out_release_instance;
> +
> +       nvme_mpath_init_ctrl(ctrl);
> +
>         device_initialize(&ctrl->ctrl_device);
>         ctrl->device = &ctrl->ctrl_device;
>         ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
> @@ -4671,16 +4680,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 ctrl->device->groups = nvme_dev_attr_groups;
>         ctrl->device->release = nvme_free_ctrl;
>         dev_set_drvdata(ctrl->device, ctrl);
> +
> +       return ret;
> +
> +out_release_instance:
> +       ida_free(&nvme_instance_ida, ctrl->instance);
> +out:
> +       if (ctrl->discard_page)
> +               __free_page(ctrl->discard_page);
> +       cleanup_srcu_struct(&ctrl->srcu);
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> +
> +/*
> + * On success, returns with an elevated controller reference and caller must
> + * use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
> + */
> +int nvme_add_ctrl(struct nvme_ctrl *ctrl)
> +{
> +       int ret;
> +
>         ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
>         if (ret)
> -               goto out_release_instance;
> +               return ret;
>
> -       nvme_get_ctrl(ctrl);
>         cdev_init(&ctrl->cdev, &nvme_dev_fops);
> -       ctrl->cdev.owner = ops->module;
> +       ctrl->cdev.owner = ctrl->ops->module;
>         ret = cdev_device_add(&ctrl->cdev, ctrl->device);
>         if (ret)
> -               goto out_free_name;
> +               return ret;
>
>         /*
>          * Initialize latency tolerance controls.  The sysfs files won't
> @@ -4691,28 +4720,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 min(default_ps_max_latency_us, (unsigned long)S32_MAX));
>
>         nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
> -       nvme_mpath_init_ctrl(ctrl);
> -       ret = nvme_auth_init_ctrl(ctrl);
> -       if (ret)
> -               goto out_free_cdev;
> +       nvme_get_ctrl(ctrl);
>
>         return 0;
> -out_free_cdev:
> -       nvme_fault_inject_fini(&ctrl->fault_inject);
> -       dev_pm_qos_hide_latency_tolerance(ctrl->device);
> -       cdev_device_del(&ctrl->cdev, ctrl->device);
> -out_free_name:
> -       nvme_put_ctrl(ctrl);
> -       kfree_const(ctrl->device->kobj.name);
> -out_release_instance:
> -       ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> -       if (ctrl->discard_page)
> -               __free_page(ctrl->discard_page);
> -       cleanup_srcu_struct(&ctrl->srcu);
> -       return ret;
>  }
> -EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> +EXPORT_SYMBOL_GPL(nvme_add_ctrl);
>
>  /* let I/O to all namespaces fail in preparation for surprise removal */
>  void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
> diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
> index c52446013388f..d5a383766b34d 100644
> --- a/drivers/nvme/host/fc.c
> +++ b/drivers/nvme/host/fc.c
> @@ -3563,6 +3563,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
>                         &nvme_fc_admin_mq_ops,
>                         struct_size_t(struct nvme_fcp_op_w_sgl, priv,
> @@ -3607,6 +3611,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
>         /* initiate nvme ctrl ref counting teardown */
>         nvme_uninit_ctrl(&ctrl->ctrl);
>
> +out_put_ctrl:
>         /* Remove core ctrl ref. */
>         nvme_put_ctrl(&ctrl->ctrl);
>
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index f3a41133ac3f9..9693490680866 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -792,6 +792,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
>  int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
>  int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>                 const struct nvme_ctrl_ops *ops, unsigned long quirks);
> +int nvme_add_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_start_ctrl(struct nvme_ctrl *ctrl);
>  void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
> diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
> index 102a9fb0c65ff..c92125b0238d4 100644
> --- a/drivers/nvme/host/pci.c
> +++ b/drivers/nvme/host/pci.c
> @@ -3015,6 +3015,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>         if (IS_ERR(dev))
>                 return PTR_ERR(dev);
>
> +       result = nvme_add_ctrl(&dev->ctrl);
> +       if (result)
> +               goto out_put_ctrl;
> +
>         result = nvme_dev_map(dev);
>         if (result)
>                 goto out_uninit_ctrl;
> @@ -3101,6 +3105,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>         nvme_dev_unmap(dev);
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&dev->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&dev->ctrl);
>         return result;
>  }
> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
> index 94d4f3dbac6b6..5c44c7c5c688c 100644
> --- a/drivers/nvme/host/rdma.c
> +++ b/drivers/nvme/host/rdma.c
> @@ -2323,6 +2323,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
>         WARN_ON_ONCE(!changed);
>
> @@ -2341,6 +2345,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>         if (ret > 0)
>                 ret = -EIO;
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 5ee3bbc67f411..3be67c98c906e 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -2779,6 +2779,10 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>         if (IS_ERR(ctrl))
>                 return ERR_CAST(ctrl);
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
>                 WARN_ON_ONCE(1);
>                 ret = -EINTR;
> @@ -2800,6 +2804,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
>
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>         if (ret > 0)
>                 ret = -EIO;
> diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
> index e589915ddef85..e32790d8fc260 100644
> --- a/drivers/nvme/target/loop.c
> +++ b/drivers/nvme/target/loop.c
> @@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
>                 goto out;
>         }
>
> +       ret = nvme_add_ctrl(&ctrl->ctrl);
> +       if (ret)
> +               goto out_put_ctrl;
> +
>         if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
>                 WARN_ON_ONCE(1);
>
> @@ -611,6 +615,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
>         kfree(ctrl->queues);
>  out_uninit_ctrl:
>         nvme_uninit_ctrl(&ctrl->ctrl);
> +out_put_ctrl:
>         nvme_put_ctrl(&ctrl->ctrl);
>  out:
>         if (ret > 0)
> --
> 2.43.0
>
>


--
Best Regards,
  Yi Zhang




More information about the Linux-nvme mailing list