[PATCH] nvme: Fix memory leak in nvme_init_ctrl error path

Yi Zhang yi.zhang at redhat.com
Fri May 5 20:12:32 PDT 2023


Verified the kmemleak issue I reported:
https://lore.kernel.org/linux-nvme/CAHj4cs-nDaKzMx2txO4dbE+Mz9ePwLtU0e3egz+StmzOUgWUrA@mail.gmail.com/

Tested-by: Yi Zhang <yi.zhang at redhat.com>

On Wed, May 3, 2023 at 11:13 PM Sagi Grimberg <sagi at grimberg.me> wrote:
>
> nvme_init_ctrl may fail before creating the misc device or after creating
> the misc device.
>
> If we fail before creating the misc device, we just need to deallocate what
> was allocated before and return (as usually done).
>
> If we fail after we create the misc device, we must put
> the final reference on the device in order to make sure that internal
> device resources are cleaned up.
>
> The device release also triggers nvme_free_ctrl method so we need to make
> sure to identify that we failed during the initialization itself and skip
> cleaning because nvme_init_ctrl error path cleaned up on its own (we do
> that by marking the ctrl->instance to UNINITIALIZED).
>
> We also drop the explicit dev_name deallocation because it is freed in
> the device release sequence.
>
> This addresses a memory leak triggered by blktests nvme/044 which happens
> to fail exactly after misc device initialization:
> --
> unreferenced object 0xffff95678a54cd00 (size 256):
>   comm "nvme", pid 1941, jiffies 4294761594 (age 10.010s)
>   hex dump (first 32 bytes):
>     00 00 00 00 00 00 00 00 08 cd 54 8a 67 95 ff ff  ..........T.g...
>     08 cd 54 8a 67 95 ff ff e0 b5 7b 8b ff ff ff ff  ..T.g.....{.....
>   backtrace:
>     [<ffffffff8b349205>] kmalloc_trace+0x25/0x90
>     [<ffffffff8b7c0463>] device_add+0x303/0x690
>     [<ffffffff8b4103c4>] cdev_device_add+0x44/0x90
>     [<ffffffffc0de1c42>] 0xffffffffc0de1c42
>     [<ffffffffc0d788b3>] 0xffffffffc0d788b3
>     [<ffffffffc0d8c66d>] 0xffffffffc0d8c66d
>     [<ffffffffc0d8c831>] 0xffffffffc0d8c831
>     [<ffffffff8b40a8b2>] vfs_write+0xc2/0x3c0
>     [<ffffffff8b40aeff>] ksys_write+0x5f/0xe0
>     [<ffffffff8bc0eb58>] do_syscall_64+0x38/0x90
>     [<ffffffff8be000aa>] entry_SYSCALL_64_after_hwframe+0x72/0xdc
> --
>
> Reported-by: Irvin Cote <irvincoteg at gmail.com>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
>  drivers/nvme/host/core.c | 28 ++++++++++++++++------------
>  drivers/nvme/host/nvme.h |  1 +
>  2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index ea508e9a1de7..b374e6007553 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -5083,6 +5083,9 @@ static void nvme_free_ctrl(struct device *dev)
>                 container_of(dev, struct nvme_ctrl, ctrl_device);
>         struct nvme_subsystem *subsys = ctrl->subsys;
>
> +       if (ctrl->instance == NVME_CTRL_INSTANCE_UNINITIALIZED)
> +               return;
> +
>         if (!subsys || ctrl->instance != subsys->instance)
>                 ida_free(&nvme_instance_ida, ctrl->instance);
>
> @@ -5137,18 +5140,19 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>         INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
>         memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
>         ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
> +       ctrl->instance = NVME_CTRL_INSTANCE_UNINITIALIZED;
>
>         BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
>                         PAGE_SIZE);
>         ctrl->discard_page = alloc_page(GFP_KERNEL);
> -       if (!ctrl->discard_page) {
> -               ret = -ENOMEM;
> -               goto out;
> -       }
> +       if (!ctrl->discard_page)
> +               return -ENOMEM;
>
>         ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
> -       if (ret < 0)
> -               goto out;
> +       if (ret < 0) {
> +               __free_page(ctrl->discard_page);
> +               return ret;
> +       }
>         ctrl->instance = ret;
>
>         device_initialize(&ctrl->ctrl_device);
> @@ -5172,7 +5176,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>         ctrl->cdev.owner = ops->module;
>         ret = cdev_device_add(&ctrl->cdev, ctrl->device);
>         if (ret)
> -               goto out_free_name;
> +               goto out_put_ctrl;
>
>         /*
>          * Initialize latency tolerance controls.  The sysfs files won't
> @@ -5193,14 +5197,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
>         nvme_fault_inject_fini(&ctrl->fault_inject);
>         dev_pm_qos_hide_latency_tolerance(ctrl->device);
>         cdev_device_del(&ctrl->cdev, ctrl->device);
> -out_free_name:
> +out_put_ctrl:
> +       /* pairs with nvme_get_ctrl */
>         nvme_put_ctrl(ctrl);
> -       kfree_const(ctrl->device->kobj.name);
>  out_release_instance:
>         ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> -       if (ctrl->discard_page)
> -               __free_page(ctrl->discard_page);
> +       ctrl->instance = NVME_CTRL_INSTANCE_UNINITIALIZED;
> +       /* pairs with device_initialize .release method will cleanup */
> +       nvme_put_ctrl(ctrl);
>         return ret;
>  }
>  EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index bf46f122e9e1..920403589670 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -255,6 +255,7 @@ struct nvme_ctrl {
>         struct request_queue *connect_q;
>         struct request_queue *fabrics_q;
>         struct device *dev;
> +#define NVME_CTRL_INSTANCE_UNINITIALIZED (-1)
>         int instance;
>         int numa_node;
>         struct blk_mq_tag_set *tagset;
> --
> 2.34.1
>
>


-- 
Best Regards,
  Yi Zhang




More information about the Linux-nvme mailing list