[PATCH] nvme: Fix memory leak in nvme_init_ctrl error path
Yi Zhang
yi.zhang at redhat.com
Fri May 5 20:12:32 PDT 2023
Verified the kmemleak issue I reported:
https://lore.kernel.org/linux-nvme/CAHj4cs-nDaKzMx2txO4dbE+Mz9ePwLtU0e3egz+StmzOUgWUrA@mail.gmail.com/
Tested-by: Yi Zhang <yi.zhang at redhat.com>
On Wed, May 3, 2023 at 11:13 PM Sagi Grimberg <sagi at grimberg.me> wrote:
>
> nvme_init_ctrl may fail before creating the misc device or after creating
> the misc device.
>
> If we fail before creating the misc device, we just need to deallocate what
> was allocated before and return (as usually done).
>
> If we fail after we create the misc device, we must put
> the final reference on the device in order to make sure that internal
> device resources are cleaned up.
>
> The device release also triggers nvme_free_ctrl method so we need to make
> sure to identify that we failed during the initialization itself and skip
> cleaning because nvme_init_ctrl error path cleaned up on its own (we do
> that by marking the ctrl->instance to UNINITIALIZED).
>
> We also drop the explicit dev_name deallocation because it is freed in
> the device release sequence.
>
> This addresses a memory leak triggered by blktests nvme/044 which happens
> to fail exactly after misc device initialization:
> --
> unreferenced object 0xffff95678a54cd00 (size 256):
> comm "nvme", pid 1941, jiffies 4294761594 (age 10.010s)
> hex dump (first 32 bytes):
> 00 00 00 00 00 00 00 00 08 cd 54 8a 67 95 ff ff ..........T.g...
> 08 cd 54 8a 67 95 ff ff e0 b5 7b 8b ff ff ff ff ..T.g.....{.....
> backtrace:
> [<ffffffff8b349205>] kmalloc_trace+0x25/0x90
> [<ffffffff8b7c0463>] device_add+0x303/0x690
> [<ffffffff8b4103c4>] cdev_device_add+0x44/0x90
> [<ffffffffc0de1c42>] 0xffffffffc0de1c42
> [<ffffffffc0d788b3>] 0xffffffffc0d788b3
> [<ffffffffc0d8c66d>] 0xffffffffc0d8c66d
> [<ffffffffc0d8c831>] 0xffffffffc0d8c831
> [<ffffffff8b40a8b2>] vfs_write+0xc2/0x3c0
> [<ffffffff8b40aeff>] ksys_write+0x5f/0xe0
> [<ffffffff8bc0eb58>] do_syscall_64+0x38/0x90
> [<ffffffff8be000aa>] entry_SYSCALL_64_after_hwframe+0x72/0xdc
> --
>
> Reported-by: Irvin Cote <irvincoteg at gmail.com>
> Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
> ---
> drivers/nvme/host/core.c | 28 ++++++++++++++++------------
> drivers/nvme/host/nvme.h | 1 +
> 2 files changed, 17 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index ea508e9a1de7..b374e6007553 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -5083,6 +5083,9 @@ static void nvme_free_ctrl(struct device *dev)
> container_of(dev, struct nvme_ctrl, ctrl_device);
> struct nvme_subsystem *subsys = ctrl->subsys;
>
> + if (ctrl->instance == NVME_CTRL_INSTANCE_UNINITIALIZED)
> + return;
> +
> if (!subsys || ctrl->instance != subsys->instance)
> ida_free(&nvme_instance_ida, ctrl->instance);
>
> @@ -5137,18 +5140,19 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
> memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
> ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
> + ctrl->instance = NVME_CTRL_INSTANCE_UNINITIALIZED;
>
> BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
> PAGE_SIZE);
> ctrl->discard_page = alloc_page(GFP_KERNEL);
> - if (!ctrl->discard_page) {
> - ret = -ENOMEM;
> - goto out;
> - }
> + if (!ctrl->discard_page)
> + return -ENOMEM;
>
> ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
> - if (ret < 0)
> - goto out;
> + if (ret < 0) {
> + __free_page(ctrl->discard_page);
> + return ret;
> + }
> ctrl->instance = ret;
>
> device_initialize(&ctrl->ctrl_device);
> @@ -5172,7 +5176,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> ctrl->cdev.owner = ops->module;
> ret = cdev_device_add(&ctrl->cdev, ctrl->device);
> if (ret)
> - goto out_free_name;
> + goto out_put_ctrl;
>
> /*
> * Initialize latency tolerance controls. The sysfs files won't
> @@ -5193,14 +5197,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
> nvme_fault_inject_fini(&ctrl->fault_inject);
> dev_pm_qos_hide_latency_tolerance(ctrl->device);
> cdev_device_del(&ctrl->cdev, ctrl->device);
> -out_free_name:
> +out_put_ctrl:
> + /* pairs with nvme_get_ctrl */
> nvme_put_ctrl(ctrl);
> - kfree_const(ctrl->device->kobj.name);
> out_release_instance:
> ida_free(&nvme_instance_ida, ctrl->instance);
> -out:
> - if (ctrl->discard_page)
> - __free_page(ctrl->discard_page);
> + ctrl->instance = NVME_CTRL_INSTANCE_UNINITIALIZED;
> + /* pairs with device_initialize .release method will cleanup */
> + nvme_put_ctrl(ctrl);
> return ret;
> }
> EXPORT_SYMBOL_GPL(nvme_init_ctrl);
> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> index bf46f122e9e1..920403589670 100644
> --- a/drivers/nvme/host/nvme.h
> +++ b/drivers/nvme/host/nvme.h
> @@ -255,6 +255,7 @@ struct nvme_ctrl {
> struct request_queue *connect_q;
> struct request_queue *fabrics_q;
> struct device *dev;
> +#define NVME_CTRL_INSTANCE_UNINITIALIZED (-1)
> int instance;
> int numa_node;
> struct blk_mq_tag_set *tagset;
> --
> 2.34.1
>
>
--
Best Regards,
Yi Zhang
More information about the Linux-nvme
mailing list