[PATCH rfc] nvme: fix a possible memory leak when failing to initialize ctrl auth

Sagi Grimberg sagi at grimberg.me
Thu May 30 00:22:33 PDT 2024


In case nvme_auth_init_ctrl fails, we leak memory [1] due to not dropping
the device reference to zero, causing a device private allocation to leak.

Dropping the reference to zero to trigger device .release() is a bit
problematic here because the callers of nvme_init_ctrl() cannot sanely
understand if it failed before or after a device was added, hence don't
know if they need to cleanup or allow the ctrl device release to cleanup.

Instead, we simply move the call to nvme_auth_init_ctrl before we add the
device such that nvme_init_ctrl either fails without creating a device
or it succeeds. We also add a code comment to try and prevent similar issues
from coming back in the future.

[1]:
--
unreferenced object 0xffff91488fb07200 (size 256):
  comm "nvme", pid 29344, jiffies 4295069699
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 08 72 b0 8f 48 91 ff ff .........r..H...
    08 72 b0 8f 48 91 ff ff 00 d6 55 a2 ff ff ff ff .r..H.....U.....
  backtrace (crc 36f6c278):
    [<ffffffffa2b8b46a>] kmemleak_alloc+0x4a/0x90
    [<ffffffffa1e57897>] kmalloc_trace+0x2f7/0x3a0
    [<ffffffffa2562fd0>] device_add+0x510/0x8c0
    [<ffffffffa1ee5b9e>] cdev_device_add+0x4e/0xc0
    [<ffffffffc133b7a0>] nvme_init_ctrl+0x360/0x460 [nvme_core]
    [<ffffffffc13b0af6>] 0xffffffffc13b0af6
    [<ffffffffc120e565>] 0xffffffffc120e565
    [<ffffffffa1edeb14>] vfs_write+0x104/0x490
    [<ffffffffa1edf263>] ksys_write+0x73/0x100
    [<ffffffffa1edf319>] __x64_sys_write+0x19/0x30
    [<ffffffffa1a0553e>] x64_sys_call+0x7e/0x25c0
    [<ffffffffa2b7be01>] do_syscall_64+0x71/0x130
    [<ffffffffa2c0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
unreferenced object 0xffff91488ed7ea00 (size 256):
  comm "nvme", pid 37478, jiffies 4295155857
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 08 ea d7 8e 48 91 ff ff ............H...
    08 ea d7 8e 48 91 ff ff 00 d6 55 a2 ff ff ff ff ....H.....U.....
  backtrace (crc fc1acf5):
    [<ffffffffa2b8b46a>] kmemleak_alloc+0x4a/0x90
    [<ffffffffa1e57897>] kmalloc_trace+0x2f7/0x3a0
    [<ffffffffa2562fd0>] device_add+0x510/0x8c0
    [<ffffffffa1ee5b9e>] cdev_device_add+0x4e/0xc0
    [<ffffffffc133b7a0>] nvme_init_ctrl+0x360/0x460 [nvme_core]
    [<ffffffffc13707b7>] 0xffffffffc13707b7
    [<ffffffffc120e565>] 0xffffffffc120e565
    [<ffffffffa1edeb14>] vfs_write+0x104/0x490
    [<ffffffffa1edf263>] ksys_write+0x73/0x100
    [<ffffffffa1edf319>] __x64_sys_write+0x19/0x30
    [<ffffffffa1a0553e>] x64_sys_call+0x7e/0x25c0
    [<ffffffffa2b7be01>] do_syscall_64+0x71/0x130
    [<ffffffffa2c0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
--

Suggested-by: Keith Busch <kbusch at kernel.org>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
 drivers/nvme/host/core.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f62fd49c1411..a7e820840cf7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4655,9 +4655,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 		goto out;
 	}
 
+	ret = nvme_auth_init_ctrl(ctrl);
+	if (ret)
+		goto out;
+
 	ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
 	if (ret < 0)
-		goto out;
+		goto out_free_auth;
 	ctrl->instance = ret;
 
 	device_initialize(&ctrl->ctrl_device);
@@ -4683,6 +4687,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	if (ret)
 		goto out_free_name;
 
+	/*
+	 * NOTE: please do not add any function calls after cdev_device_add
+	 * becuase once we added a device we have to pair our cleanup with
+	 * the device reference model, which makes the callers of nvme_init_ctrl
+	 * unable to understand if they should cleanup upon failure, or the
+	 * device release will take care of it. So make sure that once a device
+	 * was created, this function will not fail.
+	 */
+
 	/*
 	 * Initialize latency tolerance controls.  The sysfs files won't
 	 * be visible to userspace unless the device actually supports APST.
@@ -4693,20 +4706,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 
 	nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
 	nvme_mpath_init_ctrl(ctrl);
-	ret = nvme_auth_init_ctrl(ctrl);
-	if (ret)
-		goto out_free_cdev;
 
 	return 0;
-out_free_cdev:
-	nvme_fault_inject_fini(&ctrl->fault_inject);
-	dev_pm_qos_hide_latency_tolerance(ctrl->device);
-	cdev_device_del(&ctrl->cdev, ctrl->device);
 out_free_name:
 	nvme_put_ctrl(ctrl);
 	kfree_const(ctrl->device->kobj.name);
 out_release_instance:
 	ida_free(&nvme_instance_ida, ctrl->instance);
+out_free_auth:
+	nvme_auth_free(ctrl);
 out:
 	if (ctrl->discard_page)
 		__free_page(ctrl->discard_page);
-- 
2.40.1




More information about the Linux-nvme mailing list