[PATCH rfc] nvme: fix a possible memory leak when failing to initialize ctrl auth
Sagi Grimberg
sagi at grimberg.me
Thu May 30 00:22:33 PDT 2024
In case nvme_auth_init_ctrl fails, we leak memory [1] due to not dropping
the device reference to zero, causing a device private allocation to leak.
Dropping the reference to zero to trigger device .release() is a bit
problematic here because the callers of nvme_init_ctrl() cannot sanely
understand if it failed before or after a device was added, hence don't
know if they need to cleanup or allow the ctrl device release to cleanup.
Instead, we simply move the call to nvme_auth_init_ctrl before we add the
device such that nvme_init_ctrl either fails without creating a device
or it succeeds. We also add a code comment to try and prevent similar issues
from coming back in the future.
[1]:
--
unreferenced object 0xffff91488fb07200 (size 256):
comm "nvme", pid 29344, jiffies 4295069699
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 72 b0 8f 48 91 ff ff .........r..H...
08 72 b0 8f 48 91 ff ff 00 d6 55 a2 ff ff ff ff .r..H.....U.....
backtrace (crc 36f6c278):
[<ffffffffa2b8b46a>] kmemleak_alloc+0x4a/0x90
[<ffffffffa1e57897>] kmalloc_trace+0x2f7/0x3a0
[<ffffffffa2562fd0>] device_add+0x510/0x8c0
[<ffffffffa1ee5b9e>] cdev_device_add+0x4e/0xc0
[<ffffffffc133b7a0>] nvme_init_ctrl+0x360/0x460 [nvme_core]
[<ffffffffc13b0af6>] 0xffffffffc13b0af6
[<ffffffffc120e565>] 0xffffffffc120e565
[<ffffffffa1edeb14>] vfs_write+0x104/0x490
[<ffffffffa1edf263>] ksys_write+0x73/0x100
[<ffffffffa1edf319>] __x64_sys_write+0x19/0x30
[<ffffffffa1a0553e>] x64_sys_call+0x7e/0x25c0
[<ffffffffa2b7be01>] do_syscall_64+0x71/0x130
[<ffffffffa2c0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
unreferenced object 0xffff91488ed7ea00 (size 256):
comm "nvme", pid 37478, jiffies 4295155857
hex dump (first 32 bytes):
00 00 00 00 00 00 00 00 08 ea d7 8e 48 91 ff ff ............H...
08 ea d7 8e 48 91 ff ff 00 d6 55 a2 ff ff ff ff ....H.....U.....
backtrace (crc fc1acf5):
[<ffffffffa2b8b46a>] kmemleak_alloc+0x4a/0x90
[<ffffffffa1e57897>] kmalloc_trace+0x2f7/0x3a0
[<ffffffffa2562fd0>] device_add+0x510/0x8c0
[<ffffffffa1ee5b9e>] cdev_device_add+0x4e/0xc0
[<ffffffffc133b7a0>] nvme_init_ctrl+0x360/0x460 [nvme_core]
[<ffffffffc13707b7>] 0xffffffffc13707b7
[<ffffffffc120e565>] 0xffffffffc120e565
[<ffffffffa1edeb14>] vfs_write+0x104/0x490
[<ffffffffa1edf263>] ksys_write+0x73/0x100
[<ffffffffa1edf319>] __x64_sys_write+0x19/0x30
[<ffffffffa1a0553e>] x64_sys_call+0x7e/0x25c0
[<ffffffffa2b7be01>] do_syscall_64+0x71/0x130
[<ffffffffa2c0012b>] entry_SYSCALL_64_after_hwframe+0x76/0x7e
--
Suggested-by: Keith Busch <kbusch at kernel.org>
Signed-off-by: Sagi Grimberg <sagi at grimberg.me>
---
drivers/nvme/host/core.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f62fd49c1411..a7e820840cf7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4655,9 +4655,13 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
goto out;
}
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out;
+
ret = ida_alloc(&nvme_instance_ida, GFP_KERNEL);
if (ret < 0)
- goto out;
+ goto out_free_auth;
ctrl->instance = ret;
device_initialize(&ctrl->ctrl_device);
@@ -4683,6 +4687,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
if (ret)
goto out_free_name;
+ /*
+ * NOTE: please do not add any function calls after cdev_device_add
+ * becuase once we added a device we have to pair our cleanup with
+ * the device reference model, which makes the callers of nvme_init_ctrl
+ * unable to understand if they should cleanup upon failure, or the
+ * device release will take care of it. So make sure that once a device
+ * was created, this function will not fail.
+ */
+
/*
* Initialize latency tolerance controls. The sysfs files won't
* be visible to userspace unless the device actually supports APST.
@@ -4693,20 +4706,15 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
nvme_mpath_init_ctrl(ctrl);
- ret = nvme_auth_init_ctrl(ctrl);
- if (ret)
- goto out_free_cdev;
return 0;
-out_free_cdev:
- nvme_fault_inject_fini(&ctrl->fault_inject);
- dev_pm_qos_hide_latency_tolerance(ctrl->device);
- cdev_device_del(&ctrl->cdev, ctrl->device);
out_free_name:
nvme_put_ctrl(ctrl);
kfree_const(ctrl->device->kobj.name);
out_release_instance:
ida_free(&nvme_instance_ida, ctrl->instance);
+out_free_auth:
+ nvme_auth_free(ctrl);
out:
if (ctrl->discard_page)
__free_page(ctrl->discard_page);
--
2.40.1
More information about the Linux-nvme
mailing list