[PATCH 5/7] um: skas: validate stub munmap() against the guest address range
Cong Wang
xiyou.wangcong at gmail.com
Fri Jun 19 20:22:22 PDT 2026
From: Cong Wang <cwang at multikernel.io>
Route stub munmap() through the USER_NOTIF monitor too, and validate it
before letting it run. munmap() was previously SECCOMP_RET_ALLOW, so a
hijacked stub (jumping to the in-stub munmap with crafted registers) could
unmap arbitrary ranges: either the stub's own code/data pages, which would
sever the monitor's control over it, or guest mappings outside what it is
allowed to manage. With mmap() already routed to the monitor, munmap() was
the remaining memory primitive a hijacked stub could invoke with arbitrary
arguments.
Unlike mmap(), there is no PTE left to check: by the time the stub unmaps
a guest page the kernel has already cleared the corresponding entry. So
stub_munmap_allowed() is range-based instead: the request must be
non-empty, must not wrap, and must lie entirely below STUB_START. That
confines the stub to the guest address space and keeps its own reserved
region off-limits. Both arguments are scalars captured in seccomp_data, so
CONTINUE carries no TOCTOU risk, same as mmap().
stub_munmap_allowed() lives in skas/uaccess.c next to stub_mmap_allowed();
the os-Linux notify handler dispatches on the syscall number and responds
CONTINUE or -EPERM, and the batch server counts STUB_SYSCALL_MUNMAP as
well as STUB_SYSCALL_MMAP.
Verified on UML: guest boots and survives heavy mmap/munmap churn with
zero false denials; the legitimate boot-time clear of the whole user
address space [0, STUB_START) is allowed (end == STUB_START), while a
range overlapping the stub region is denied.
Assisted-by: Claude:claude-opus-4.8
Signed-off-by: Cong Wang <cwang at multikernel.io>
---
arch/um/include/shared/skas/skas.h | 2 ++
arch/um/kernel/skas/stub_exe.c | 4 ++--
arch/um/kernel/skas/uaccess.c | 12 ++++++++++++
arch/um/os-Linux/skas/process.c | 31 ++++++++++++++++++------------
4 files changed, 35 insertions(+), 14 deletions(-)
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index ce1b67b06b4b..ca2a62cef0c1 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -20,5 +20,7 @@ void initial_jmpbuf_unlock(void);
int stub_mmap_allowed(struct mm_id *id, unsigned long addr,
unsigned long prot, unsigned long offset);
+int stub_munmap_allowed(struct mm_id *id, unsigned long addr,
+ unsigned long len);
#endif
diff --git a/arch/um/kernel/skas/stub_exe.c b/arch/um/kernel/skas/stub_exe.c
index 65ea2af5ca73..00eea0cb9463 100644
--- a/arch/um/kernel/skas/stub_exe.c
+++ b/arch/um/kernel/skas/stub_exe.c
@@ -175,7 +175,7 @@ noinline static void real_init(void)
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
5, 0),
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
- 3, 0),
+ 4, 0),
#ifdef __i386__
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
2, 0),
@@ -192,7 +192,7 @@ noinline static void real_init(void)
/* [18] Permitted call for the stub */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
- /* [19] mmap: route to the monitor for validation */
+ /* [19] mmap and munmap: route to the monitor for validation */
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_USER_NOTIF),
};
struct sock_fprog prog = {
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 9359ede8a04b..feb267637735 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -14,6 +14,7 @@
#include <asm/futex.h>
#include <os.h>
#include <skas.h>
+#include <as-layout.h>
/*
* Same mapping as MMAP_OFFSET() in <sysdep/stub.h>, but usable from kernel
@@ -79,6 +80,17 @@ int stub_mmap_allowed(struct mm_id *id, unsigned long addr,
return 1;
}
+int stub_munmap_allowed(struct mm_id *id, unsigned long addr, unsigned long len)
+{
+ if (len == 0 || addr + len < addr)
+ return 0;
+
+ if (addr + len > STUB_START)
+ return 0;
+
+ return 1;
+}
+
static pte_t *maybe_map(unsigned long virt, int is_write)
{
pte_t *pte = virt_to_pte(current->mm, virt);
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 0987eb79ce76..2010b4529c41 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -193,7 +193,7 @@ static int seccomp_notify_serve(struct mm_id *mm_idp)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
int notify_fd = mm_idp->seccomp_notify_fd;
- int ret;
+ int allowed, ret;
CATCH_EINTR(ret = ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_RECV, &req));
if (ret < 0)
@@ -201,13 +201,19 @@ static int seccomp_notify_serve(struct mm_id *mm_idp)
resp.id = req.id;
- if (req.data.nr == STUB_MMAP_NR &&
- stub_mmap_allowed(mm_idp, req.data.args[0], req.data.args[2],
- req.data.args[5])) {
+ if (req.data.nr == STUB_MMAP_NR)
+ allowed = stub_mmap_allowed(mm_idp, req.data.args[0],
+ req.data.args[2], req.data.args[5]);
+ else if (req.data.nr == __NR_munmap)
+ allowed = stub_munmap_allowed(mm_idp, req.data.args[0],
+ req.data.args[1]);
+ else
+ allowed = 0;
+
+ if (allowed)
resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
- } else {
+ else
resp.error = -EPERM;
- }
CATCH_EINTR(ret = ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp));
if (ret < 0)
@@ -231,19 +237,20 @@ static void seccomp_notify_drain(struct mm_id *mm_idp)
break;
}
-static void seccomp_serve_mmaps(struct mm_id *mm_idp)
+static void seccomp_serve_stub_syscalls(struct mm_id *mm_idp)
{
struct stub_data *data = (void *)mm_idp->stack;
- int i, n_mmaps = 0;
+ int i, n_notif = 0;
if (mm_idp->seccomp_notify_fd < 0)
return;
for (i = 0; i < data->syscall_data_len; i++)
- if (data->syscall_data[i].syscall == STUB_SYSCALL_MMAP)
- n_mmaps++;
+ if (data->syscall_data[i].syscall == STUB_SYSCALL_MMAP ||
+ data->syscall_data[i].syscall == STUB_SYSCALL_MUNMAP)
+ n_notif++;
- for (i = 0; i < n_mmaps; i++)
+ for (i = 0; i < n_notif; i++)
seccomp_notify_serve(mm_idp);
}
@@ -255,7 +262,7 @@ void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
do {
if (!running) {
wake_seccomp_stub(mm_idp);
- seccomp_serve_mmaps(mm_idp);
+ seccomp_serve_stub_syscalls(mm_idp);
}
do {
--
2.43.0
More information about the linux-um
mailing list