[PATCH 3/7] um: skas: validate stub mmap() against the guest page table
Cong Wang
xiyou.wangcong at gmail.com
Fri Jun 19 20:22:20 PDT 2026
From: Cong Wang <cwang at multikernel.io>
Replace the allow-all USER_NOTIF response with the actual security check.
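When a stub mmap traps to the monitor, validate its arguments against the
mm's page table: the call is allowed iff this mm's PTE for the target
address maps exactly the physical frame named by the mmap offset, and the
requested protection does not exceed what the PTE grants. Otherwise the
mmap is rejected with -EPERM. Only the address, protection and offset
arguments are examined; length, flags and fd are not. A notification for
any syscall other than the stub mmap is rejected with -EPERM as well.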
This is what closes the disclosure issue documented in stub.c. A hijacked
stub (jumping to the in-stub mmap with crafted registers) can no longer
choose an arbitrary physmem offset: it is confined to the frames its own
mm's PTEs already reference. On this base (single physmem fd, no
kernel/user split) the same check also blocks host escape: a mapping of
a UML-kernel frame has no authorizing user PTE, so it is denied.
The check is a pure function of state the monitor already owns (it *is*
the UML kernel and holds the guest pgd, freshly synced before the stub
mmap is issued), so it needs no per-batch bookkeeping or fd-identity
tracking. stub_mmap_allowed() lives in skas/uaccess.c next to
virt_to_pte(); the os-Linux notify handler calls it and responds CONTINUE
or -EPERM.
Verified on UML: guest boots and survives a fork/exec storm plus heavy
demand paging with zero false denials.
Assisted-by: Claude:claude-opus-4.8
Signed-off-by: Cong Wang <cwang at multikernel.io>
---
arch/um/include/shared/skas/skas.h | 3 +++
arch/um/kernel/skas/uaccess.c | 36 ++++++++++++++++++++++++++++++
arch/um/os-Linux/skas/process.c | 21 ++++++++---------
3 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 2237ffedec75..ce1b67b06b4b 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -18,4 +18,7 @@ extern void current_mm_sync(void);
void initial_jmpbuf_lock(void);
void initial_jmpbuf_unlock(void);
+int stub_mmap_allowed(struct mm_id *id, unsigned long addr,
+ unsigned long prot, unsigned long offset);
+
#endif
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index caef1deef795..9359ede8a04b 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -13,6 +13,18 @@
#include <kern_util.h>
#include <asm/futex.h>
#include <os.h>
+#include <skas.h>
+
+/*
+ * Same mapping as MMAP_OFFSET() in <sysdep/stub.h>, but usable from kernel
+ * code (that header pulls in the host <sys/mman.h>). 64-bit stubs use mmap()
+ * with a byte offset; 32-bit stubs use mmap2() with a page offset.
+ */
+#ifdef CONFIG_64BIT
+#define stub_mmap_offset(phys) (phys)
+#else
+#define stub_mmap_offset(phys) ((phys) >> PAGE_SHIFT)
+#endif
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
{
@@ -43,6 +55,61 @@ pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
return pte_offset_kernel(pmd, addr);
}
+/*
+ * Decide whether a stub mmap() that trapped to the monitor may proceed.
+ *
+ * @id:     mm_id embedded in the mm_context of the mm the stub belongs to
+ * @addr:   target virtual address of the mmap
+ * @prot:   requested protection bits (UM_PROT_*)
+ * @offset: mmap offset as the stub passes it, i.e. in the units produced
+ *          by stub_mmap_offset()
+ *
+ * The request is allowed only if this mm has a present PTE for @addr, that
+ * PTE references exactly the frame named by @offset, and every access right
+ * requested in @prot is granted by the PTE.
+ *
+ * NOTE(review): only the page containing @addr is validated; the mmap length,
+ * flags and fd are not seen here. Confirm that the stub can never be handed a
+ * multi-page request, or extend this check to cover the whole range.
+ *
+ * Returns 1 if the mmap may continue, 0 if it must be rejected.
+ */
+int stub_mmap_allowed(struct mm_id *id, unsigned long addr,
+		      unsigned long prot, unsigned long offset)
+{
+	struct mm_context *ctx = container_of(id, struct mm_context, id);
+	struct mm_struct *mm = container_of(ctx, struct mm_struct, context);
+	pte_t *ptep;
+	pte_t entry;
+
+	ptep = virt_to_pte(mm, addr);
+	if (ptep == NULL)
+		return 0;
+
+	/* Work on one snapshot so every test below sees the same entry. */
+	entry = *ptep;
+	if (!pte_present(entry))
+		return 0;
+
+	/* Must map exactly the frame this PTE references. */
+	if (stub_mmap_offset(pte_val(entry) & PAGE_MASK) != offset)
+		return 0;
+
+	/*
+	 * Must not grant more access than the PTE allows. Read access is
+	 * checked too, so an entry that is present but not readable cannot
+	 * be mapped readable.
+	 */
+	if ((prot & UM_PROT_READ) && !pte_read(entry))
+		return 0;
+	if ((prot & UM_PROT_WRITE) && !pte_write(entry))
+		return 0;
+	if ((prot & UM_PROT_EXEC) && !pte_exec(entry))
+		return 0;
+
+	return 1;
+}
+
static pte_t *maybe_map(unsigned long virt, int is_write)
{
pte_t *pte = virt_to_pte(current->mm, virt);
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 63b426b2c523..3a31e52cdcf8 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -187,10 +187,11 @@ static void wake_seccomp_stub(struct mm_id *mm_idp)
FUTEX_WAKE, 1, NULL, NULL, 0));
}
-static int seccomp_notify_serve(int notify_fd)
+static int seccomp_notify_serve(struct mm_id *mm_idp)
{
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
+ int notify_fd = mm_idp->seccomp_notify_fd;
int ret;
CATCH_EINTR(ret = ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_RECV, &req));
@@ -198,7 +199,14 @@ static int seccomp_notify_serve(int notify_fd)
return -errno;
resp.id = req.id;
- resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
+
+ if (req.data.nr == STUB_MMAP_NR &&
+ stub_mmap_allowed(mm_idp, req.data.args[0], req.data.args[2],
+ req.data.args[5])) {
+ resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
+ } else {
+ resp.error = -EPERM;
+ }
CATCH_EINTR(ret = ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_SEND, &resp));
if (ret < 0)
@@ -220,7 +228,7 @@ static void seccomp_serve_mmaps(struct mm_id *mm_idp)
n_mmaps++;
for (i = 0; i < n_mmaps; i++)
- seccomp_notify_serve(mm_idp->seccomp_notify_fd);
+ seccomp_notify_serve(mm_idp);
}
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
@@ -288,13 +296,6 @@ void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
fatal_sigsegv();
}
-/*
- * Service one SECCOMP_RET_USER_NOTIF notification from a stub mmap: read the
- * suspended call, then respond CONTINUE so the stub's real mmap runs. CONTINUE
- * is safe here because mmap takes only scalar arguments (no TOCTOU on user
- * memory). Validation of (addr, len, prot, fd, offset) is added later; for now
- * every stub mmap is allowed.
- */
extern unsigned long current_stub_stack(void);
static void get_skas_faultinfo(int pid, struct faultinfo *fi)
--
2.43.0
More information about the linux-um
mailing list