[RFC PATCH v2 4/9] um: Add stub side of SECCOMP/futex based process handling
Benjamin Berg
benjamin at sipsolutions.net
Wed Oct 23 07:08:22 PDT 2024
This adds the stub side for the new seccomp process management code. In
this case we do register save/restore through the signal handler
mcontext. For the FS_BASE/GS_BASE register we need special handling.
Co-authored-by: Johannes Berg <johannes at sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin at sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin.berg at intel.com>
---
RFCv2:
- Add include guards into new architecture specific header file
---
arch/um/include/shared/common-offsets.h | 2 +
arch/um/include/shared/skas/stub-data.h | 15 +++++++
arch/um/kernel/skas/stub.c | 53 +++++++++++++++++++++++++
arch/x86/um/shared/sysdep/stub-data.h | 23 +++++++++++
arch/x86/um/shared/sysdep/stub.h | 2 +
arch/x86/um/shared/sysdep/stub_32.h | 13 ++++++
arch/x86/um/shared/sysdep/stub_64.h | 14 +++++++
7 files changed, 122 insertions(+)
create mode 100644 arch/x86/um/shared/sysdep/stub-data.h
diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 86537e20942a..44cb72413db4 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -33,3 +33,5 @@ DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, CONFIG_UML_MAX_USERSPACE_ITERATI
#else
DEFINE(UML_CONFIG_UML_MAX_USERSPACE_ITERATIONS, 0);
#endif
+
+DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
diff --git a/arch/um/include/shared/skas/stub-data.h b/arch/um/include/shared/skas/stub-data.h
index 81a4cace032c..4a2a00556a8e 100644
--- a/arch/um/include/shared/skas/stub-data.h
+++ b/arch/um/include/shared/skas/stub-data.h
@@ -8,9 +8,14 @@
#ifndef __STUB_DATA_H
#define __STUB_DATA_H
+#include <linux/kconfig.h>
#include <linux/compiler_types.h>
#include <as-layout.h>
#include <sysdep/tls.h>
+#include <sysdep/stub-data.h>
+
+#define FUTEX_IN_CHILD 0
+#define FUTEX_IN_KERN 1
struct stub_init_data {
unsigned long stub_start;
@@ -52,6 +57,16 @@ struct stub_data {
/* 128 leaves enough room for additional fields in the struct */
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
+ /* data shared with signal handler (only used in seccomp mode) */
+ short restart_wait;
+ unsigned int futex;
+ int signal;
+ unsigned short si_offset;
+ unsigned short mctx_offset;
+
+ /* seccomp architecture specific state restore */
+ struct stub_data_arch arch_data;
+
/* Stack for our signal handlers and for calling into . */
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
};
diff --git a/arch/um/kernel/skas/stub.c b/arch/um/kernel/skas/stub.c
index 796fc266d3bb..628d58428104 100644
--- a/arch/um/kernel/skas/stub.c
+++ b/arch/um/kernel/skas/stub.c
@@ -5,6 +5,11 @@
#include <sysdep/stub.h>
+#ifdef CONFIG_UML_SECCOMP
+#include <linux/futex.h>
+#include <errno.h>
+#endif
+
static __always_inline int syscall_handler(struct stub_data *d)
{
int i;
@@ -57,3 +62,51 @@ stub_syscall_handler(void)
trap_myself();
}
+
+#ifdef CONFIG_UML_SECCOMP
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_signal_interrupt(int sig, siginfo_t *info, void *p)
+{
+ struct stub_data *d = get_stub_data();
+ ucontext_t *uc = p;
+ long res;
+
+ d->signal = sig;
+ d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
+ d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
+
+restart_wait:
+ d->futex = FUTEX_IN_KERN;
+ do {
+ res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAKE, 1);
+ } while (res == -EINTR);
+ do {
+ res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
+ FUTEX_WAIT, FUTEX_IN_KERN, 0);
+ } while (res == -EINTR || d->futex == FUTEX_IN_KERN);
+
+ if (res < 0 && res != -EAGAIN)
+ stub_syscall2(__NR_kill, 0, SIGKILL);
+
+ /* Try running queued syscalls. */
+ if (syscall_handler(d) < 0 || d->restart_wait) {
+ /* Report SIGSYS if we restart. */
+ d->signal = SIGSYS;
+ d->restart_wait = 0;
+ goto restart_wait;
+ }
+
+ /* Restore arch dependent state that is not part of the mcontext */
+ stub_seccomp_restore_state(&d->arch_data);
+
+ /* Return so that the host modified mcontext is restored. */
+}
+
+void __attribute__ ((__section__ (".__syscall_stub")))
+stub_signal_restorer(void)
+{
+ /* We must not have anything on the stack when doing rt_sigreturn */
+ stub_syscall0(__NR_rt_sigreturn);
+}
+#endif
diff --git a/arch/x86/um/shared/sysdep/stub-data.h b/arch/x86/um/shared/sysdep/stub-data.h
new file mode 100644
index 000000000000..82b1b7f8ac3d
--- /dev/null
+++ b/arch/x86/um/shared/sysdep/stub-data.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_STUB_DATA_H
+#define __ARCH_STUB_DATA_H
+
+#ifdef __i386__
+#include <generated/asm-offsets.h>
+#include <asm/ldt.h>
+
+struct stub_data_arch {
+ int sync;
+ struct user_desc tls[UM_KERN_GDT_ENTRY_TLS_ENTRIES];
+};
+#else
+#define STUB_SYNC_FS_BASE (1 << 0)
+#define STUB_SYNC_GS_BASE (1 << 1)
+struct stub_data_arch {
+ int sync;
+ unsigned long fs_base;
+ unsigned long gs_base;
+};
+#endif
+
+#endif /* __ARCH_STUB_DATA_H */
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index dc89f4423454..4fa58f5b4fca 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -13,3 +13,5 @@
extern void stub_segv_handler(int, siginfo_t *, void *);
extern void stub_syscall_handler(void);
+extern void stub_signal_interrupt(int, siginfo_t *, void *);
+extern void stub_signal_restorer(void);
diff --git a/arch/x86/um/shared/sysdep/stub_32.h b/arch/x86/um/shared/sysdep/stub_32.h
index 390988132c0a..df568fc3ceb4 100644
--- a/arch/x86/um/shared/sysdep/stub_32.h
+++ b/arch/x86/um/shared/sysdep/stub_32.h
@@ -131,4 +131,17 @@ static __always_inline void *get_stub_data(void)
"call *%%eax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ for (int i = 0; i < sizeof(arch->tls) / sizeof(arch->tls[0]); i++) {
+ if (arch->sync & (1 << i))
+ stub_syscall1(__NR_set_thread_area,
+ (unsigned long) &arch->tls[i]);
+ }
+
+ arch->sync = 0;
+}
+
#endif
diff --git a/arch/x86/um/shared/sysdep/stub_64.h b/arch/x86/um/shared/sysdep/stub_64.h
index 294affbec742..5a9546ff0493 100644
--- a/arch/x86/um/shared/sysdep/stub_64.h
+++ b/arch/x86/um/shared/sysdep/stub_64.h
@@ -10,6 +10,7 @@
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
+#include <asm/prctl.h>
#define STUB_MMAP_NR __NR_mmap
#define MMAP_OFFSET(o) (o)
@@ -134,4 +135,17 @@ static __always_inline void *get_stub_data(void)
"call *%%rax ;" \
:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
"i" (&fn))
+
+static __always_inline void
+stub_seccomp_restore_state(struct stub_data_arch *arch)
+{
+ /* TODO: Use _writefsbase_u64/_writegsbase_u64 when possible */
+ if (arch->sync & STUB_SYNC_FS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base);
+ if (arch->sync & STUB_SYNC_GS_BASE)
+ stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base);
+
+ arch->sync = 0;
+}
+
#endif
--
2.47.0
More information about the linux-um
mailing list