[PATCH v2 27/28] um: Implement kernel side of SECCOMP based process handling

benjamin at sipsolutions.net benjamin at sipsolutions.net
Tue Nov 22 02:07:58 PST 2022


From: Benjamin Berg <benjamin at sipsolutions.net>

This adds the kernel side of the seccomp-based process handling.

Co-authored-by: Johannes Berg <johannes at sipsolutions.net>
Signed-off-by: Benjamin Berg <benjamin at sipsolutions.net>
---
 arch/um/os-Linux/skas/mem.c     |  35 +-
 arch/um/os-Linux/skas/process.c | 561 ++++++++++++++++++++++++--------
 arch/um/os-Linux/start_up.c     |   3 -
 3 files changed, 439 insertions(+), 160 deletions(-)

diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 28e50349ab91..619035151bc6 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stddef.h>
 #include <unistd.h>
 #include <errno.h>
@@ -22,6 +23,7 @@
 extern char __syscall_stub_start[];
 
 extern void wait_stub_done(int pid);
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
 
 static inline unsigned long *check_init_stack(struct mm_id *mm_idp,
 					      unsigned long *stack)
@@ -58,24 +60,29 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
 	int n, i;
 	int err, pid = mm_idp->u.pid;
 
-	n = ptrace_setregs(pid, syscall_regs);
-	if (n < 0) {
-		printk(UM_KERN_ERR "Registers - \n");
-		for (i = 0; i < MAX_REG_NR; i++)
-			printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
-		panic("%s : PTRACE_SETREGS failed, errno = %d\n",
-		      __func__, -n);
-	}
-
 	/* Inform process how much we have filled in. */
 	proc_data->syscall_data_len = mm_idp->syscall_data_len;
 
-	err = ptrace(PTRACE_CONT, pid, 0, 0);
-	if (err)
-		panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
-		      errno);
+	if (using_seccomp) {
+		proc_data->restart_wait = 1;
+		wait_stub_done_seccomp(pid, proc_data, 0);
+	} else {
+		n = ptrace_setregs(pid, syscall_regs);
+		if (n < 0) {
+			printk(UM_KERN_ERR "Registers -\n");
+			for (i = 0; i < MAX_REG_NR; i++)
+				printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+			panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+			      __func__, -n);
+		}
+
+		err = ptrace(PTRACE_CONT, pid, 0, 0);
+		if (err)
+			panic("Failed to continue stub, pid = %d, errno = %d\n",
+			      pid, errno);
 
-	wait_stub_done(pid);
+		wait_stub_done(pid);
+	}
 
 	/*
 	 * proc_data->err will be non-zero if there was an (unexpected) error.
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 44a7d49538ce..55868eb35727 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,9 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
+ * Copyright (C) 2021 Benjamin Berg <benjamin at sipsolutions.net>
  * Copyright (C) 2015 Thomas Meyer (thomas at m3y3r.de)
  * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
+#include <linux/kconfig.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <unistd.h>
@@ -22,7 +24,13 @@
 #include <registers.h>
 #include <skas.h>
 #include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
 #include <linux/threads.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <linux/futex.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
@@ -137,6 +145,58 @@ void wait_stub_done(int pid)
 	fatal_sigsegv();
 }
 
+#ifdef CONFIG_UML_SECCOMP
+/*
+ * Wait until data->futex leaves FUTEX_IN_CHILD (issuing a FUTEX_WAKE first
+ * when !running) and data->signal is SIGTRAP; failures end in fatal_sigsegv().
+ */
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running)
+{
+	int ret;
+
+	do {
+		if (!running) {
+			data->signal = 0;
+			data->futex = FUTEX_IN_CHILD;
+			CATCH_EINTR(syscall(__NR_futex, &data->futex,
+					    FUTEX_WAKE, 1, NULL, NULL, 0));
+		}
+
+		do {
+			ret = syscall(__NR_futex, &data->futex, FUTEX_WAIT,
+				      FUTEX_IN_CHILD, NULL, NULL, 0);
+		} while ((ret == -1 && errno == EINTR) || data->futex == FUTEX_IN_CHILD);
+		running = 0;
+
+		/* We may receive a SIGALRM, if we do, we are not done yet and need to iterate. */
+	} while (data->signal == SIGALRM);
+
+	if (ret < 0 && errno != EAGAIN) {
+		printk(UM_KERN_ERR "%s : waiting for child futex failed, errno = %d\n",
+		       __func__, errno);
+		goto out_kill;
+	}
+	if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+		printk(UM_KERN_ERR "%s : invalid mcontext offset\n", __func__);
+		goto out_kill;
+	}
+	if (data->signal != SIGTRAP) {
+		printk(UM_KERN_ERR "%s : expected SIGTRAP but got %d\n",
+		       __func__, data->signal);
+		goto out_kill;
+	}
+
+	return;
+
+out_kill:
+	printk(UM_KERN_ERR "%s : failed to wait for SIGTRAP, pid = %d, errno = %d\n",
+	       __func__, pid, errno);
+	fatal_sigsegv();
+}
+#else
+void wait_stub_done_seccomp(int pid, struct stub_data *data, int running);
+#endif
+
 extern unsigned long current_stub_stack(void);
 
 static void get_skas_faultinfo(int pid, struct faultinfo *fi, unsigned long *aux_fp_regs)
@@ -198,16 +258,16 @@ extern char __syscall_stub_start[];
  */
 static int userspace_tramp(void *stack)
 {
-	struct sigaction sa;
 	struct stub_data *data;
 	void *addr;
 	int fd;
 	unsigned long long offset;
-	unsigned long segv_handler = STUB_CODE +
-				     (unsigned long) stub_segv_handler -
-				     (unsigned long) __syscall_stub_start;
 
-	ptrace(PTRACE_TRACEME, 0, 0, 0);
+	if (!using_seccomp)
+		ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+	/* Needed for seccomp, but this is sane anyway. */
+	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 
 	signal(SIGTERM, SIG_DFL);
 	signal(SIGWINCH, SIG_IGN);
@@ -233,17 +293,130 @@ static int userspace_tramp(void *stack)
 	data = (void *) addr;
 
 	set_sigstack((void *) &data->sigstack, sizeof(data->sigstack));
-	sigemptyset(&sa.sa_mask);
-	sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
-	sa.sa_sigaction = (void *) segv_handler;
-	sa.sa_restorer = NULL;
-	if (sigaction(SIGSEGV, &sa, NULL) < 0) {
-		os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
-			__func__, errno);
-		exit(1);
+
+	if (using_seccomp) {
+		struct rlimit lim;
+		struct sock_filter filter[] = {
+#if __BITS_PER_LONG > 32
+			/* [0] Load upper 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				(offsetof(struct seccomp_data, instruction_pointer) + 4)),
+
+			/* [1] Jump forward 4 instructions if the upper address is not identical */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) >> 32, 0, 4),
+#endif
+			/* [2] Load lower 32bit of instruction pointer from seccomp_data */
+			BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+				(offsetof(struct seccomp_data, instruction_pointer))),
+
+			/* [3] Mask out lower bits */
+			BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
+
+			/* [4] Jump to [6] if the lower bits are not on the expected page */
+			BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (STUB_CODE) & 0xfffff000, 0, 1),
+
+			/* [5] Permitted call, allow */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+
+			/* [6] Restricted call, replace with SIGSYS */
+			BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+		};
+		struct sock_fprog prog = {
+			.len = ARRAY_SIZE(filter),
+			.filter = filter,
+		};
+
+		/*
+		 * With seccomp we return normally from the signal handler, so
+		 * avoid setting things up through libc which may do its own
+		 * thing for restoring.
+		 */
+		struct sigaction_real {
+			void *sa_handler_;
+			unsigned long sa_flags;
+			void *sa_restorer;
+			sigset_t sa_mask;
+		} sa;
+
+		unsigned long v = STUB_CODE +
+				  (unsigned long) stub_signal_interrupt -
+				  (unsigned long) __syscall_stub_start;
+		unsigned long r = STUB_CODE +
+				  (unsigned long) stub_signal_restorer -
+				  (unsigned long) __syscall_stub_start;
+
+		/* Never coredump */
+		lim.rlim_cur = 0;
+		lim.rlim_max = 0;
+		if (setrlimit(RLIMIT_CORE, &lim) < 0) {
+			os_info("Could not set coredump size limit, errno = %d\n",
+				errno);
+			exit(1);
+		}
+
+		sigemptyset(&sa.sa_mask);
+		sigaddset(&sa.sa_mask, SIGALRM);
+		sigaddset(&sa.sa_mask, SIGCHLD);
+		sa.sa_flags = SA_ONSTACK | SA_SIGINFO | 0x04000000; /* SA_RESTORER */
+		sa.sa_handler_ = (void *)v;
+		sa.sa_restorer = (void *)r;
+		if (syscall(__NR_rt_sigaction, SIGSEGV, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGSYS, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGSYS handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGALRM, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGALRM handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGTRAP, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGTRAP handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_rt_sigaction, SIGFPE, &sa, NULL, 8) < 0) {
+			os_info("%s - setting SIGFPE handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+			    SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) {
+			os_info("%s - could not install seccomp filter - errno = %d\n",
+				__func__, errno);
+			exit(42);
+		}
+
+		trap_myself();
+	} else {
+		struct sigaction sa;
+		unsigned long segv_handler = STUB_CODE +
+					     (unsigned long) stub_segv_handler -
+					     (unsigned long) __syscall_stub_start;
+
+		sigemptyset(&sa.sa_mask);
+		sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+		sa.sa_sigaction = (void *) segv_handler;
+		sa.sa_restorer = NULL;
+		if (sigaction(SIGSEGV, &sa, NULL) < 0) {
+			os_info("%s - setting SIGSEGV handler failed - errno = %d\n",
+				__func__, errno);
+			exit(1);
+		}
+
+		kill(os_getpid(), SIGSTOP);
 	}
 
-	kill(os_getpid(), SIGSTOP);
 	return 0;
 }
 
@@ -269,6 +442,7 @@ int start_userspace(struct mm_id *id)
 	void *stack;
 	unsigned long sp;
 	int status, n, flags, err;
+	struct stub_data *proc_data = (void *) id->stack;
 
 	/* setup a temporary stack page */
 	stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -286,6 +460,9 @@ int start_userspace(struct mm_id *id)
 
 	flags = CLONE_FILES | SIGCHLD;
 
+	if (using_seccomp)
+		proc_data->futex = FUTEX_IN_CHILD;
+
 	/* clone into new userspace process */
 	id->u.pid = clone(userspace_tramp, (void *) sp, flags, (void *) id->stack);
 	if (id->u.pid < 0) {
@@ -295,29 +472,33 @@ int start_userspace(struct mm_id *id)
 		return err;
 	}
 
-	do {
-		CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | __WALL));
-		if (n < 0) {
+	if (using_seccomp) {
+		wait_stub_done_seccomp(id->u.pid, proc_data, 1);
+	} else {
+		do {
+			CATCH_EINTR(n = waitpid(id->u.pid, &status, WUNTRACED | __WALL));
+			if (n < 0) {
+				err = -errno;
+				printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+				       __func__, errno);
+				goto out_kill;
+			}
+		} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+		if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+			err = -EINVAL;
+			printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+			       __func__, status);
+			goto out_kill;
+		}
+
+		if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+			   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
 			err = -errno;
-			printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+			printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
 			       __func__, errno);
 			goto out_kill;
 		}
-	} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
-	if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
-		err = -EINVAL;
-		printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
-		       __func__, status);
-		goto out_kill;
-	}
-
-	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
-		   (void *) PTRACE_O_TRACESYSGOOD) < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
-		       __func__, errno);
-		goto out_kill;
 	}
 
 	if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -337,7 +518,9 @@ int start_userspace(struct mm_id *id)
 void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 {
 	int err, status, op, pid = userspace_pid[0];
-	siginfo_t si;
+	siginfo_t si_ptrace;
+	siginfo_t *si;
+	int sig;
 
 	/* Handle any immediate reschedules or signals */
 	interrupt_end();
@@ -346,94 +529,166 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 		if (kill_userspace_mm[0])
 			fatal_sigsegv();
 
-		/*
-		 * This can legitimately fail if the process loads a
-		 * bogus value into a segment register.  It will
-		 * segfault and PTRACE_GETREGS will read that value
-		 * out of the process.  However, PTRACE_SETREGS will
-		 * fail.  In this case, there is nothing to do but
-		 * just kill the process.
-		 */
-		if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+		if (using_seccomp) {
+			struct stub_data *proc_data = (void *)current_stub_stack();
+			int ret;
 
-		if (put_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			ret = set_stub_state(regs, proc_data, singlestepping());
+			if (ret) {
+				printk(UM_KERN_ERR "%s - failed to set regs: %d",
+				       __func__, ret);
+				fatal_sigsegv();
+			}
 
-		if (singlestepping())
-			op = PTRACE_SYSEMU_SINGLESTEP;
-		else
-			op = PTRACE_SYSEMU;
+			/* Must have been reset by the syscall caller */
+			if (proc_data->restart_wait != 0)
+				panic("Programming error: Flag to only run syscalls in child was not cleared!");
+
+			proc_data->signal = 0;
+			proc_data->futex = FUTEX_IN_CHILD;
+			CATCH_EINTR(syscall(__NR_futex, &proc_data->futex,
+					    FUTEX_WAKE, 1, NULL, NULL, 0));
+			do {
+				ret = syscall(__NR_futex, &proc_data->futex,
+					      FUTEX_WAIT, FUTEX_IN_CHILD, NULL, NULL, 0);
+			} while ((ret == -1 && errno == EINTR) ||
+				 proc_data->futex == FUTEX_IN_CHILD);
+
+			sig = proc_data->signal;
+
+			ret = get_stub_state(regs, proc_data);
+			if (ret) {
+				printk(UM_KERN_ERR "%s - failed to get regs: %d",
+				       __func__, ret);
+				fatal_sigsegv();
+			}
 
-		if (ptrace(op, pid, 0, 0)) {
-			printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
-			       __func__, op, errno);
-			fatal_sigsegv();
-		}
+			if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+				panic("%s - Invalid siginfo offset from child",
+				      __func__);
+			si = (void *)&proc_data->sigstack[proc_data->si_offset];
+
+			if (sig == SIGSEGV && si->si_code == SI_KERNEL) {
+				/* This happens if the host is unable to
+				 * restore the state from the mcontext.
+				 */
+				panic("%s - SEGV with si_code == SI_KERNEL, faulted while returning to userspace (addr: 0x%lx)",
+				      __func__, (unsigned long) si->si_addr);
+			}
 
-		CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
-		if (err < 0) {
-			printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			regs->is_user = 1;
 
-		regs->is_user = 1;
-		if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
-			printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+			/* Fill in ORIG_RAX and extract fault information */
+			PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+			if (sig == SIGSEGV) {
+				mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
 
-		if (get_fp_registers(pid, regs->fp)) {
-			printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
-			       __func__, errno);
-			fatal_sigsegv();
-		}
+				GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+			}
+		} else {
+			/*
+			 * This can legitimately fail if the process loads a
+			 * bogus value into a segment register.  It will
+			 * segfault and PTRACE_GETREGS will read that value
+			 * out of the process.  However, PTRACE_SETREGS will
+			 * fail.  In this case, there is nothing to do but
+			 * just kill the process.
+			 */
+			if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
 
-		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+			if (put_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
 
-		if (WIFSTOPPED(status)) {
-			int sig = WSTOPSIG(status);
+			if (singlestepping())
+				op = PTRACE_SYSEMU_SINGLESTEP;
+			else
+				op = PTRACE_SYSEMU;
 
-			/* These signal handlers need the si argument.
-			 * The SIGIO and SIGALARM handlers which constitute the
-			 * majority of invocations, do not use it.
-			 */
-			switch (sig) {
-			case SIGSEGV:
-			case SIGTRAP:
-			case SIGILL:
-			case SIGBUS:
-			case SIGFPE:
-			case SIGWINCH:
-				ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
-				break;
+			if (ptrace(op, pid, 0, 0)) {
+				printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+				       __func__, op, errno);
+				fatal_sigsegv();
 			}
 
+			CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+			if (err < 0) {
+				printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			regs->is_user = 1;
+			if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+				printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (get_fp_registers(pid, regs->fp)) {
+				printk(UM_KERN_ERR "%s -  get_fp_registers failed, errno = %d\n",
+				       __func__, errno);
+				fatal_sigsegv();
+			}
+
+			if (WIFSTOPPED(status)) {
+				sig = WSTOPSIG(status);
+
+				/* These signal handlers need the si argument
+				 * and SIGSEGV needs the faultinfo.
+				 * The SIGIO and SIGALRM handlers which constitute the
+				 * majority of invocations, do not use it.
+				 */
+				switch (sig) {
+				case SIGSEGV:
+					get_skas_faultinfo(pid,
+							   &regs->faultinfo,
+							   aux_fp_regs);
+					fallthrough;
+				case SIGTRAP:
+				case SIGILL:
+				case SIGBUS:
+				case SIGFPE:
+				case SIGWINCH:
+					ptrace(PTRACE_GETSIGINFO, pid, 0,
+					       (struct siginfo *)&si_ptrace);
+					si = &si_ptrace;
+					break;
+				default:
+					si = NULL;
+					break;
+				}
+			} else {
+				sig = 0;
+			}
+		}
+
+		UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+		if (sig) {
 			switch (sig) {
 			case SIGSEGV:
-				get_skas_faultinfo(pid,
-						   &regs->faultinfo, aux_fp_regs);
-
-				if (PTRACE_FULL_FAULTINFO)
-					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
-							     regs);
+				if (using_seccomp || PTRACE_FULL_FAULTINFO)
+					(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)si,
+						     regs);
 				else
 					segv(regs->faultinfo, 0, 1, NULL);
 
+				break;
+			case SIGSYS:
+				handle_syscall(regs);
 				break;
 			case SIGTRAP + 0x80:
 				handle_trap(pid, regs);
 				break;
 			case SIGTRAP:
-				relay_signal(SIGTRAP, (struct siginfo *)&si, regs);
+				relay_signal(SIGTRAP, (struct siginfo *)si, regs);
 				break;
 			case SIGALRM:
 				break;
@@ -443,7 +698,7 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
 			case SIGFPE:
 			case SIGWINCH:
 				block_signals_trace();
-				(*sig_info[sig])(sig, (struct siginfo *)&si, regs);
+				(*sig_info[sig])(sig, (struct siginfo *)si, regs);
 				unblock_signals_trace();
 				break;
 			default:
@@ -467,9 +722,14 @@ static int __init init_thread_regs(void)
 {
 	get_safe_registers(thread_regs.gp, thread_regs.fp);
 	/* Set parent's instruction pointer to start of clone-stub */
-	thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
-					(unsigned long)stub_clone_handler -
-					(unsigned long)__syscall_stub_start;
+	if (using_seccomp)
+		thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+				(unsigned long)stub_clone_handler_seccomp -
+				(unsigned long)__syscall_stub_start;
+	else
+		thread_regs.gp[REGS_IP_INDEX] = STUB_CODE +
+				(unsigned long)stub_clone_handler -
+				(unsigned long)__syscall_stub_start;
 
 	/* syscall data as a temporary stack area (top half). */
 	thread_regs.gp[REGS_SP_INDEX] = STUB_DATA +
@@ -493,45 +753,55 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 	 * prepare offset and fd of child's stack as argument for parent's
 	 * and child's mmap2 calls
 	 */
-	*data = ((struct stub_data) {
-		.offset	= MMAP_OFFSET(new_offset),
-		.fd     = new_fd,
-		.err    = -ESRCH,
-		.child_err = 0,
-	});
-
-	*child_data = ((struct stub_data) {
-		.child_err = -ESRCH,
-	});
-
-	err = ptrace_setregs(from->u.pid, thread_regs.gp);
-	if (err < 0) {
-		err = -errno;
-		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
-		      __func__, from->u.pid, -err);
-		return err;
-	}
+	data->offset     = MMAP_OFFSET(new_offset);
+	data->fd         = new_fd;
+	data->err        = -ESRCH;
+	data->child_err  = 0;
 
-	err = put_fp_registers(from->u.pid, thread_regs.fp);
-	if (err < 0) {
-		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
-		       __func__, from->u.pid, err);
-		return err;
-	}
+	child_data->child_err = -ESRCH;
 
-	/*
-	 * Wait, until parent has finished its work: read child's pid from
-	 * parent's stack, and check, if bad result.
-	 */
-	err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
-	if (err) {
-		err = -errno;
-		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
-		       from->u.pid, errno);
-		return err;
-	}
+	if (using_seccomp) {
+		err = set_stub_state(&thread_regs, data, 0);
+		if (err)
+			return err;
+
+		/* The architecture dependent state will be identical */
+		memcpy(&child_data->arch_data, &data->arch_data, sizeof(data->arch_data));
+
+		child_data->futex = FUTEX_IN_CHILD;
+
+		data->restart_wait = 0;
+		wait_stub_done_seccomp(from->u.pid, data, 0);
+	} else {
+		err = ptrace_setregs(from->u.pid, thread_regs.gp);
+		if (err < 0) {
+			err = -errno;
+			printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
+			      __func__, from->u.pid, -err);
+			return err;
+		}
 
-	wait_stub_done(from->u.pid);
+		err = put_fp_registers(from->u.pid, thread_regs.fp);
+		if (err < 0) {
+			printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
+			       __func__, from->u.pid, err);
+			return err;
+		}
+
+		/*
+		 * Wait, until parent has finished its work: read child's pid from
+		 * parent's stack, and check, if bad result.
+		 */
+		err = ptrace(PTRACE_CONT, from->u.pid, 0, 0);
+		if (err) {
+			err = -errno;
+			printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
+			       from->u.pid, errno);
+			return err;
+		}
+
+		wait_stub_done(from->u.pid);
+	}
 
 	id->u.pid = data->err;
 	if (id->u.pid < 0) {
@@ -544,7 +814,11 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 	 * Wait, until child has finished too: read child's result from
 	 * child's stack and check it.
 	 */
-	wait_stub_done(id->u.pid);
+	if (using_seccomp)
+		wait_stub_done_seccomp(id->u.pid, child_data, 1);
+	else
+		wait_stub_done(id->u.pid);
+
 	if (child_data->child_err != STUB_DATA) {
 		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
 		       __func__, id->u.pid, data->child_err);
@@ -552,7 +826,8 @@ int copy_context_skas0(struct mm_id *id, struct mm_id *from)
 		goto out_kill;
 	}
 
-	if (ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
+	if (!using_seccomp &&
+	    ptrace(PTRACE_SETOPTIONS, id->u.pid, NULL,
 		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
 		err = -errno;
 		printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index f84eb13a0b98..4dd8b959c008 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -413,12 +413,9 @@ void __init os_early_checks(void)
 	using_seccomp = 0;
 
 	if (init_seccomp()) {
-		/* Not fully implemented */
-#if 0
 		using_seccomp = 1;
 
 		return;
-#endif
 	}
 #endif
 
-- 
2.38.1




More information about the linux-um mailing list