[PATCH v4 07/13] x86/um: nommu: process/thread handling
Hajime Tazaki
thehajime at gmail.com
Sun Dec 8 02:15:34 PST 2024
Since the ptrace facility isn't used under !MMU of UML, there is a different
code path to invoke processes/threads: on entry to the syscall
interface, the stack pointer has to be manipulated to handle the vfork(2)
return address, no external process is used, and some registers
(the fs segment register for TLS, etc.) need to be properly configured on
every context switch.
Signals aren't delivered on the non-ptrace syscall entry/leave path, so we
also need to handle pending signals by ourselves.
Signed-off-by: Hajime Tazaki <thehajime at gmail.com>
Signed-off-by: Ricardo Koller <ricarkol at google.com>
---
arch/um/os-Linux/process.c | 6 ++++
arch/x86/um/Makefile | 3 +-
arch/x86/um/nommu/Makefile | 2 +-
arch/x86/um/nommu/do_syscall_64.c | 26 +++++++++++++++
arch/x86/um/nommu/entry_64.S | 24 ++++++++++++++
arch/x86/um/nommu/process.c | 35 ++++++++++++++++++++
arch/x86/um/nommu/syscalls_64.c | 44 +++++++++++++++++++++++++
arch/x86/um/shared/sysdep/syscalls_64.h | 1 +
8 files changed, 139 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/um/nommu/process.c
create mode 100644 arch/x86/um/nommu/syscalls_64.c
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index e2dc00fc84c0..49aa8e92205e 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -28,6 +28,9 @@ int unscheduled_userspace_iterations;
void os_alarm_process(int pid)
{
+ if (pid <= 0)
+ return; /* kill(2) treats pid <= 0 as a process-group or broadcast target */
+
kill(pid, SIGALRM);
}
@@ -45,6 +48,9 @@ void os_kill_process(int pid, int reap_child)
void os_kill_ptraced_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
kill(pid, SIGKILL);
ptrace(PTRACE_KILL, pid);
ptrace(PTRACE_CONT, pid);
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 227af2a987e2..53c9ebb3c41c 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -27,7 +27,8 @@ subarch-y += ../kernel/sys_ia32.o
else
-obj-y += syscalls_64.o vdso/
+obj-y += vdso/
+obj-$(CONFIG_MMU) += syscalls_64.o
subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o \
../lib/memmove_64.o ../lib/memset_64.o
diff --git a/arch/x86/um/nommu/Makefile b/arch/x86/um/nommu/Makefile
index ebe47d4836f4..03865bf1428e 100644
--- a/arch/x86/um/nommu/Makefile
+++ b/arch/x86/um/nommu/Makefile
@@ -5,4 +5,4 @@ else
BITS := 64
endif
-obj-y = do_syscall_$(BITS).o entry_$(BITS).o os-Linux/
+obj-y = do_syscall_$(BITS).o entry_$(BITS).o process.o syscalls_$(BITS).o os-Linux/
diff --git a/arch/x86/um/nommu/do_syscall_64.c b/arch/x86/um/nommu/do_syscall_64.c
index 5d0fa83e7fdc..d5fb849bb34b 100644
--- a/arch/x86/um/nommu/do_syscall_64.c
+++ b/arch/x86/um/nommu/do_syscall_64.c
@@ -6,9 +6,28 @@
#include <sysdep/syscalls.h>
#include <os.h>
+/*
+ * Keep a copy of the 8 bytes located at the current task's saved stack
+ * pointer (the return address stored on the stack).  The child of vfork
+ * overwrites that slot after returning to userspace (i.e., by push %rdx),
+ * so the parent needs the original contents back later.
+ *
+ * @stack: destination buffer, at least 8 bytes.
+ *
+ * See arch_fixup_stack() for the libc vfork sequence involved.
+ */
+static void vfork_save_stack(void *stack)
+{
+	const void *user_sp = (void *)current->thread.regs.regs.gp[HOST_SP];
+
+	memcpy(stack, user_sp, 8);
+}
+
+/*
+ * Write the 8 bytes held in @stack_copy back to the location addressed by
+ * the current task's saved stack pointer.
+ */
+static void vfork_restore_stack(void *stack_copy)
+{
+	void *user_sp = (void *)current->thread.regs.regs.gp[HOST_SP];
+
+	memcpy(user_sp, stack_copy, 8);
+}
+
__visible void do_syscall_64(struct pt_regs *regs)
{
int syscall;
+ unsigned long stack_copy;
syscall = PT_SYSCALL_NR(regs->regs.gp);
UPT_SYSCALL_NR(&regs->regs) = syscall;
@@ -17,6 +36,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
syscall, (unsigned long)current,
(unsigned long)sys_call_table[syscall]);
+ if (syscall == __NR_vfork)
+ vfork_save_stack(&stack_copy);
+
if (likely(syscall < NR_syscalls)) {
PT_REGS_SET_SYSCALL_RETURN(regs,
EXECUTE_SYSCALL(syscall, regs));
@@ -31,6 +53,10 @@ __visible void do_syscall_64(struct pt_regs *regs)
if (syscall == __NR_execve && regs->regs.gp[HOST_AX] == 0)
userspace(&current->thread.regs.regs);
+ /* only the parent of vfork restores the contents of the stack */
+ if (syscall == __NR_vfork && regs->regs.gp[HOST_AX] > 0)
+ vfork_restore_stack(&stack_copy);
+
/* force do_signal() --> is_syscall() */
set_thread_flag(TIF_SIGPENDING);
interrupt_end();
diff --git a/arch/x86/um/nommu/entry_64.S b/arch/x86/um/nommu/entry_64.S
index c612a4c53c6f..cb0642172e00 100644
--- a/arch/x86/um/nommu/entry_64.S
+++ b/arch/x86/um/nommu/entry_64.S
@@ -85,3 +85,27 @@ ENTRY(__kernel_vsyscall)
ret
END(__kernel_vsyscall)
+
+/*
+ * void userspace(struct uml_pt_regs *regs)
+ *
+ * Leave for the user code described by the register frame that
+ * current_ptregs points to.  This routine does not return to its caller:
+ * it replaces %rsp with the saved stack pointer and exits through an
+ * indirect jmp.  The saved flags are skipped rather than restored, and
+ * %r11 is used as scratch for the target address.
+ */
+ENTRY(userspace)
+	/*
+	 * Align the stack for the x86_64 ABI before making any call, so that
+	 * arch_fixup_stack() is entered with a conforming stack as well.
+	 * The incoming %rsp is not needed again because we never return.
+	 */
+	and $-0x10, %rsp
+
+	/* fixup stack for vfork syscall; %rdi still carries regs */
+	call arch_fixup_stack
+
+	/* Handle any immediate reschedules or signals */
+	call interrupt_end
+
+	/* switch to the saved register frame and unwind it */
+	movq current_ptregs, %rsp
+
+	/* POP_REGS is defined elsewhere; presumably reloads the saved GPRs */
+	POP_REGS
+
+	addq $8, %rsp /* skip orig_ax */
+	popq %r11 /* pt_regs->ip */
+	addq $8, %rsp /* skip cs */
+	addq $8, %rsp /* skip flags */
+	popq %rsp
+
+	jmp *%r11
+
+END(userspace)
diff --git a/arch/x86/um/nommu/process.c b/arch/x86/um/nommu/process.c
new file mode 100644
index 000000000000..ad5487796329
--- /dev/null
+++ b/arch/x86/um/nommu/process.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/unistd.h>
+#include <sysdep/syscalls.h>
+#include <os.h>
+
+/*
+ * Adjust the saved user stack pointer before the child of vfork(2) resumes.
+ *
+ * Children of both vfork(2) and clone(2) pass through here, but only the
+ * vfork case needs %rsp advanced; every other syscall is left untouched.
+ *
+ * Such a fork can only come from libc's vfork, which does this:
+ *	pop %%rdx;
+ *	mov $58,%eax
+ *	syscall ; // hook => __kernel_vsyscall
+ *	push %%rdx;
+ *
+ * %rcx holds the return address, which is kept in pt_regs[HOST_IP] at this
+ * point.  The child goes back through userspace() with a jmp instruction,
+ * whereas the parent goes back with the ret instruction in
+ * __kernel_vsyscall.  For the child, the address pushed by the "call"
+ * therefore has to be popped by hand, which is the 8-byte advance below.
+ *
+ * Once the child has returned from vfork, its pushq overwrites the stack
+ * contents, which would leave the parent puzzled when it resumes.  The
+ * contents are therefore restored before the vfork parent returns; that
+ * is done in do_syscall_64().
+ */
+void arch_fixup_stack(struct uml_pt_regs *regs)
+{
+	if (regs->gp[HOST_ORIG_AX] != __NR_vfork)
+		return;
+
+	regs->gp[REGS_SP_INDEX] += 8;
+}
diff --git a/arch/x86/um/nommu/syscalls_64.c b/arch/x86/um/nommu/syscalls_64.c
new file mode 100644
index 000000000000..c78c442aed1d
--- /dev/null
+++ b/arch/x86/um/nommu/syscalls_64.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2003 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright 2003 PathScale, Inc.
+ *
+ * Licensed under the GPL
+ */
+
+#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <registers.h>
+#include <os.h>
+#include "syscalls.h"
+
+/*
+ * Per-task state switch for !CONFIG_MMU, where ptrace is not used.
+ *
+ * Publishes the incoming task's top-of-stack and pt_regs through
+ * current_top_of_stack / current_ptregs, then installs the task's saved
+ * FS base via arch_prctl(ARCH_SET_FS).  The FS update is skipped when the
+ * saved FS base is zero or when the task has no mm.
+ */
+void arch_switch_to(struct task_struct *to)
+{
+	unsigned long fs_base;
+
+	current_top_of_stack = task_top_of_stack(to);
+	current_ptregs = (long)task_pt_regs(to);
+
+	fs_base = to->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)];
+
+	/* this changes the FS on every context switch */
+	if (fs_base != 0 && to->mm != NULL)
+		arch_prctl(to, ARCH_SET_FS, (void __user *) fs_base);
+}
+
+/*
+ * mmap(2) entry point: rejects a byte offset that is not page aligned with
+ * -EINVAL, otherwise forwards to ksys_mmap_pgoff() with the offset
+ * converted to a page index.
+ */
+SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, off)
+{
+	const unsigned long sub_page = off & ~PAGE_MASK;
+
+	if (sub_page)
+		return -EINVAL;
+
+	return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
+}
diff --git a/arch/x86/um/shared/sysdep/syscalls_64.h b/arch/x86/um/shared/sysdep/syscalls_64.h
index ffd80ee3b9dc..9831ba8f5bcd 100644
--- a/arch/x86/um/shared/sysdep/syscalls_64.h
+++ b/arch/x86/um/shared/sysdep/syscalls_64.h
@@ -29,6 +29,7 @@ extern syscall_handler_t sys_arch_prctl;
extern void do_syscall_64(struct pt_regs *regs);
extern long __kernel_vsyscall(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
int64_t a4, int64_t a5, int64_t a6);
+extern void arch_fixup_stack(struct uml_pt_regs *regs);
#endif
#endif
--
2.43.0
More information about the linux-um
mailing list