[RFC PATCH 10/13] x86/um: nommu: stack save/restore on vfork
Hajime Tazaki
thehajime at gmail.com
Thu Oct 24 05:09:18 PDT 2024
This fork can only come from libc's vfork, which does this:
popq %%rdx;
call *%rax; // zpoline => __kernel_vsyscall
pushq %%rdx;
%rcx holds the return address, which is stored at
pt_regs[HOST_IP] at the moment. As the child returns via
userspace() with a jmp instruction (while the parent returns via
the ret instruction in __kernel_vsyscall), we need to pop
(advance past) the address pushed by "call".
When the child returns from vfork, it overwrites the stack contents
(via the pushq in vfork), which would confuse the parent once it
resumes after the child. Thus the contents must be restored before
vfork returns in the parent; this is done in do_syscall_64().
Signed-off-by: Hajime Tazaki <thehajime at gmail.com>
Signed-off-by: Ricardo Koller <ricarkol at google.com>
---
arch/um/kernel/process.c | 22 +++++++++++++++++-----
arch/x86/um/do_syscall_64.c | 36 ++++++++++++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 5 deletions(-)
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 270b5bd476be..134687530e5f 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -140,15 +140,27 @@ static void fork_handler(void)
#ifndef CONFIG_MMU
/*
+ * child of vfork(2) comes here.
+ * clone(2) also enters here but doesn't need to advance the %rsp.
+ *
* This fork can only come from libc's vfork, which
* does this:
* popq %%rdx;
- * call *%0; // vsyscall
+ * call *%rax; // zpoline => __kernel_vsyscall
* pushq %%rdx;
- * %rdx stores the return address which is stored
- * at pt_regs[HOST_IP] at the moment. We still
- * need to pop the pushed address by "call" though,
- * so this is what this next line does.
+ * %rcx holds the return address, which is stored
+ * at pt_regs[HOST_IP] at the moment. As the child returns
+ * via userspace() with a jmp instruction (while the parent
+ * returns via the ret instruction in __kernel_vsyscall),
+ * we still need to pop (advance past) the address pushed
+ * by "call", which is what the next line does.
+ *
+ * When the child returns from vfork, it overwrites the
+ * stack contents (via the pushq in vfork), which would
+ * confuse the parent once it resumes after the child.
+ *
+ * Thus the contents must be restored before vfork returns
+ * in the parent; this is done in do_syscall_64().
*/
if (current->thread.regs.regs.gp[HOST_ORIG_AX] == __NR_vfork)
current->thread.regs.regs.gp[REGS_SP_INDEX] += 8;
diff --git a/arch/x86/um/do_syscall_64.c b/arch/x86/um/do_syscall_64.c
index 594248f7319c..4235259bbee1 100644
--- a/arch/x86/um/do_syscall_64.c
+++ b/arch/x86/um/do_syscall_64.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
+//#define DEBUG 1
#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <asm/fsgsbase.h>
@@ -34,9 +35,38 @@ static int os_x86_arch_prctl(int pid, int option, unsigned long *arg2)
return 0;
}
+/*
+ * Save the stack slot that gp[HOST_SP] points at (the return address) before
+ * vfork runs: the child overwrites it after returning to userspace (i.e., by
+ * push %rdx), and the parent needs the original value back.
+ *
+ * Returns a heap-allocated copy of that slot, or NULL if the allocation
+ * fails. The copy is read back by vfork_restore_stack().
+ *
+ * see the detail in fork_handler().
+ */
+static void *vfork_save_stack(void)
+{
+	unsigned char *stack_copy;
+
+	/*
+	 * Only a single 64-bit slot is ever copied, so allocate just that
+	 * instead of a whole thread-stack-sized, zeroed buffer.
+	 */
+	stack_copy = kmalloc(sizeof(unsigned long), GFP_KERNEL);
+	if (!stack_copy)
+		return NULL;
+
+	memcpy(stack_copy,
+	       (void *)current->thread.regs.regs.gp[HOST_SP],
+	       sizeof(unsigned long));
+
+	return stack_copy;
+}
+
+/*
+ * Write the slot saved by vfork_save_stack() back to the location that
+ * gp[HOST_SP] points at, then release the saved copy.
+ *
+ * @stack_copy: buffer returned by vfork_save_stack(). It may be NULL if that
+ *              allocation failed; in that case a one-time warning is emitted
+ *              and the stack is left untouched.
+ */
+static void vfork_restore_stack(void *stack_copy)
+{
+	/* nothing was saved: warn, but never copy from a NULL pointer */
+	if (WARN_ON_ONCE(!stack_copy))
+		return;
+
+	memcpy((void *)current->thread.regs.regs.gp[HOST_SP],
+	       stack_copy, sizeof(unsigned long));
+
+	/*
+	 * The visible caller does not touch the copy after restoring, and
+	 * nothing else frees it, so release it here to avoid leaking one
+	 * buffer per vfork.
+	 */
+	kfree(stack_copy);
+}
+
__visible void do_syscall_64(struct pt_regs *regs)
{
int syscall;
+ unsigned char *stack_copy = NULL;
syscall = PT_SYSCALL_NR(regs->regs.gp);
UPT_SYSCALL_NR(&regs->regs) = syscall;
@@ -45,6 +75,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
syscall, (unsigned long)current,
(unsigned long)sys_call_table[syscall]);
+ if (syscall == __NR_vfork)
+ stack_copy = vfork_save_stack();
+
/* set fs register to the original host one */
os_x86_arch_prctl(0, ARCH_SET_FS, (void *)host_fs);
@@ -72,6 +105,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
userspace(&current->thread.regs.regs,
current_thread_info()->aux_fp_regs);
}
+ /* only parents of vfork restores the contents of stack */
+ if (syscall == __NR_vfork && regs->regs.gp[HOST_AX] > 0)
+ vfork_restore_stack(stack_copy);
}
#endif
--
2.43.0
More information about the linux-um
mailing list