[PATCH v3 3/5] um: Do a double clone to disable rseq
benjamin at sipsolutions.net
benjamin at sipsolutions.net
Wed Jun 12 07:10:45 PDT 2024
From: Benjamin Berg <benjamin.berg at intel.com>
Newer glibc versions enable rseq support by default. It remains enabled
in the cloned child process, potentially causing the host kernel to
read or write memory in the child.
It appears that this has not been an issue so far purely because the
memory area used happened to be above TASK_SIZE and remained mapped.
Note that a better approach would be to exec a small static binary that
does not link with other libraries. Using a memfd and execveat, the
binary could be embedded into UML itself, which would result in an
entirely clean execution environment for userspace.
Signed-off-by: Benjamin Berg <benjamin.berg at intel.com>
---
v2: Improved clone logic using CLONE_VFORK
v3: Undo incorrect change in child wait loop
---
arch/um/os-Linux/skas/process.c | 53 ++++++++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 4 deletions(-)
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 41a288dcfc34..93d3d44337cd 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -255,6 +255,32 @@ static int userspace_tramp(void *stack)
int userspace_pid[NR_CPUS];
int kill_userspace_mm[NR_CPUS];
+struct tramp_data {
+ int pid; /* out: result of the clone() in userspace_tramp_clone_vm(), or -errno */
+ void *clone_sp; /* in: stack pointer passed to that clone() call */
+ void *stack; /* in: argument forwarded to userspace_tramp() */
+};
+
+static int userspace_tramp_clone_vm(void *data)
+{
+ struct tramp_data *tramp_data = data;
+
+ /*
+ * First-stage child: we are still in the same VM as the parent, but
+ * rseq has been disabled for this process. Clone again into the new
+ * userspace process and store the result in tramp_data->pid; the
+ * kernel continues as soon as this process quits (CLONE_VFORK).
+ */
+
+ tramp_data->pid = clone(userspace_tramp, tramp_data->clone_sp,
+ CLONE_PARENT | CLONE_FILES | SIGCHLD,
+ tramp_data->stack);
+ if (tramp_data->pid < 0)
+ tramp_data->pid = -errno; /* hand the failure back as a negative errno */
+
+ exit(0); /* NOTE(review): exit() runs atexit/stdio teardown in the shared VM; consider _exit() */
+}
+
/**
* start_userspace() - prepare a new userspace process
* @stub_stack: pointer to the stub stack.
@@ -268,9 +294,10 @@ int kill_userspace_mm[NR_CPUS];
*/
int start_userspace(unsigned long stub_stack)
{
+ struct tramp_data tramp_data;
void *stack;
unsigned long sp;
- int pid, status, n, flags, err;
+ int pid, status, n, err;
/* setup a temporary stack page */
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -286,10 +313,13 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
- flags = CLONE_FILES | SIGCHLD;
+ tramp_data.stack = (void *) stub_stack;
+ tramp_data.clone_sp = (void *) sp;
+ tramp_data.pid = -EINVAL;
- /* clone into new userspace process */
- pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
+ /* first stage CLONE_VM clone using VFORK and no signal notification */
+ pid = clone(userspace_tramp_clone_vm, (void *) sp,
+ CLONE_VM | CLONE_FILES | CLONE_VFORK, &tramp_data);
if (pid < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
@@ -297,6 +327,21 @@ int start_userspace(unsigned long stub_stack)
return err;
}
+ n = waitpid(pid, &status, WUNTRACED | WNOHANG | __WCLONE);
+ if (n < 0 || !WIFEXITED(status) || WEXITSTATUS(status)) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : wait failed, errno = %d, status = %d\n",
+ __func__, n < 0 ? errno : 0, status);
+ goto out_kill;
+ }
+
+ pid = tramp_data.pid;
+ if (pid < 0) {
+ printk(UM_KERN_ERR "%s : second clone failed, errno = %d\n",
+ __func__, -pid);
+ return pid;
+ }
+
do {
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
if (n < 0) {
--
2.45.1
More information about the linux-um
mailing list