[PATCH 3/5] um: Do a double clone to disable rseq
Tiwei Bie
tiwei.btw at antgroup.com
Tue May 28 03:16:38 PDT 2024
On 5/28/24 4:54 PM, benjamin at sipsolutions.net wrote:
> From: Benjamin Berg <benjamin.berg at intel.com>
>
> Newer glibc versions are enabling rseq support by default. This remains
> enabled in the cloned child process, potentially causing the host kernel
> to write/read memory in the child.
>
> It appears that this was not an issue so far purely because the memory
> area used happened to be above TASK_SIZE and remained mapped.
I also encountered this issue. In my case, with "Force a static link"
(CONFIG_STATIC_LINK) enabled, UML will crash immediately every time
it starts up. I worked around this by setting the glibc.pthread.rseq
tunable via GLIBC_TUNABLES [1] before launching UML.
So another easy way to work around this issue without introducing runtime
overhead might be to add the GLIBC_TUNABLES=glibc.pthread.rseq=0 environment
variable and exec /proc/self/exe in UML on startup.
[1] https://www.gnu.org/software/libc/manual/html_node/Tunables.html
Regards,
Tiwei
>
> Note that a better approach would be to exec a small static binary that
> does not link with other libraries. Using a memfd and execveat the
> binary could be embedded into UML itself and it would result in an
> entirely clean execution environment for userspace.
>
> Signed-off-by: Benjamin Berg <benjamin.berg at intel.com>
> ---
> arch/um/os-Linux/skas/process.c | 54 ++++++++++++++++++++++++++++++---
> 1 file changed, 50 insertions(+), 4 deletions(-)
>
> diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
> index 41a288dcfc34..ee332a2aeea6 100644
> --- a/arch/um/os-Linux/skas/process.c
> +++ b/arch/um/os-Linux/skas/process.c
> @@ -255,6 +255,31 @@ static int userspace_tramp(void *stack)
> int userspace_pid[NR_CPUS];
> int kill_userspace_mm[NR_CPUS];
>
> +struct tramp_data {
> + int pid; /* out: result of the second clone(), or -errno on failure */
> + void *clone_sp; /* in: stack pointer handed to the second clone() */
> + void *stack; /* in: argument passed through to userspace_tramp() */
> +};
> +
> +static int userspace_tramp_clone_vm(void *data)
> +{
> + struct tramp_data *tramp_data = data;
> +
> + /*
> + * This helper exists to do a double-clone. First with CLONE_VM which
> + * effectively disables things like rseq, and then the second one to
> + * get a new memory space.
> + */
> +
> + tramp_data->pid = clone(userspace_tramp, tramp_data->clone_sp,
> + CLONE_PARENT | CLONE_FILES | SIGCHLD,
> + tramp_data->stack);
> + if (tramp_data->pid < 0)
> + tramp_data->pid = -errno;
> +
> + exit(0);
> +}
> +
> /**
> * start_userspace() - prepare a new userspace process
> * @stub_stack: pointer to the stub stack.
> @@ -268,9 +293,10 @@ int kill_userspace_mm[NR_CPUS];
> */
> int start_userspace(unsigned long stub_stack)
> {
> + struct tramp_data tramp_data;
> void *stack;
> unsigned long sp;
> - int pid, status, n, flags, err;
> + int pid, status, n, err;
>
> /* setup a temporary stack page */
> stack = mmap(NULL, UM_KERN_PAGE_SIZE,
> @@ -286,10 +312,13 @@ int start_userspace(unsigned long stub_stack)
> /* set stack pointer to the end of the stack page, so it can grow downwards */
> sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
>
> - flags = CLONE_FILES | SIGCHLD;
> + tramp_data.stack = (void *) stub_stack;
> + tramp_data.clone_sp = (void *) sp;
> + tramp_data.pid = -EINVAL;
>
> /* clone into new userspace process */
> - pid = clone(userspace_tramp, (void *) sp, flags, (void *) stub_stack);
> + pid = clone(userspace_tramp_clone_vm, (void *) sp,
> + CLONE_VM | CLONE_FILES | SIGCHLD, &tramp_data);
> if (pid < 0) {
> err = -errno;
> printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
> @@ -305,7 +334,24 @@ int start_userspace(unsigned long stub_stack)
> __func__, errno);
> goto out_kill;
> }
> - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
> + } while (!WIFEXITED(status));
> +
> + pid = tramp_data.pid;
> + if (pid < 0) {
> + printk(UM_KERN_ERR "%s : second clone failed, errno = %d\n",
> + __func__, -pid);
> + return pid;
> + }
> +
> + do { /* keep waiting while the child is only stopped by SIGALRM */
> + CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
> + if (n < 0) {
> + err = -errno;
> + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
> + __func__, errno);
> + goto out_kill;
> + }
> + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
>
> if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
> err = -EINVAL;
More information about the linux-um
mailing list