[RFC PATCH v2 08/13] um: nommu: configure fs register on host syscall invocation
Benjamin Berg
benjamin at sipsolutions.net
Wed Nov 27 02:00:11 PST 2024
Hi,
On Mon, 2024-11-11 at 15:27 +0900, Hajime Tazaki wrote:
> As userspace on UML/!MMU also need to configure %fs register when it is
> running to correctly access thread structure, host syscalls implemented
> in os-Linux drivers may be puzzled when they are called. Thus it has to
> configure %fs register via arch_prctl(SET_FS) on every host syscalls.
>
> [SNIP]
> +
> +/**
> + * get_host_cpu_features() return true with X86_FEATURE_FSGSBASE even
> + * if the kernel is older and disabled using fsgsbase instruction.
> + * thus detection is based on whether SIGILL is raised or not.
> + */
> +static jmp_buf jmpbuf;
> +static void sigill(int sig, siginfo_t *si, void *ctx_void)
> +{
> + siglongjmp(jmpbuf, 1);
> +}
> +
> +void __init check_fsgsbase(void)
> +{
> + unsigned long fsbase;
> + struct sigaction sa;
> +
> + /* Probe FSGSBASE */
> + memset(&sa, 0, sizeof(sa));
> + sa.sa_sigaction = sigill;
> + sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
> + sigemptyset(&sa.sa_mask);
> + if (sigaction(SIGILL, &sa, 0))
> + os_warn("sigaction");
> +
> + os_info("Checking FSGSBASE instructions...");
> + if (sigsetjmp(jmpbuf, 0) == 0) {
> + asm volatile("rdfsbase %0" : "=r" (fsbase) :: "memory");
> + host_has_fsgsbase = 1;
> + os_info("OK\n");
> + } else {
> + host_has_fsgsbase = 0;
> + os_info("disabled\n");
> + }
> +}
According to Documentation/arch/x86/x86_64/fsgs.rst it looks like this
can also be checked using the HWCAP2_FSGSBASE bit in AT_HWCAP2.
Maybe that is a bit simpler?
> [SNIP]
>
> __visible void do_syscall_64(struct pt_regs *regs)
> {
> int syscall;
> @@ -49,6 +76,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
> if (syscall == __NR_vfork)
> stack_copy = vfork_save_stack();
>
> + /* set fs register to the original host one */
> + os_x86_arch_prctl(0, ARCH_SET_FS, (void *)host_fs);
> +
> if (likely(syscall < NR_syscalls)) {
> PT_REGS_SET_SYSCALL_RETURN(regs,
> EXECUTE_SYSCALL(syscall, regs));
> @@ -63,6 +93,11 @@ __visible void do_syscall_64(struct pt_regs *regs)
> set_thread_flag(TIF_SIGPENDING);
> interrupt_end();
>
> + /* restore back fs register to userspace configured one */
> + os_x86_arch_prctl(0, ARCH_SET_FS,
> + (void *)(current->thread.regs.regs.gp[FS_BASE
> + / sizeof(unsigned long)]));
> +
> /* execve succeeded */
> if (syscall == __NR_execve && regs->regs.gp[HOST_AX] == 0)
> userspace(&current->thread.regs.regs);
> diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
> index edb17fc73e07..d56df936a2d7 100644
> --- a/arch/x86/um/syscalls_64.c
> +++ b/arch/x86/um/syscalls_64.c
> @@ -12,11 +12,26 @@
> #include <asm/prctl.h> /* XXX This should get the constants from libc */
> #include <registers.h>
> #include <os.h>
> +#include <asm/thread_info.h>
> +#include <asm/mman.h>
> +
> +#ifndef CONFIG_MMU
> +/*
> + * The guest libc can change FS, which confuses the host libc.
> + * In fact, changing FS directly is not supported (check
> + * man arch_prctl). So, whenever we make a host syscall,
> + * we should be changing FS to the original FS (not the
> + * one set by the guest libc). This original FS is stored
> + * in host_fs.
> + */
> +long long host_fs = -1;
Right, the libc already uses it for its own thread-local storage. That
is a bit annoying, as UML doesn't need threading in that sense.
Note that similar handling needs to happen for every userspace to
kernel switch. I guess the only other location is the signal handler.
Benjamin
> +#endif
>
> long arch_prctl(struct task_struct *task, int option,
> unsigned long __user *arg2)
> {
> long ret = -EINVAL;
> +#ifdef CONFIG_MMU
>
> switch (option) {
> case ARCH_SET_FS:
> @@ -38,6 +53,48 @@ long arch_prctl(struct task_struct *task, int option,
> }
>
> return ret;
> +#else
> +
> + unsigned long *ptr = arg2, tmp;
> +
> + switch (option) {
> + case ARCH_SET_FS:
> + if (host_fs == -1)
> + os_arch_prctl(0, ARCH_GET_FS, (void *)&host_fs);
> + ret = 0;
> + break;
> + case ARCH_SET_GS:
> + ret = 0;
> + break;
> + case ARCH_GET_FS:
> + case ARCH_GET_GS:
> + ptr = &tmp;
> + break;
> + }
> +
> + ret = os_arch_prctl(0, option, ptr);
> + if (ret)
> + return ret;
> +
> + switch (option) {
> + case ARCH_SET_FS:
> + current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)] =
> + (unsigned long) arg2;
> + break;
> + case ARCH_SET_GS:
> + current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)] =
> + (unsigned long) arg2;
> + break;
> + case ARCH_GET_FS:
> + ret = put_user(current->thread.regs.regs.gp[FS_BASE / sizeof(unsigned long)], arg2);
> + break;
> + case ARCH_GET_GS:
> + ret = put_user(current->thread.regs.regs.gp[GS_BASE / sizeof(unsigned long)], arg2);
> + break;
> + }
> +
> + return ret;
> +#endif
> }
>
> SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
More information about the linux-um
mailing list