[PATCH v6] um: Enable preemption in UML

Anton Ivanov anton.ivanov at cambridgegreys.com
Fri Sep 22 03:56:53 PDT 2023



On 22/09/2023 11:56, anton.ivanov at cambridgegreys.com wrote:
> From: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> 
> 1. Preemption requires saving/restoring FPU state. This patch
> adds support for it using GCC intrinsics as well as appropriate
> storage space in the thread structure.
> 
> 2. irq critical sections need preempt_disable()/preempt_enable().
> 
> 3. TLB critical sections need preempt_disable()/preempt_enable().

New in this version of the patch: items 4 and 5 below.

> 
> 4. The UML TLB flush is also invoked during fork. This happens
> with interrupts and preemption disabled, which does not agree with
> the standard mm locking via rwsem (which may sleep). The mm lock for
> this code path has been replaced with an RCU read lock.
> 
> 5. The FPU state area is statically allocated depending on
> the enabled PREEMPT options. PREEMPT_DYNAMIC and choosing the
> preemption model at boot time are therefore disabled for the UM arch.
> 
> Signed-off-by: Anton Ivanov <anton.ivanov at cambridgegreys.com>
> ---
>   arch/um/Kconfig                         |  2 +-
>   arch/um/include/asm/fpu/api.h           |  9 ++-
>   arch/um/include/asm/processor-generic.h |  4 ++
>   arch/um/kernel/Makefile                 |  4 ++
>   arch/um/kernel/fpu.c                    | 75 +++++++++++++++++++++++++
>   arch/um/kernel/irq.c                    |  2 +
>   arch/um/kernel/tlb.c                    | 21 ++++++-
>   7 files changed, 111 insertions(+), 6 deletions(-)
>   create mode 100644 arch/um/kernel/fpu.c
> 
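A note on point 1, for reviewers less familiar with the x86 side: the
optimized copy/xor/crypto routines that UML pulls out of the x86 tree
bracket their SIMD use with kernel_fpu_begin()/kernel_fpu_end(). While
preemption was off these could stay no-ops; with preemption enabled the
current task's FPU state has to be saved and restored around such
sections. A minimal caller-side sketch (do_simd_work() is a hypothetical
helper, not part of this patch):

	#include <asm/fpu/api.h>

	static void do_simd_work(void)
	{
		kernel_fpu_begin();	/* saves FPU state, disables preemption */

		/* SSE/AVX-using code imported from the x86 tree goes here */

		kernel_fpu_end();	/* restores FPU state, re-enables preemption */
	}
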
> diff --git a/arch/um/Kconfig b/arch/um/Kconfig
> index b5e179360534..19176fde82f3 100644
> --- a/arch/um/Kconfig
> +++ b/arch/um/Kconfig
> @@ -11,7 +11,7 @@ config UML
>   	select ARCH_HAS_KCOV
>   	select ARCH_HAS_STRNCPY_FROM_USER
>   	select ARCH_HAS_STRNLEN_USER
> -	select ARCH_NO_PREEMPT
> +	select ARCH_NO_PREEMPT_DYNAMIC
>   	select HAVE_ARCH_AUDITSYSCALL
>   	select HAVE_ARCH_KASAN if X86_64
>   	select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
> diff --git a/arch/um/include/asm/fpu/api.h b/arch/um/include/asm/fpu/api.h
> index 71bfd9ef3938..9e7680bf48f0 100644
> --- a/arch/um/include/asm/fpu/api.h
> +++ b/arch/um/include/asm/fpu/api.h
> @@ -4,12 +4,15 @@
>   
>   /* Copyright (c) 2020 Cambridge Greys Ltd
>    * Copyright (c) 2020 Red Hat Inc.
> - * A set of "dummy" defines to allow the direct inclusion
> - * of x86 optimized copy, xor, etc routines into the
> - * UML code tree. */
> + */
>   
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +extern void kernel_fpu_begin(void);
> +extern void kernel_fpu_end(void);
> +#else
>   #define kernel_fpu_begin() (void)0
>   #define kernel_fpu_end() (void)0
> +#endif
>   
>   static inline bool irq_fpu_usable(void)
>   {
> diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
> index 7414154b8e9a..9970e70be1e4 100644
> --- a/arch/um/include/asm/processor-generic.h
> +++ b/arch/um/include/asm/processor-generic.h
> @@ -44,6 +44,10 @@ struct thread_struct {
>   			} cb;
>   		} u;
>   	} request;
> +#if defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
> +/* Intel docs require xsave/xrestore area to be aligned to 64 bytes */
> +	u8 fpu[2048] __aligned(64);
> +#endif
>   };
>   
>   #define INIT_THREAD \
> diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
> index 811188be954c..c616e884a488 100644
> --- a/arch/um/kernel/Makefile
> +++ b/arch/um/kernel/Makefile
> @@ -26,9 +26,13 @@ obj-$(CONFIG_OF) += dtb.o
>   obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
>   obj-$(CONFIG_STACKTRACE) += stacktrace.o
>   obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
> +obj-$(CONFIG_PREEMPT) += fpu.o
> +obj-$(CONFIG_PREEMPT_VOLUNTARY) += fpu.o
>   
>   USER_OBJS := config.o
>   
> +CFLAGS_fpu.o += -mxsave -mxsaveopt
> +
>   include $(srctree)/arch/um/scripts/Makefile.rules
>   
>   targets := config.c config.tmp capflags.c
> diff --git a/arch/um/kernel/fpu.c b/arch/um/kernel/fpu.c
> new file mode 100644
> index 000000000000..4817276b2a26
> --- /dev/null
> +++ b/arch/um/kernel/fpu.c
> @@ -0,0 +1,75 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2023 Cambridge Greys Ltd
> + * Copyright (C) 2023 Red Hat Inc
> + */
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <asm/fpu/api.h>
> +#include <asm/cpufeature.h>
> +
> +/*
> + * The critical section between kernel_fpu_begin() and kernel_fpu_end()
> + * is non-reentrant. It is the caller's responsibility to avoid reentrance.
> + */
> +
> +static DEFINE_PER_CPU(bool, in_kernel_fpu);
> +
> +/* UML and the driver code it pulls out of the x86 tree know about x87/FPU
> + * features up to and including AVX512. TILE, etc. are not yet supported.
> + */
> +
> +#define KNOWN_387_FEATURES 0xFF
> +
> +void kernel_fpu_begin(void)
> +{
> +	preempt_disable();
> +
> +	WARN_ON(this_cpu_read(in_kernel_fpu));
> +
> +	this_cpu_write(in_kernel_fpu, true);
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt64(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave64(&current->thread.fpu, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave64(&current->thread.fpu);
> +	}
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVEOPT)))
> +		__builtin_ia32_xsaveopt(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else {
> +		if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +			__builtin_ia32_xsave(&current->thread.fpu, KNOWN_387_FEATURES);
> +		else
> +			__builtin_ia32_fxsave(&current->thread.fpu);
> +	}
> +#endif
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_begin);
> +
> +void kernel_fpu_end(void)
> +{
> +	WARN_ON(!this_cpu_read(in_kernel_fpu));
> +
> +#ifdef CONFIG_64BIT
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor64(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor64(&current->thread.fpu);
> +#else
> +	if (likely(cpu_has(&boot_cpu_data, X86_FEATURE_XSAVE)))
> +		__builtin_ia32_xrstor(&current->thread.fpu, KNOWN_387_FEATURES);
> +	else
> +		__builtin_ia32_fxrstor(&current->thread.fpu);
> +#endif
> +	this_cpu_write(in_kernel_fpu, false);
> +
> +	preempt_enable();
> +}
> +EXPORT_SYMBOL_GPL(kernel_fpu_end);
> +
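Since the begin/end section above is non-reentrant, callers that may run
in interrupt context usually check irq_fpu_usable() first and fall back
to a non-FPU path otherwise. A hedged sketch of that pattern
(do_simd()/do_scalar() are hypothetical):

	if (irq_fpu_usable()) {
		kernel_fpu_begin();
		do_simd();
		kernel_fpu_end();
	} else {
		do_scalar();
	}
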
> diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
> index 635d44606bfe..c02525da45df 100644
> --- a/arch/um/kernel/irq.c
> +++ b/arch/um/kernel/irq.c
> @@ -195,7 +195,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
>   
>   void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
>   {
> +	preempt_disable();
>   	_sigio_handler(regs, irqs_suspended);
> +	preempt_enable();
>   }
>   
>   static struct irq_entry *get_irq_entry_by_fd(int fd)
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 7d050ab0f78a..00b1870c2d62 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -322,6 +322,8 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
>   	unsigned long addr = start_addr, next;
>   	int ret = 0, userspace = 1;
>   
> +	preempt_disable();
> +
>   	hvc = INIT_HVC(mm, force, userspace);
>   	pgd = pgd_offset(mm, addr);
>   	do {
> @@ -346,6 +348,7 @@ static void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
>   		       "process: %d\n", task_tgid_vnr(current));
>   		mm_idp->kill = 1;
>   	}
> +	preempt_enable();
>   }
>   
>   static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
> @@ -362,6 +365,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>   
>   	mm = &init_mm;
>   	hvc = INIT_HVC(mm, force, userspace);
> +
> +	preempt_disable();
> +
>   	for (addr = start; addr < end;) {
>   		pgd = pgd_offset(mm, addr);
>   		if (!pgd_present(*pgd)) {
> @@ -449,6 +455,9 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
>   
>   	if (err < 0)
>   		panic("flush_tlb_kernel failed, errno = %d\n", err);
> +
> +	preempt_enable();
> +
>   	return updated;
>   }
>   
> @@ -466,6 +475,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>   
>   	address &= PAGE_MASK;
>   
> +	preempt_disable();
> +
>   	pgd = pgd_offset(mm, address);
>   	if (!pgd_present(*pgd))
>   		goto kill;
> @@ -520,6 +531,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
>   
>   	*pte = pte_mkuptodate(*pte);
>   
> +	preempt_enable();
>   	return;
>   
>   kill:
> @@ -597,8 +609,13 @@ void force_flush_all(void)
>   	struct vm_area_struct *vma;
>   	VMA_ITERATOR(vmi, mm, 0);
>   
> -	mmap_read_lock(mm);
> +	/* We use an RCU read lock instead of the mm lock, because
> +	 * this can be invoked from within critical/atomic sections
> +	 * and that does not agree with the sleepable semantics
> +	 * of the standard rwsem-based mm lock.
> +	 */
> +	rcu_read_lock();
>   	for_each_vma(vmi, vma)
>   		fix_range(mm, vma->vm_start, vma->vm_end, 1);
> -	mmap_read_unlock(mm);
> +	rcu_read_unlock();
>   }
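
On point 4: the reason an RCU read-side section works here where the mm
rwsem does not is that rcu_read_lock()/rcu_read_unlock() never sleep, so
they are usable on the fork path where interrupts and preemption are
already disabled. Purely as an illustration of the constraint (not code
from this patch):

	rcu_read_lock();
	/* walk the VMAs; nothing in here may block or sleep */
	rcu_read_unlock();

	mmap_read_lock(mm);	/* may sleep on the rwsem - not allowed in atomic context */
	mmap_read_unlock(mm);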

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/


