[PATCH] um: clean up mm creation

Anton Ivanov anton.ivanov at cambridgegreys.com
Fri Sep 22 06:41:58 PDT 2023


On 22/09/2023 12:16, Johannes Berg wrote:
> From: Johannes Berg <johannes.berg at intel.com>
>
> While enabling PREEMPT on UML, we found that the call to
> force_flush_all() cannot be done where it is, it sleeps
> while atomic.
>
> Further investigation shows that all this seems at least
> a bit roundabout and possibly wrong in the first
> place - we copy from the 'current' process and then flush
> when it starts running.
>
> What we really want is to start fresh with an empty mm
> process, then have it all set up by the kernel (copying
> the original mm contents as needed), and then sync it
> in arch_dup_mmap().

Is there a way we can come up with COW here?

>
> We should do the same for the LDT, so need to split that
> to be able to do this.
>
> Note that this fixes what seems like an issue - we look
> at current->mm when we copy, but that doesn't seem right
> in the case of clone() without copying the MM. This is
> probably saved by the forced flush later right now.

We will need to work on this.

It is nearly twice as slow as the current approach when running find /usr -type f -exec cat {} > /dev/null \;

>
> Signed-off-by: Johannes Berg <johannes.berg at intel.com>
> ---
>   arch/um/include/asm/Kbuild        |   1 -
>   arch/um/include/asm/mm_hooks.h    |  22 ++++++
>   arch/um/include/asm/mmu.h         |   3 +-
>   arch/um/include/asm/mmu_context.h |   2 +-
>   arch/um/include/shared/os.h       |   1 -
>   arch/um/kernel/process.c          |   3 -
>   arch/um/kernel/skas/mmu.c         |  35 ++++++----
>   arch/um/kernel/tlb.c              |   5 +-
>   arch/um/os-Linux/skas/process.c   | 107 ------------------------------
>   arch/x86/um/ldt.c                 |  53 +++++++--------
>   10 files changed, 76 insertions(+), 156 deletions(-)
>   create mode 100644 arch/um/include/asm/mm_hooks.h
>
> diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
> index b2d834a29f3a..de8d82a6fd7b 100644
> --- a/arch/um/include/asm/Kbuild
> +++ b/arch/um/include/asm/Kbuild
> @@ -26,5 +26,4 @@ generic-y += switch_to.h
>   generic-y += topology.h
>   generic-y += trace_clock.h
>   generic-y += kprobes.h
> -generic-y += mm_hooks.h
>   generic-y += vga.h
> diff --git a/arch/um/include/asm/mm_hooks.h b/arch/um/include/asm/mm_hooks.h
> new file mode 100644
> index 000000000000..b1016520c5b8
> --- /dev/null
> +++ b/arch/um/include/asm/mm_hooks.h
> @@ -0,0 +1,22 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_UM_MM_HOOKS_H
> +#define _ASM_UM_MM_HOOKS_H
> +
> +int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
> +
> +static inline void arch_exit_mmap(struct mm_struct *mm)
> +{
> +}
> +
> +static inline void arch_unmap(struct mm_struct *mm,
> +			unsigned long start, unsigned long end)
> +{
> +}
> +
> +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
> +		bool write, bool execute, bool foreign)
> +{
> +	/* by default, allow everything */
> +	return true;
> +}
> +#endif	/* _ASM_UM_MM_HOOKS_H */
> diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
> index 5b072aba5b65..68a710d23b5d 100644
> --- a/arch/um/include/asm/mmu.h
> +++ b/arch/um/include/asm/mmu.h
> @@ -18,7 +18,8 @@ typedef struct mm_context {
>   extern void __switch_mm(struct mm_id * mm_idp);
>   
>   /* Avoid tangled inclusion with asm/ldt.h */
> -extern long init_new_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
> +extern int init_new_ldt(struct mm_context *to_mm);
> +extern int copy_ldt(struct mm_context *to_mm, struct mm_context *from_mm);
>   extern void free_ldt(struct mm_context *mm);
>   
>   #endif
> diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h
> index 68e2eb9cfb47..8668861d4a85 100644
> --- a/arch/um/include/asm/mmu_context.h
> +++ b/arch/um/include/asm/mmu_context.h
> @@ -13,7 +13,7 @@
>   #include <asm/mm_hooks.h>
>   #include <asm/mmu.h>
>   
> -extern void force_flush_all(void);
> +void force_flush_all(struct mm_struct *mm);
>   
>   #define activate_mm activate_mm
>   static inline void activate_mm(struct mm_struct *old, struct mm_struct *new)
> diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
> index 1a82c6548dd5..c9acc28fe47c 100644
> --- a/arch/um/include/shared/os.h
> +++ b/arch/um/include/shared/os.h
> @@ -289,7 +289,6 @@ extern int protect(struct mm_id * mm_idp, unsigned long addr,
>   /* skas/process.c */
>   extern int is_skas_winch(int pid, int fd, void *data);
>   extern int start_userspace(unsigned long stub_stack);
> -extern int copy_context_skas0(unsigned long stack, int pid);
>   extern void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs);
>   extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
>   extern void switch_threads(jmp_buf *me, jmp_buf *you);
> diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
> index 6daffb9d8a8d..a024acd6d85c 100644
> --- a/arch/um/kernel/process.c
> +++ b/arch/um/kernel/process.c
> @@ -25,7 +25,6 @@
>   #include <linux/threads.h>
>   #include <linux/resume_user_mode.h>
>   #include <asm/current.h>
> -#include <asm/mmu_context.h>
>   #include <linux/uaccess.h>
>   #include <as-layout.h>
>   #include <kern_util.h>
> @@ -139,8 +138,6 @@ void new_thread_handler(void)
>   /* Called magically, see new_thread_handler above */
>   void fork_handler(void)
>   {
> -	force_flush_all();
> -
>   	schedule_tail(current->thread.prev_sched);
>   
>   	/*
> diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
> index 656fe16c9b63..ac4ca203ac24 100644
> --- a/arch/um/kernel/skas/mmu.c
> +++ b/arch/um/kernel/skas/mmu.c
> @@ -10,13 +10,13 @@
>   
>   #include <asm/pgalloc.h>
>   #include <asm/sections.h>
> +#include <asm/mmu_context.h>
>   #include <as-layout.h>
>   #include <os.h>
>   #include <skas.h>
>   
>   int init_new_context(struct task_struct *task, struct mm_struct *mm)
>   {
> - 	struct mm_context *from_mm = NULL;
>   	struct mm_context *to_mm = &mm->context;
>   	unsigned long stack = 0;
>   	int ret = -ENOMEM;
> @@ -26,14 +26,13 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
>   		goto out;
>   
>   	to_mm->id.stack = stack;
> -	if (current->mm != NULL && current->mm != &init_mm)
> -		from_mm = &current->mm->context;
>   
> +	/*
> +	 * Allocate a completely fresh mm. We'll sync the mappings once
> +	 * the rest of the kernel is done, in arch_dup_mmap().
> +	 */
>   	block_signals_trace();
> -	if (from_mm)
> -		to_mm->id.u.pid = copy_context_skas0(stack,
> -						     from_mm->id.u.pid);
> -	else to_mm->id.u.pid = start_userspace(stack);
> +	to_mm->id.u.pid = start_userspace(stack);
>   	unblock_signals_trace();
>   
>   	if (to_mm->id.u.pid < 0) {
> @@ -41,12 +40,9 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
>   		goto out_free;
>   	}
>   
> -	ret = init_new_ldt(to_mm, from_mm);
> -	if (ret < 0) {
> -		printk(KERN_ERR "init_new_context_skas - init_ldt"
> -		       " failed, errno = %d\n", ret);
> +	ret = init_new_ldt(to_mm);
> +	if (ret)
>   		goto out_free;
> -	}
>   
>   	return 0;
>   
> @@ -57,6 +53,21 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
>   	return ret;
>   }
>   
> +int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
> +{
> +	int ret = copy_ldt(&mm->context, &oldmm->context);
> +
> +	if (ret < 0) {
> +		printk(KERN_ERR "%s - copy_ldt failed, errno = %d\n",
> +		       __func__, ret);
> +		return ret;
> +	}
> +
> +	force_flush_all(mm);
> +	return 0;
> +}
> +
> +
>   void destroy_context(struct mm_struct *mm)
>   {
>   	struct mm_context *mmu = &mm->context;
> diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
> index 34ec8e677fb9..7c0161321fd9 100644
> --- a/arch/um/kernel/tlb.c
> +++ b/arch/um/kernel/tlb.c
> @@ -600,14 +600,11 @@ void flush_tlb_mm(struct mm_struct *mm)
>   		fix_range(mm, vma->vm_start, vma->vm_end, 0);
>   }
>   
> -void force_flush_all(void)
> +void force_flush_all(struct mm_struct *mm)
>   {
> -	struct mm_struct *mm = current->mm;
>   	struct vm_area_struct *vma;
>   	VMA_ITERATOR(vmi, mm, 0);
>   
> -	mmap_read_lock(mm);
>   	for_each_vma(vmi, vma)
>   		fix_range(mm, vma->vm_start, vma->vm_end, 1);
> -	mmap_read_unlock(mm);
>   }
> diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
> index f92129bbf981..403f4c6082b6 100644
> --- a/arch/um/os-Linux/skas/process.c
> +++ b/arch/um/os-Linux/skas/process.c
> @@ -508,113 +508,6 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs)
>   	}
>   }
>   
> -static unsigned long thread_regs[MAX_REG_NR];
> -static unsigned long thread_fp_regs[FP_SIZE];
> -
> -static int __init init_thread_regs(void)
> -{
> -	get_safe_registers(thread_regs, thread_fp_regs);
> -	/* Set parent's instruction pointer to start of clone-stub */
> -	thread_regs[REGS_IP_INDEX] = STUB_CODE +
> -				(unsigned long) stub_clone_handler -
> -				(unsigned long) __syscall_stub_start;
> -	thread_regs[REGS_SP_INDEX] = STUB_DATA + STUB_DATA_PAGES * UM_KERN_PAGE_SIZE -
> -		sizeof(void *);
> -#ifdef __SIGNAL_FRAMESIZE
> -	thread_regs[REGS_SP_INDEX] -= __SIGNAL_FRAMESIZE;
> -#endif
> -	return 0;
> -}
> -
> -__initcall(init_thread_regs);
> -
> -int copy_context_skas0(unsigned long new_stack, int pid)
> -{
> -	int err;
> -	unsigned long current_stack = current_stub_stack();
> -	struct stub_data *data = (struct stub_data *) current_stack;
> -	struct stub_data *child_data = (struct stub_data *) new_stack;
> -	unsigned long long new_offset;
> -	int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
> -
> -	/*
> -	 * prepare offset and fd of child's stack as argument for parent's
> -	 * and child's mmap2 calls
> -	 */
> -	*data = ((struct stub_data) {
> -		.offset	= MMAP_OFFSET(new_offset),
> -		.fd     = new_fd,
> -		.parent_err = -ESRCH,
> -		.child_err = 0,
> -	});
> -
> -	*child_data = ((struct stub_data) {
> -		.child_err = -ESRCH,
> -	});
> -
> -	err = ptrace_setregs(pid, thread_regs);
> -	if (err < 0) {
> -		err = -errno;
> -		printk(UM_KERN_ERR "%s : PTRACE_SETREGS failed, pid = %d, errno = %d\n",
> -		      __func__, pid, -err);
> -		return err;
> -	}
> -
> -	err = put_fp_registers(pid, thread_fp_regs);
> -	if (err < 0) {
> -		printk(UM_KERN_ERR "%s : put_fp_registers failed, pid = %d, err = %d\n",
> -		       __func__, pid, err);
> -		return err;
> -	}
> -
> -	/*
> -	 * Wait, until parent has finished its work: read child's pid from
> -	 * parent's stack, and check, if bad result.
> -	 */
> -	err = ptrace(PTRACE_CONT, pid, 0, 0);
> -	if (err) {
> -		err = -errno;
> -		printk(UM_KERN_ERR "Failed to continue new process, pid = %d, errno = %d\n",
> -		       pid, errno);
> -		return err;
> -	}
> -
> -	wait_stub_done(pid);
> -
> -	pid = data->parent_err;
> -	if (pid < 0) {
> -		printk(UM_KERN_ERR "%s - stub-parent reports error %d\n",
> -		      __func__, -pid);
> -		return pid;
> -	}
> -
> -	/*
> -	 * Wait, until child has finished too: read child's result from
> -	 * child's stack and check it.
> -	 */
> -	wait_stub_done(pid);
> -	if (child_data->child_err != STUB_DATA) {
> -		printk(UM_KERN_ERR "%s - stub-child %d reports error %ld\n",
> -		       __func__, pid, data->child_err);
> -		err = data->child_err;
> -		goto out_kill;
> -	}
> -
> -	if (ptrace(PTRACE_OLDSETOPTIONS, pid, NULL,
> -		   (void *)PTRACE_O_TRACESYSGOOD) < 0) {
> -		err = -errno;
> -		printk(UM_KERN_ERR "%s : PTRACE_OLDSETOPTIONS failed, errno = %d\n",
> -		       __func__, errno);
> -		goto out_kill;
> -	}
> -
> -	return pid;
> -
> - out_kill:
> -	os_kill_ptraced_process(pid, 1);
> -	return err;
> -}
> -
>   void new_thread(void *stack, jmp_buf *buf, void (*handler)(void))
>   {
>   	(*buf)[0].JB_IP = (unsigned long) handler;
> diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
> index 255a44dd415a..609feaeff23b 100644
> --- a/arch/x86/um/ldt.c
> +++ b/arch/x86/um/ldt.c
> @@ -297,36 +297,37 @@ static void ldt_get_host_info(void)
>   	free_pages((unsigned long)ldt, order);
>   }
>   
> -long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
> +int init_new_ldt(struct mm_context *new_mm)
>   {
> -	struct user_desc desc;
> +	struct user_desc desc = {};
>   	short * num_p;
> -	int i;
> -	long page, err=0;
> +	int err = 0;
>   	void *addr = NULL;
>   
> -
>   	mutex_init(&new_mm->arch.ldt.lock);
>   
> -	if (!from_mm) {
> -		memset(&desc, 0, sizeof(desc));
> -		/*
> -		 * Now we try to retrieve info about the ldt, we
> -		 * inherited from the host. All ldt-entries found
> -		 * will be reset in the following loop
> -		 */
> -		ldt_get_host_info();
> -		for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
> -			desc.entry_number = *num_p;
> -			err = write_ldt_entry(&new_mm->id, 1, &desc,
> -					      &addr, *(num_p + 1) == -1);
> -			if (err)
> -				break;
> -		}
> -		new_mm->arch.ldt.entry_count = 0;
> -
> -		goto out;
> +	memset(&desc, 0, sizeof(desc));
> +	/*
> +	 * Now we try to retrieve info about the ldt, we
> +	 * inherited from the host. All ldt-entries found
> +	 * will be reset in the following loop
> +	 */
> +	ldt_get_host_info();
> +	for (num_p=host_ldt_entries; *num_p != -1; num_p++) {
> +		desc.entry_number = *num_p;
> +		err = write_ldt_entry(&new_mm->id, 1, &desc,
> +				      &addr, *(num_p + 1) == -1);
> +		if (err)
> +			break;
>   	}
> +	new_mm->arch.ldt.entry_count = 0;
> +
> +	return err;
> +}
> +
> +int copy_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
> +{
> +	int err = 0;
>   
>   	/*
>   	 * Our local LDT is used to supply the data for
> @@ -339,7 +340,9 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
>   		memcpy(new_mm->arch.ldt.u.entries, from_mm->arch.ldt.u.entries,
>   		       sizeof(new_mm->arch.ldt.u.entries));
>   	else {
> -		i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
> +		int i = from_mm->arch.ldt.entry_count / LDT_ENTRIES_PER_PAGE;
> +		unsigned long page;
> +
>   		while (i-->0) {
>   			page = __get_free_page(GFP_KERNEL|__GFP_ZERO);
>   			if (!page) {
> @@ -355,11 +358,9 @@ long init_new_ldt(struct mm_context *new_mm, struct mm_context *from_mm)
>   	new_mm->arch.ldt.entry_count = from_mm->arch.ldt.entry_count;
>   	mutex_unlock(&from_mm->arch.ldt.lock);
>   
> -    out:
>   	return err;
>   }
>   
> -
>   void free_ldt(struct mm_context *mm)
>   {
>   	int i;

-- 
Anton R. Ivanov
Cambridgegreys Limited. Registered in England. Company Number 10273661
https://www.cambridgegreys.com/




More information about the linux-um mailing list