[RFC PATCH 4/4] um: Add SMP support
Tiwei Bie
tiwei.bie at linux.dev
Sun Jul 13 10:25:36 PDT 2025
From: Tiwei Bie <tiwei.btw at antgroup.com>
This is currently a PoC patch and requires further improvements.
Sorry for the patch size. It will be split into smaller patches
in the future.
Signed-off-by: Tiwei Bie <tiwei.btw at antgroup.com>
---
arch/um/Kconfig | 28 ++-
arch/um/include/asm/Kbuild | 3 +
arch/um/include/asm/current.h | 5 +-
arch/um/include/asm/hardirq.h | 24 ++-
arch/um/include/asm/irqflags.h | 4 +-
arch/um/include/asm/mmu.h | 7 +
arch/um/include/asm/pgtable.h | 2 +
arch/um/include/asm/processor-generic.h | 6 +
arch/um/include/asm/smp.h | 31 +++-
arch/um/include/asm/spinlock.h | 8 +
arch/um/include/linux/smp-internal.h | 8 +
arch/um/include/linux/time-internal.h | 3 +
arch/um/include/shared/kern_util.h | 2 +
arch/um/include/shared/longjmp.h | 3 +-
arch/um/include/shared/os.h | 12 +-
arch/um/include/shared/smp.h | 14 ++
arch/um/kernel/Makefile | 1 +
arch/um/kernel/irq.c | 31 +++-
arch/um/kernel/ksyms.c | 2 +-
arch/um/kernel/mem.c | 2 +
arch/um/kernel/process.c | 19 +-
arch/um/kernel/skas/mmu.c | 16 +-
arch/um/kernel/smp.c | 223 ++++++++++++++++++++++++
arch/um/kernel/time.c | 48 +++--
arch/um/kernel/tlb.c | 5 +-
arch/um/kernel/trap.c | 2 +-
arch/um/kernel/um_arch.c | 60 ++++++-
arch/um/os-Linux/Makefile | 4 +-
arch/um/os-Linux/file.c | 72 ++++++--
arch/um/os-Linux/main.c | 5 +-
arch/um/os-Linux/process.c | 15 ++
arch/um/os-Linux/signal.c | 16 +-
arch/um/os-Linux/skas/process.c | 1 +
arch/um/os-Linux/smp.c | 44 +++++
arch/um/os-Linux/start_up.c | 3 +
arch/um/os-Linux/time.c | 29 +--
arch/um/os-Linux/user_syms.c | 5 +
37 files changed, 687 insertions(+), 76 deletions(-)
create mode 100644 arch/um/include/asm/spinlock.h
create mode 100644 arch/um/include/linux/smp-internal.h
create mode 100644 arch/um/include/shared/smp.h
create mode 100644 arch/um/kernel/smp.c
create mode 100644 arch/um/os-Linux/smp.c
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 9083bfdb7735..a3130156c9af 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -30,6 +30,7 @@ config UML
select HAVE_GCC_PLUGINS
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
+ select ARCH_USE_QUEUED_RWLOCKS
select TRACE_IRQFLAGS_SUPPORT
select TTY # Needed for line.c
select HAVE_ARCH_VMAP_STACK
@@ -79,10 +80,30 @@ config HZ
int
default 100
-config NR_CPUS
+config SMP
+ bool "Symmetric multi-processing support"
+ default n
+ help
+ This option enables UML SMP support.
+
+config NR_CPUS_RANGE_BEGIN
+ int
+ default 1 if !SMP
+ default 2
+
+config NR_CPUS_RANGE_END
int
- range 1 1
- default 1
+ default 256
+
+config NR_CPUS_DEFAULT
+ int
+ default 2 if SMP
+ default 1 if !SMP
+
+config NR_CPUS
+ int "Maximum number of CPUs" if SMP
+ range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+ default NR_CPUS_DEFAULT
source "arch/$(HEADER_ARCH)/um/Kconfig"
@@ -258,6 +279,7 @@ source "arch/um/drivers/Kconfig"
config ARCH_SUSPEND_POSSIBLE
def_bool y
+ depends on !SMP
menu "Power management options"
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 04ab3b653a48..d8c436d6eb8c 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -19,8 +19,11 @@ generic-y += param.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
+generic-y += qrwlock.h
+generic-y += qrwlock_types.h
generic-y += runtime-const.h
generic-y += softirq_stack.h
+generic-y += spinlock_types.h
generic-y += switch_to.h
generic-y += topology.h
generic-y += trace_clock.h
diff --git a/arch/um/include/asm/current.h b/arch/um/include/asm/current.h
index de64e032d66c..7469ba5f2a42 100644
--- a/arch/um/include/asm/current.h
+++ b/arch/um/include/asm/current.h
@@ -7,15 +7,16 @@
#ifndef __ASSEMBLY__
+#include <asm/smp.h>
+
struct task_struct;
extern struct task_struct *cpu_tasks[NR_CPUS];
static __always_inline struct task_struct *get_current(void)
{
- return cpu_tasks[0];
+ return cpu_tasks[raw_smp_processor_id()];
}
-
#define current get_current()
#endif /* __ASSEMBLY__ */
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
index 52e2c36267a9..cd6e4fc98436 100644
--- a/arch/um/include/asm/hardirq.h
+++ b/arch/um/include/asm/hardirq.h
@@ -2,8 +2,30 @@
#ifndef __ASM_UM_HARDIRQ_H
#define __ASM_UM_HARDIRQ_H
-#include <asm-generic/hardirq.h>
+#include <linux/cache.h>
+#include <linux/threads.h>
#define __ARCH_IRQ_EXIT_IRQS_DISABLED 1
+typedef struct {
+ unsigned int __softirq_pending;
+#if IS_ENABLED(CONFIG_SMP)
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+#endif
+} ____cacheline_aligned irq_cpustat_t;
+
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define __ARCH_IRQ_STAT
+
+#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
+
+#include <linux/irq.h>
+
+static inline void ack_bad_irq(unsigned int irq)
+{
+ printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+}
+
#endif /* __ASM_UM_HARDIRQ_H */
diff --git a/arch/um/include/asm/irqflags.h b/arch/um/include/asm/irqflags.h
index 1e69ef5bc35e..31e49e0894c5 100644
--- a/arch/um/include/asm/irqflags.h
+++ b/arch/um/include/asm/irqflags.h
@@ -2,7 +2,7 @@
#ifndef __UM_IRQFLAGS_H
#define __UM_IRQFLAGS_H
-extern int signals_enabled;
+int um_get_signals(void);
int um_set_signals(int enable);
void block_signals(void);
void unblock_signals(void);
@@ -10,7 +10,7 @@ void unblock_signals(void);
#define arch_local_save_flags arch_local_save_flags
static inline unsigned long arch_local_save_flags(void)
{
- return signals_enabled;
+ return um_get_signals();
}
#define arch_local_irq_restore arch_local_irq_restore
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 4d0e4239f3cc..2f9fb9c788d2 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -7,6 +7,7 @@
#define __ARCH_UM_MMU_H
#include "linux/types.h"
+#include <linux/spinlock.h>
#include <mm_id.h>
typedef struct mm_context {
@@ -17,6 +18,12 @@ typedef struct mm_context {
/* Address range in need of a TLB sync */
unsigned long sync_tlb_range_from;
unsigned long sync_tlb_range_to;
+ spinlock_t sync_tlb_lock;
} mm_context_t;
+#define INIT_MM_CONTEXT(mm) \
+ .context = { \
+ .sync_tlb_lock = __SPIN_LOCK_INITIALIZER(mm.context.sync_tlb_lock), \
+ }
+
#endif
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 24fdea6f88c3..91aec3698475 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -225,6 +225,8 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
static inline void um_tlb_mark_sync(struct mm_struct *mm, unsigned long start,
unsigned long end)
{
+ guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
if (!mm->context.sync_tlb_range_to) {
mm->context.sync_tlb_range_from = start;
mm->context.sync_tlb_range_to = end;
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 236fdfd7cdbe..792761b9a02b 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -81,6 +81,12 @@ struct cpuinfo_um {
extern struct cpuinfo_um boot_cpu_data;
+#if IS_ENABLED(CONFIG_SMP)
+extern struct cpuinfo_um uml_cpu_data[];
+#else
+#define uml_cpu_data &boot_cpu_data
+#endif
+
#define cache_line_size() (boot_cpu_data.cache_alignment)
#define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index a8cc1d46ddcb..585f2d59dfc7 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -2,6 +2,35 @@
#ifndef __UM_SMP_H
#define __UM_SMP_H
-#define hard_smp_processor_id() 0
+#if IS_ENABLED(CONFIG_SMP)
+
+#include <linux/bitops.h>
+#include <asm/current.h>
+#include <linux/cpumask.h>
+#include <shared/smp.h>
+
+#define raw_smp_processor_id raw_smp_processor_id
+static inline int raw_smp_processor_id(void)
+{
+ return uml_curr_cpu();
+}
+
+#define cpu_logical_map(n) (n)
+#define cpu_number_map(n) (n)
+#define NO_PROC_ID -1
+
+extern int uml_ncpus;
+
+void arch_smp_send_reschedule(int cpu);
+
+void arch_send_call_function_single_ipi(int cpu);
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+static inline void smp_cpus_done(unsigned int maxcpus) { }
+
+#else
+#define raw_smp_processor_id() 0
+#endif
#endif
diff --git a/arch/um/include/asm/spinlock.h b/arch/um/include/asm/spinlock.h
new file mode 100644
index 000000000000..f2258443c316
--- /dev/null
+++ b/arch/um/include/asm/spinlock.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_UM_SPINLOCK_H
+#define __ASM_UM_SPINLOCK_H
+
+#include <asm/processor.h>
+#include <asm-generic/spinlock.h>
+
+#endif /* __ASM_UM_SPINLOCK_H */
diff --git a/arch/um/include/linux/smp-internal.h b/arch/um/include/linux/smp-internal.h
new file mode 100644
index 000000000000..689c43c5105f
--- /dev/null
+++ b/arch/um/include/linux/smp-internal.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SMP_INTERNAL_H__
+#define __SMP_INTERNAL_H__
+
+int smp_sigio_handler(struct uml_pt_regs *regs);
+void IPI_handler(int cpu, struct uml_pt_regs *regs);
+
+#endif /* __SMP_INTERNAL_H__ */
diff --git a/arch/um/include/linux/time-internal.h b/arch/um/include/linux/time-internal.h
index 138908b999d7..286e75f0852a 100644
--- a/arch/um/include/linux/time-internal.h
+++ b/arch/um/include/linux/time-internal.h
@@ -90,4 +90,7 @@ extern unsigned long tt_extra_sched_jiffies;
* which is intentional since we really shouldn't link it in that case.
*/
void time_travel_ndelay(unsigned long nsec);
+
+void um_setup_timer(void);
+
#endif /* __TIMER_INTERNAL_H__ */
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index 00ca3e12fd9a..894b127bf22f 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -12,8 +12,10 @@
struct siginfo;
extern int uml_exitcode;
+extern int uml_ncpus;
extern int kmalloc_ok;
+extern int disable_kmalloc[];
#define UML_ROUND_UP(addr) \
((((unsigned long) addr) + PAGE_SIZE - 1) & PAGE_MASK)
diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
index 8863319039f3..c53e43d980c8 100644
--- a/arch/um/include/shared/longjmp.h
+++ b/arch/um/include/shared/longjmp.h
@@ -5,7 +5,6 @@
#include <sysdep/archsetjmp.h>
#include <os.h>
-extern int signals_enabled;
extern int setjmp(jmp_buf);
extern void longjmp(jmp_buf, int);
@@ -15,7 +14,7 @@ extern void longjmp(jmp_buf, int);
#define UML_SETJMP(buf) ({ \
int n, enable; \
- enable = *(volatile int *)&signals_enabled; \
+ enable = um_get_signals(); \
n = setjmp(*buf); \
if(n != 0) \
um_set_signals_trace(enable); \
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index b35cc8ce333b..77ecd1104520 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -156,6 +156,7 @@ extern int os_pwrite_file(int fd, const void *buf, int count, unsigned long long
extern int os_file_modtime(const char *file, long long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_set_fd_async(int fd);
+extern int os_set_fd_async_thread(int fd);
extern int os_clear_fd_async(int fd);
extern int os_set_fd_block(int fd, int blocking);
extern int os_accept_connection(int fd);
@@ -203,6 +204,7 @@ extern void os_kill_process(int pid, int reap_child);
extern void os_kill_ptraced_process(int pid, int reap_child);
extern int os_getpid(void);
+extern int os_gettid(void);
extern void init_new_thread_signals(void);
@@ -216,6 +218,8 @@ extern int can_drop_memory(void);
void os_set_pdeathsig(void);
+int os_futex_wake(void *uaddr, unsigned int val);
+
/* execvp.c */
extern int execvp_noalloc(char *buf, const char *file, char *const argv[]);
/* helper.c */
@@ -243,6 +247,7 @@ extern void send_sigio_to_self(void);
extern int change_sig(int signal, int on);
extern void block_signals(void);
extern void unblock_signals(void);
+extern int um_get_signals(void);
extern int um_set_signals(int enable);
extern int um_set_signals_trace(int enable);
extern void deliver_alarm(void);
@@ -268,9 +273,9 @@ extern void os_warn(const char *fmt, ...)
/* time.c */
extern void os_idle_sleep(void);
extern int os_timer_create(void);
-extern int os_timer_set_interval(unsigned long long nsecs);
-extern int os_timer_one_shot(unsigned long long nsecs);
-extern void os_timer_disable(void);
+extern int os_timer_set_interval(int cpu, unsigned long long nsecs);
+extern int os_timer_one_shot(int cpu, unsigned long long nsecs);
+extern void os_timer_disable(int cpu);
extern long long os_persistent_clock_emulation(void);
extern long long os_nsecs(void);
@@ -291,6 +296,7 @@ extern void userspace(struct uml_pt_regs *regs);
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
extern void switch_threads(jmp_buf *me, jmp_buf *you);
extern int start_idle_thread(void *stack, jmp_buf *switch_buf);
+extern void start_idle_thread_secondary(jmp_buf *switch_buf);
extern void initial_thread_cb_skas(void (*proc)(void *),
void *arg);
extern void halt_skas(void);
diff --git a/arch/um/include/shared/smp.h b/arch/um/include/shared/smp.h
new file mode 100644
index 000000000000..21544fad51db
--- /dev/null
+++ b/arch/um/include/shared/smp.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UM_SHARED_SMP_H
+#define __UM_SHARED_SMP_H
+
+#if IS_ENABLED(CONFIG_SMP)
+int uml_curr_cpu(void);
+#else
+#define uml_curr_cpu() 0
+#endif
+
+int start_cpu_thread(int cpu);
+void start_idle(void);
+
+#endif /* __UM_SHARED_SMP_H */
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index b8f4e9281599..be60bc451b3f 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_OF) += dtb.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_SMP) += smp.o
USER_OBJS := config.o
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 0dfaf96bb7da..9c351f537811 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -20,8 +20,12 @@
#include <os.h>
#include <irq_user.h>
#include <irq_kern.h>
+#include <linux/smp-internal.h>
#include <linux/time-internal.h>
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+
+#define irq_stats(x) (&per_cpu(irq_stat, x))
/* When epoll triggers we do not know why it did so
* we can also have different IRQs for read and write.
@@ -205,6 +209,9 @@ static void _sigio_handler(struct uml_pt_regs *regs,
if (!irqs_suspended)
irq_do_pending_events(timetravel_handlers_only);
+ if (smp_sigio_handler(regs))
+ return;
+
while (1) {
/* This is now lockless - epoll keeps back-referencesto the irqs
* which have trigger it so there is no need to walk the irq
@@ -683,7 +690,7 @@ void __init init_IRQ(void)
{
int i;
- irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
+ irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_percpu_irq);
for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
@@ -696,3 +703,25 @@ void sigchld_handler(int sig, struct siginfo *unused_si,
{
do_IRQ(SIGCHLD_IRQ, regs);
}
+
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ int cpu;
+
+#if IS_ENABLED(CONFIG_SMP)
+ seq_printf(p, "%*s: ", prec, "RES");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_resched_count);
+ seq_puts(p, " Rescheduling interrupts\n");
+
+ seq_printf(p, "%*s: ", prec, "CAL");
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", irq_stats(cpu)->irq_call_count);
+ seq_puts(p, " Function call interrupts\n");
+#endif
+
+ return 0;
+}
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index f2fb77da08cf..96314c31e61c 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -6,8 +6,8 @@
#include <linux/module.h>
#include <os.h>
+EXPORT_SYMBOL(um_get_signals);
EXPORT_SYMBOL(um_set_signals);
-EXPORT_SYMBOL(signals_enabled);
EXPORT_SYMBOL(os_stat_fd);
EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 76bec7de81b5..8e7742140e93 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -53,6 +53,8 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD];
/* Initialized at boot time, and readonly after that */
int kmalloc_ok = 0;
+int disable_kmalloc[NR_CPUS] = { 0 };
+
/* Used during early boot */
static unsigned long brk_end;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 1be644de9e41..9caa3d56b7c7 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -35,6 +35,7 @@
#include <os.h>
#include <skas.h>
#include <registers.h>
+#include <linux/smp-internal.h>
#include <linux/time-internal.h>
#include <linux/elfcore.h>
@@ -185,11 +186,12 @@ int copy_thread(struct task_struct * p, const struct kernel_clone_args *args)
void initial_thread_cb(void (*proc)(void *), void *arg)
{
- int save_kmalloc_ok = kmalloc_ok;
+ int cpu = raw_smp_processor_id();
+ int save_kmalloc = disable_kmalloc[cpu];
- kmalloc_ok = 0;
+ disable_kmalloc[cpu] = 1;
initial_thread_cb_skas(proc, arg);
- kmalloc_ok = save_kmalloc_ok;
+ disable_kmalloc[cpu] = save_kmalloc;
}
int arch_dup_task_struct(struct task_struct *dst,
@@ -299,3 +301,14 @@ unsigned long __get_wchan(struct task_struct *p)
return 0;
}
+
+int smp_sigio_handler(struct uml_pt_regs *regs)
+{
+#if IS_ENABLED(CONFIG_SMP)
+ int cpu = raw_smp_processor_id();
+ IPI_handler(cpu, regs);
+ if (cpu != 0)
+ return 1;
+#endif
+ return 0;
+}
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index afe9a2f251ef..fbb4b1c39185 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -29,6 +29,8 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
unsigned long stack = 0;
int ret = -ENOMEM;
+ spin_lock_init(&mm->context.sync_tlb_lock);
+
stack = __get_free_pages(GFP_KERNEL | __GFP_ZERO, ilog2(STUB_DATA_PAGES));
if (stack == 0)
goto out;
@@ -73,6 +75,9 @@ void destroy_context(struct mm_struct *mm)
return;
}
+ scoped_guard(spinlock_irqsave, &mm_list_lock)
+ list_del(&mm->context.list);
+
if (mmu->id.pid > 0) {
os_kill_ptraced_process(mmu->id.pid, 1);
mmu->id.pid = -1;
@@ -82,10 +87,6 @@ void destroy_context(struct mm_struct *mm)
os_close_file(mmu->id.sock);
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
-
- guard(spinlock_irqsave)(&mm_list_lock);
-
- list_del(&mm->context.list);
}
static irqreturn_t mm_sigchld_irq(int irq, void* dev)
@@ -110,12 +111,11 @@ static irqreturn_t mm_sigchld_irq(int irq, void* dev)
/* Marks the MM as dead */
mm_context->id.pid = -1;
- /*
- * NOTE: If SMP is implemented, a futex_wake
- * needs to be added here.
- */
stub_data = (void *)mm_context->id.stack;
stub_data->futex = FUTEX_IN_KERN;
+#if IS_ENABLED(CONFIG_SMP)
+ os_futex_wake(&stub_data->futex, 1);
+#endif
/*
* NOTE: Currently executing syscalls by
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
new file mode 100644
index 000000000000..c38af62d04a5
--- /dev/null
+++ b/arch/um/kernel/smp.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
+ * Copyright (C) 2025 Ant Group
+ */
+
+#include <linux/percpu.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <linux/cpu.h>
+#include <linux/smp-internal.h>
+#include <linux/time-internal.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/spinlock.h>
+#include <kern.h>
+#include <smp.h>
+#include <irq_user.h>
+#include <as-layout.h>
+#include <os.h>
+
+/*
+ * Per CPU bogomips and other parameters
+ * The only piece used here is the ipi pipe, which is set before SMP is
+ * started and never changed.
+ */
+struct cpuinfo_um uml_cpu_data[NR_CPUS];
+
+void arch_smp_send_reschedule(int cpu)
+{
+ os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "R", 1);
+}
+
+void smp_send_stop(void)
+{
+ int i;
+
+ printk(KERN_INFO "Stopping all CPUs...");
+ for (i = 0; i < num_online_cpus(); i++) {
+ if (i == current_thread_info()->cpu)
+ continue;
+ os_write_file(uml_cpu_data[i].ipi_pipe[1], "S", 1);
+ }
+ printk(KERN_CONT "done\n");
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+ os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "I", 1);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+ int cpu;
+
+ for_each_cpu(cpu, mask)
+ os_write_file(uml_cpu_data[cpu].ipi_pipe[1], "M", 1);
+}
+
+static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
+static cpumask_t cpu_callin_map = CPU_MASK_NONE;
+
+static int idle_proc(void *unused)
+{
+ int err, cpu = raw_smp_processor_id();
+
+ err = os_pipe(uml_cpu_data[cpu].ipi_pipe, 1, 1);
+ if (err < 0)
+ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
+
+ os_set_fd_async_thread(uml_cpu_data[cpu].ipi_pipe[0]);
+
+ wmb();
+ if (cpumask_test_and_set_cpu(cpu, &cpu_callin_map)) {
+ printk(KERN_ERR "huh, CPU#%d already present??\n", cpu);
+ BUG();
+ }
+
+ while (!cpumask_test_cpu(cpu, &smp_commenced_mask))
+ cpu_relax();
+
+ notify_cpu_starting(cpu);
+ set_cpu_online(cpu, true);
+
+ um_setup_timer();
+
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+ return 0;
+}
+
+static struct task_struct *idle_thread[NR_CPUS];
+static char irqstack[NR_CPUS][THREAD_SIZE] __aligned(THREAD_SIZE);
+
+void start_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct mm_struct *mm = &init_mm;
+ struct task_struct *p = idle_thread[cpu];
+
+ p->thread_info.cpu = cpu;
+
+ stack_protections((unsigned long) &irqstack[cpu]);
+ set_sigstack(&irqstack[cpu], THREAD_SIZE);
+
+ mmgrab(mm);
+ p->active_mm = mm;
+
+ p->thread.request.thread.proc = idle_proc;
+ p->thread.request.thread.arg = NULL;
+
+ new_thread(task_stack_page(p), &p->thread.switch_buf, new_thread_handler);
+ start_idle_thread_secondary(&p->thread.switch_buf);
+}
+
+static struct task_struct *new_idle_thread(int cpu)
+{
+ struct task_struct *new_task;
+
+ new_task = fork_idle(cpu);
+ if (IS_ERR(new_task))
+ panic("%s: fork_idle failed, error = %ld", __func__,
+ PTR_ERR(new_task));
+
+ cpu_tasks[cpu] = new_task;
+ return new_task;
+}
+
+void __init smp_prepare_cpus(unsigned int maxcpus)
+{
+ unsigned long waittime;
+ int err, cpu, me = smp_processor_id();
+
+ set_cpu_online(me, true);
+ cpumask_set_cpu(me, &cpu_callin_map);
+
+ err = os_pipe(uml_cpu_data[me].ipi_pipe, 1, 1);
+ if (err < 0)
+ panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
+
+ os_set_fd_async_thread(uml_cpu_data[me].ipi_pipe[0]);
+
+ for (cpu = 1; cpu < uml_ncpus; cpu++) {
+ printk(KERN_INFO "Booting processor %d...\n", cpu);
+
+ idle_thread[cpu] = new_idle_thread(cpu);
+ err = start_cpu_thread(cpu);
+ if (err < 0)
+ panic("CPU#%d failed to start cpu thread, errno = %d", cpu, -err);
+
+ waittime = 200000000;
+ while (waittime-- && !cpumask_test_cpu(cpu, &cpu_callin_map))
+ cpu_relax();
+
+ printk(KERN_INFO "%s\n",
+ cpumask_test_cpu(cpu, &cpu_callin_map) ? "done" : "failed");
+ set_cpu_present(cpu, true);
+ }
+}
+
+void smp_prepare_boot_cpu(void)
+{
+ set_cpu_online(smp_processor_id(), true);
+}
+
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
+{
+ cpumask_set_cpu(cpu, &smp_commenced_mask);
+ while (!cpu_online(cpu))
+ mb();
+ return 0;
+}
+
+void IPI_handler(int cpu, struct uml_pt_regs *regs)
+{
+ struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);
+ unsigned char c;
+ int fd;
+
+ irq_enter();
+
+ fd = uml_cpu_data[cpu].ipi_pipe[0];
+ while (os_read_file(fd, &c, 1) == 1) {
+ switch (c) {
+ case 'R':
+ inc_irq_stat(irq_resched_count);
+ scheduler_ipi();
+ break;
+
+ case 'S':
+ printk(KERN_INFO "CPU#%d stopping\n", cpu);
+ while (1)
+ pause();
+ break;
+
+ case 'I':
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_single_interrupt();
+ break;
+
+ case 'M':
+ inc_irq_stat(irq_call_count);
+ generic_smp_call_function_interrupt();
+ break;
+
+ default:
+ printk(KERN_ERR "CPU#%d received unknown IPI [%c]!\n",
+ cpu, c);
+ break;
+ }
+ }
+
+ irq_exit();
+ set_irq_regs(old_regs);
+}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index ae0fa2173778..83b16d37ce33 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -625,9 +625,10 @@ void time_travel_sleep(void)
* controller application.
*/
unsigned long long next = S64_MAX;
+ int cpu = raw_smp_processor_id();
if (time_travel_mode == TT_MODE_BASIC)
- os_timer_disable();
+ os_timer_disable(cpu);
time_travel_update_time(next, true);
@@ -638,9 +639,9 @@ void time_travel_sleep(void)
* This is somewhat wrong - we should get the first
* one sooner like the os_timer_one_shot() below...
*/
- os_timer_set_interval(time_travel_timer_interval);
+ os_timer_set_interval(cpu, time_travel_timer_interval);
} else {
- os_timer_one_shot(time_travel_timer_event.time - next);
+ os_timer_one_shot(cpu, time_travel_timer_event.time - next);
}
}
}
@@ -758,6 +759,8 @@ extern u64 time_travel_ext_req(u32 op, u64 time);
#define time_travel_del_event(e) do { } while (0)
#endif
+static struct clock_event_device timer_clockevent[NR_CPUS];
+
void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
{
unsigned long flags;
@@ -780,12 +783,14 @@ void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
static int itimer_shutdown(struct clock_event_device *evt)
{
+ int cpu = evt - &timer_clockevent[0];
+
if (time_travel_mode != TT_MODE_OFF)
time_travel_del_event(&time_travel_timer_event);
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_disable();
+ os_timer_disable(cpu);
return 0;
}
@@ -793,6 +798,7 @@ static int itimer_shutdown(struct clock_event_device *evt)
static int itimer_set_periodic(struct clock_event_device *evt)
{
unsigned long long interval = NSEC_PER_SEC / HZ;
+ int cpu = evt - &timer_clockevent[0];
if (time_travel_mode != TT_MODE_OFF) {
time_travel_del_event(&time_travel_timer_event);
@@ -805,7 +811,7 @@ static int itimer_set_periodic(struct clock_event_device *evt)
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- os_timer_set_interval(interval);
+ os_timer_set_interval(cpu, interval);
return 0;
}
@@ -825,7 +831,7 @@ static int itimer_next_event(unsigned long delta,
if (time_travel_mode != TT_MODE_INFCPU &&
time_travel_mode != TT_MODE_EXTERNAL)
- return os_timer_one_shot(delta);
+ return os_timer_one_shot(raw_smp_processor_id(), delta);
return 0;
}
@@ -835,10 +841,9 @@ static int itimer_one_shot(struct clock_event_device *evt)
return itimer_next_event(0, evt);
}
-static struct clock_event_device timer_clockevent = {
+static struct clock_event_device _timer_clockevent = {
.name = "posix-timer",
.rating = 250,
- .cpumask = cpu_possible_mask,
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_shutdown = itimer_shutdown,
@@ -856,6 +861,9 @@ static struct clock_event_device timer_clockevent = {
static irqreturn_t um_timer(int irq, void *dev)
{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+
/*
* Interrupt the (possibly) running userspace process, technically this
* should only happen if userspace is currently executing.
@@ -867,7 +875,7 @@ static irqreturn_t um_timer(int irq, void *dev)
get_current()->mm)
os_alarm_process(get_current()->mm->context.id.pid);
- (*timer_clockevent.event_handler)(&timer_clockevent);
+ evt->event_handler(evt);
return IRQ_HANDLED;
}
@@ -904,8 +912,26 @@ static struct clocksource timer_clocksource = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+void um_setup_timer(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
+ int err;
+
+ err = os_timer_create();
+ if (err != 0) {
+ printk(KERN_ERR "creation of timer failed - errno = %d\n", -err);
+ return;
+ }
+ memcpy(evt, &_timer_clockevent, sizeof(*evt));
+ evt->cpumask = cpumask_of(cpu);
+ clockevents_register_device(evt);
+}
+
static void __init um_timer_setup(void)
{
+ int cpu = raw_smp_processor_id();
+ struct clock_event_device *evt = &timer_clockevent[cpu];
int err;
err = request_irq(TIMER_IRQ, um_timer, IRQF_TIMER, "hr timer", NULL);
@@ -924,7 +950,9 @@ static void __init um_timer_setup(void)
printk(KERN_ERR "clocksource_register_hz returned %d\n", err);
return;
}
- clockevents_register_device(&timer_clockevent);
+ memcpy(evt, &_timer_clockevent, sizeof(*evt));
+ evt->cpumask = cpumask_of(cpu);
+ clockevents_register_device(evt);
}
void read_persistent_clock64(struct timespec64 *ts)
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index cf7e0d4407f2..39608cccf2c6 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -162,9 +162,11 @@ int um_tlb_sync(struct mm_struct *mm)
{
pgd_t *pgd;
struct vm_ops ops;
- unsigned long addr = mm->context.sync_tlb_range_from, next;
+ unsigned long addr, next;
int ret = 0;
+ guard(spinlock_irqsave)(&mm->context.sync_tlb_lock);
+
if (mm->context.sync_tlb_range_to == 0)
return 0;
@@ -177,6 +179,7 @@ int um_tlb_sync(struct mm_struct *mm)
ops.unmap = unmap;
}
+ addr = mm->context.sync_tlb_range_from;
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, mm->context.sync_tlb_range_to);
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 5b80a3a89c20..177615820a4c 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -316,7 +316,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user,
if (!is_user && regs)
current->thread.segv_regs = container_of(regs, struct pt_regs, regs);
- if (!is_user && init_mm.context.sync_tlb_range_to) {
+ if (!is_user && address >= start_vm && address < end_vm) {
/*
* Kernel has pending updates from set_ptes that were not
* flushed yet. Syncing them should fix the pagefault (if not
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 2f5ee045bc7a..d7fbf127021d 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -74,6 +74,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
int i = 0;
+#if IS_ENABLED(CONFIG_SMP)
+ i = (struct cpuinfo_um *) v - uml_cpu_data;
+ if (!cpu_online(i))
+ return 0;
+#endif
+
seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "vendor_id\t: User Mode Linux\n");
seq_printf(m, "model name\t: UML\n");
@@ -90,13 +96,12 @@ static int show_cpuinfo(struct seq_file *m, void *v)
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
-
return 0;
}
static void *c_start(struct seq_file *m, loff_t *pos)
{
- return *pos < nr_cpu_ids ? &boot_cpu_data + *pos : NULL;
+ return *pos < nr_cpu_ids ? uml_cpu_data + *pos : NULL;
}
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
@@ -124,6 +129,9 @@ unsigned long uml_reserved; /* Also modified in mem_init */
unsigned long start_vm;
unsigned long end_vm;
+/* Set in uml_ncpus_setup */
+int uml_ncpus = 1;
+
/* Set in early boot */
static int have_root __initdata;
static int have_console __initdata;
@@ -176,6 +184,27 @@ __uml_setup("console=", uml_console_setup,
" Specify the preferred console output driver\n\n"
);
+#if IS_ENABLED(CONFIG_SMP)
+static int __init uml_ncpus_setup(char *line, int *add)
+{
+ *add = 0;
+
+ if (!sscanf(line, "%d", &uml_ncpus)) {
+ os_warn("Couldn't parse '%s'\n", line);
+ return -1;
+ }
+
+ uml_ncpus = min(uml_ncpus, NR_CPUS);
+
+ return 0;
+}
+
+__uml_setup("ncpus=", uml_ncpus_setup,
+"ncpus=<# of desired CPUs>\n"
+" This tells an SMP kernel how many virtual processors to start.\n\n"
+);
+#endif
+
static int __init Usage(char *line, int *add)
{
const char **p;
@@ -413,6 +442,20 @@ int __init __weak read_initrd(void)
return 0;
}
+#if IS_ENABLED(CONFIG_SMP)
+static void __init prefill_possible_map(void)
+{
+ int i;
+
+ for (i = 0; i < uml_ncpus; i++)
+ set_cpu_possible(i, true);
+ for (; i < NR_CPUS; i++)
+ set_cpu_possible(i, false);
+}
+#else
+static inline void prefill_possible_map(void) {}
+#endif
+
void __init setup_arch(char **cmdline_p)
{
u8 rng_seed[32];
@@ -426,6 +469,7 @@ void __init setup_arch(char **cmdline_p)
strscpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
setup_hostinfo(host_info, sizeof host_info);
+ prefill_possible_map();
if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
add_bootloader_randomness(rng_seed, sizeof(rng_seed));
@@ -460,6 +504,18 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
+#if IS_ENABLED(CONFIG_SMP)
+void alternatives_smp_module_add(struct module *mod, char *name,
+ void *locks, void *locks_end,
+ void *text, void *text_end)
+{
+}
+
+void alternatives_smp_module_del(struct module *mod)
+{
+}
+#endif
+
void *text_poke(void *addr, const void *opcode, size_t len)
{
/*
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index fae836713487..70c73c22f715 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -16,8 +16,10 @@ CFLAGS_main.o += -Wno-frame-larger-than
obj-$(CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA) += elf_aux.o
+obj-$(CONFIG_SMP) += smp.o
+
USER_OBJS := $(user-objs-y) elf_aux.o execvp.o file.o helper.o irq.o \
main.o mem.o process.o registers.o sigio.o signal.o start_up.o time.o \
- tty.o umid.o util.o
+ tty.o umid.o util.o smp.o
include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 617886d1fb1e..1c050d9f1de6 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -78,7 +78,7 @@ int os_access(const char *file, int mode)
(mode & OS_ACC_X_OK ? X_OK : 0) |
(mode & OS_ACC_F_OK ? F_OK : 0);
- err = access(file, amode);
+ CATCH_EINTR(err = access(file, amode));
if (err < 0)
return -errno;
@@ -90,7 +90,7 @@ int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg)
{
int err;
- err = ioctl(fd, cmd, arg);
+ CATCH_EINTR(err = ioctl(fd, cmd, arg));
if (err < 0)
return -errno;
@@ -147,13 +147,13 @@ int os_file_mode(const char *file, struct openflags *mode_out)
*mode_out = OPENFLAGS();
- err = access(file, W_OK);
+ CATCH_EINTR(err = access(file, W_OK));
if (err && (errno != EACCES))
return -errno;
else if (!err)
*mode_out = of_write(*mode_out);
- err = access(file, R_OK);
+ CATCH_EINTR(err = access(file, R_OK));
if (err && (errno != EACCES))
return -errno;
else if (!err)
@@ -185,7 +185,7 @@ int os_open_file(const char *file, struct openflags flags, int mode)
if (flags.a)
f |= O_APPEND;
- fd = open64(file, f, mode);
+ CATCH_EINTR(fd = open64(file, f, mode));
if (fd < 0)
return -errno;
@@ -245,7 +245,7 @@ int os_seek_file(int fd, unsigned long long offset)
{
unsigned long long actual;
- actual = lseek64(fd, offset, SEEK_SET);
+ CATCH_EINTR(actual = lseek64(fd, offset, SEEK_SET));
if (actual != offset)
return -errno;
return 0;
@@ -253,8 +253,9 @@ int os_seek_file(int fd, unsigned long long offset)
int os_read_file(int fd, void *buf, int len)
{
- int n = read(fd, buf, len);
+ int n;
+ CATCH_EINTR(n = read(fd, buf, len));
if (n < 0)
return -errno;
return n;
@@ -262,8 +263,9 @@ int os_read_file(int fd, void *buf, int len)
int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
{
- int n = pread(fd, buf, len, offset);
+ int n;
+ CATCH_EINTR(n = pread(fd, buf, len, offset));
if (n < 0)
return -errno;
return n;
@@ -271,8 +273,9 @@ int os_pread_file(int fd, void *buf, int len, unsigned long long offset)
int os_write_file(int fd, const void *buf, int len)
{
- int n = write(fd, (void *) buf, len);
+ int n;
+ CATCH_EINTR(n = write(fd, (void *) buf, len));
if (n < 0)
return -errno;
return n;
@@ -280,8 +283,9 @@ int os_write_file(int fd, const void *buf, int len)
int os_sync_file(int fd)
{
- int n = fdatasync(fd);
+ int n;
+ CATCH_EINTR(n = fdatasync(fd));
if (n < 0)
return -errno;
return n;
@@ -289,8 +293,9 @@ int os_sync_file(int fd)
int os_pwrite_file(int fd, const void *buf, int len, unsigned long long offset)
{
- int n = pwrite(fd, (void *) buf, len, offset);
+ int n;
+ CATCH_EINTR(n = pwrite(fd, (void *) buf, len, offset));
if (n < 0)
return -errno;
return n;
@@ -393,6 +398,41 @@ int os_pipe(int *fds, int stream, int close_on_exec)
int os_set_fd_async(int fd)
{
+ struct f_owner_ex owner = {
+ .type = F_OWNER_TID,
+ .pid = os_getpid(),
+ };
+ int err, flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags < 0)
+ return -errno;
+
+ flags |= O_ASYNC | O_NONBLOCK;
+ if (fcntl(fd, F_SETFL, flags) < 0) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s: failed to set O_ASYNC and O_NONBLOCK on fd # %d, errno = %d\n",
+ __func__, fd, errno);
+ return err;
+ }
+
+ if ((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
+ (fcntl(fd, F_SETOWN_EX, &owner) < 0)) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s: Failed to fcntl F_SETOWN_EX (or F_SETSIG) fd %d, errno = %d\n",
+ __func__, fd, errno);
+ return err;
+ }
+
+ return 0;
+}
+
+int os_set_fd_async_thread(int fd)
+{
+ struct f_owner_ex owner = {
+ .type = F_OWNER_TID,
+ .pid = os_gettid(),
+ };
int err, flags;
flags = fcntl(fd, F_GETFL);
@@ -402,16 +442,16 @@ int os_set_fd_async(int fd)
flags |= O_ASYNC | O_NONBLOCK;
if (fcntl(fd, F_SETFL, flags) < 0) {
err = -errno;
- printk(UM_KERN_ERR "os_set_fd_async : failed to set O_ASYNC "
- "and O_NONBLOCK on fd # %d, errno = %d\n", fd, errno);
+ printk(UM_KERN_ERR "%s: failed to set O_ASYNC and O_NONBLOCK on fd # %d, errno = %d\n",
+ __func__, fd, errno);
return err;
}
if ((fcntl(fd, F_SETSIG, SIGIO) < 0) ||
- (fcntl(fd, F_SETOWN, os_getpid()) < 0)) {
+ (fcntl(fd, F_SETOWN_EX, &owner) < 0)) {
err = -errno;
- printk(UM_KERN_ERR "os_set_fd_async : Failed to fcntl F_SETOWN "
- "(or F_SETSIG) fd %d, errno = %d\n", fd, errno);
+ printk(UM_KERN_ERR "%s: Failed to fcntl F_SETOWN_EX (or F_SETSIG) fd %d, errno = %d\n",
+ __func__, fd, errno);
return err;
}
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 3c63ce19e3bf..92028c14d2a3 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -16,6 +16,7 @@
#include <init.h>
#include <kern_util.h>
#include <os.h>
+#include <smp.h>
#include <um_malloc.h>
#include "internal.h"
@@ -171,7 +172,7 @@ int __init main(int argc, char **argv, char **envp)
*/
/* stop timers and set timer signal to be ignored */
- os_timer_disable();
+ os_timer_disable(0);
/* disable SIGIO for the fds and set SIGIO to be ignored */
err = deactivate_all_fds();
@@ -207,7 +208,7 @@ void *__wrap_malloc(int size)
{
void *ret;
- if (!kmalloc_ok)
+ if (!kmalloc_ok || disable_kmalloc[uml_curr_cpu()])
return __real_malloc(size);
else if (size <= UM_KERN_PAGE_SIZE)
/* finding contiguous pages can be hard*/
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 00b49e90d05f..3cae654cbaf7 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -10,6 +10,7 @@
#include <errno.h>
#include <signal.h>
#include <fcntl.h>
+#include <linux/futex.h>
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/prctl.h>
@@ -82,6 +83,11 @@ int os_getpid(void)
return syscall(__NR_getpid);
}
+int os_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+
int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
int r, int w, int x)
{
@@ -189,3 +195,12 @@ void os_set_pdeathsig(void)
{
prctl(PR_SET_PDEATHSIG, SIGKILL);
}
+
+int os_futex_wake(void *uaddr, unsigned int val)
+{
+ int r;
+
+ CATCH_EINTR(r = syscall(__NR_futex, uaddr, FUTEX_WAKE, val,
+ NULL, NULL, 0));
+ return r < 0 ? -errno : r;
+}
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 11f07f498270..5fa7909111d5 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -68,12 +68,12 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGCHLD_BIT 2
#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
-int signals_enabled;
+static __thread int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
#endif
-static unsigned int signals_pending;
-static unsigned int signals_active = 0;
+static __thread unsigned int signals_pending;
+static __thread unsigned int signals_active;
static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
{
@@ -131,10 +131,9 @@ static void timer_real_alarm_handler(mcontext_t *mc)
static void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
{
- int enabled;
+ int enabled = signals_enabled;
- enabled = signals_enabled;
- if (!signals_enabled) {
+ if (!enabled) {
signals_pending |= SIGALRM_MASK;
return;
}
@@ -342,6 +341,11 @@ void unblock_signals(void)
}
}
+int um_get_signals(void)
+{
+ return signals_enabled;
+}
+
int um_set_signals(int enable)
{
int ret;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 78f48fa9db8b..790b51328219 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -25,6 +25,7 @@
#include <os.h>
#include <ptrace_user.h>
#include <registers.h>
+#include <smp.h>
#include <skas.h>
#include <sysdep/stub.h>
#include <sysdep/mcontext.h>
diff --git a/arch/um/os-Linux/smp.c b/arch/um/os-Linux/smp.c
new file mode 100644
index 000000000000..4b75887f8537
--- /dev/null
+++ b/arch/um/os-Linux/smp.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2025 Ant Group
+ */
+
+#include <stdint.h>
+#include <errno.h>
+#include <pthread.h>
+#include <kern_util.h>
+#include <os.h>
+#include <smp.h>
+
+static __thread int __curr_cpu;
+
+int uml_curr_cpu(void)
+{
+ return __curr_cpu;
+}
+
+static pthread_t cpu_threads[CONFIG_NR_CPUS];
+
+static void *cpu_thread(void *cpup)
+{
+ __curr_cpu = (uintptr_t)cpup;
+ start_idle();
+ return NULL;
+}
+
+int start_cpu_thread(int cpu)
+{
+ if (pthread_create(&cpu_threads[cpu], NULL, cpu_thread,
+ (void *)(uintptr_t)cpu) != 0)
+ return -errno;
+ return 0;
+}
+
+void start_idle_thread_secondary(jmp_buf *switch_buf)
+{
+ longjmp(*switch_buf, 1);
+
+ /* unreachable */
+ printk(UM_KERN_ERR "impossible long jump!");
+ fatal_sigsegv();
+}
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index a827c2e01aa5..240fc3c2fb17 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -481,6 +481,9 @@ void __init os_early_checks(void)
fatal("SECCOMP userspace requested but not functional!\n");
}
+ if (uml_ncpus > 1)
+ fatal("SMP is not supported with PTRACE userspace.\n");
+
using_seccomp = 0;
check_ptrace();
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 4d5591d96d8c..bbe5cf82642d 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -14,9 +14,10 @@
#include <sys/time.h>
#include <kern_util.h>
#include <os.h>
+#include <smp.h>
#include <string.h>
-static timer_t event_high_res_timer = 0;
+static timer_t event_high_res_timer[CONFIG_NR_CPUS] = { 0 };
static inline long long timespec_to_ns(const struct timespec *ts)
{
@@ -36,15 +37,22 @@ long long os_persistent_clock_emulation(void)
*/
int os_timer_create(void)
{
- timer_t *t = &event_high_res_timer;
+ int cpu = uml_curr_cpu();
+ timer_t *t = &event_high_res_timer[cpu];
+ struct sigevent sigev = {
+ .sigev_notify = SIGEV_THREAD_ID,
+ .sigev_signo = SIGALRM,
+ .sigev_value.sival_ptr = t,
+ ._sigev_un._tid = os_gettid(),
+ };
- if (timer_create(CLOCK_MONOTONIC, NULL, t) == -1)
+ if (timer_create(CLOCK_MONOTONIC, &sigev, t) == -1)
return -1;
return 0;
}
-int os_timer_set_interval(unsigned long long nsecs)
+int os_timer_set_interval(int cpu, unsigned long long nsecs)
{
struct itimerspec its;
@@ -54,13 +62,13 @@ int os_timer_set_interval(unsigned long long nsecs)
its.it_interval.tv_sec = nsecs / UM_NSEC_PER_SEC;
its.it_interval.tv_nsec = nsecs % UM_NSEC_PER_SEC;
- if (timer_settime(event_high_res_timer, 0, &its, NULL) == -1)
+ if (timer_settime(event_high_res_timer[cpu], 0, &its, NULL) == -1)
return -errno;
return 0;
}
-int os_timer_one_shot(unsigned long long nsecs)
+int os_timer_one_shot(int cpu, unsigned long long nsecs)
{
struct itimerspec its = {
.it_value.tv_sec = nsecs / UM_NSEC_PER_SEC,
@@ -70,19 +78,19 @@ int os_timer_one_shot(unsigned long long nsecs)
.it_interval.tv_nsec = 0, // we cheat here
};
- timer_settime(event_high_res_timer, 0, &its, NULL);
+ timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
return 0;
}
/**
* os_timer_disable() - disable the posix (interval) timer
*/
-void os_timer_disable(void)
+void os_timer_disable(int cpu)
{
struct itimerspec its;
memset(&its, 0, sizeof(struct itimerspec));
- timer_settime(event_high_res_timer, 0, &its, NULL);
+ timer_settime(event_high_res_timer[cpu], 0, &its, NULL);
}
long long os_nsecs(void)
@@ -100,6 +108,7 @@ void os_idle_sleep(void)
{
struct itimerspec its;
sigset_t set, old;
+ int cpu = uml_curr_cpu();
/* block SIGALRM while we analyze the timer state */
sigemptyset(&set);
@@ -107,7 +116,7 @@ void os_idle_sleep(void)
sigprocmask(SIG_BLOCK, &set, &old);
/* check the timer, and if it'll fire then wait for it */
- timer_gettime(event_high_res_timer, &its);
+ timer_gettime(event_high_res_timer[cpu], &its);
if (its.it_value.tv_sec || its.it_value.tv_nsec)
sigsuspend(&old);
/* either way, restore the signal mask */
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index a310ae27b479..c22ab1e9e50b 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -41,3 +41,8 @@ EXPORT_SYMBOL(vsyscall_end);
extern int __sprintf_chk(char *str, int flag, size_t len, const char *format);
EXPORT_SYMBOL(__sprintf_chk);
#endif
+
+#if IS_ENABLED(CONFIG_SMP)
+int uml_curr_cpu(void);
+EXPORT_SYMBOL(uml_curr_cpu);
+#endif
--
2.34.1
More information about the linux-um
mailing list