[PATCH] arm64: mm: don't acquire mutex when rewriting swapper

Mark Rutland mark.rutland at arm.com
Tue Sep 20 06:47:31 PDT 2022


Since commit:

  47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled)"

... when building with CONFIG_DEBUG_ATOMIC_SLEEP=y and booting under
QEMU TCG with '-cpu max', there's a boot-time splat:

| BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
| in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 15, name: migration/0
| preempt_count: 1, expected: 0
| RCU nest depth: 0, expected: 0
| no locks held by migration/0/15.
| irq event stamp: 28
| hardirqs last  enabled at (27): [<ffff8000091ed180>] _raw_spin_unlock_irq+0x3c/0x7c
| hardirqs last disabled at (28): [<ffff8000081b8d74>] multi_cpu_stop+0x150/0x18c
| softirqs last  enabled at (0): [<ffff80000809a314>] copy_process+0x594/0x1964
| softirqs last disabled at (0): [<0000000000000000>] 0x0
| CPU: 0 PID: 15 Comm: migration/0 Not tainted 6.0.0-rc3-00002-g419b42ff7eef #3
| Hardware name: linux,dummy-virt (DT)
| Stopper: multi_cpu_stop+0x0/0x18c <- stop_cpus.constprop.0+0xa0/0xfc
| Call trace:
|  dump_backtrace.part.0+0xd0/0xe0
|  show_stack+0x1c/0x5c
|  dump_stack_lvl+0x88/0xb4
|  dump_stack+0x1c/0x38
|  __might_resched+0x180/0x230
|  __might_sleep+0x4c/0xa0
|  __mutex_lock+0x5c/0x450
|  mutex_lock_nested+0x30/0x40
|  create_kpti_ng_temp_pgd+0x4fc/0x6d0
|  kpti_install_ng_mappings+0x2b8/0x3b0
|  cpu_enable_non_boot_scope_capabilities+0x7c/0xd0
|  multi_cpu_stop+0xa0/0x18c
|  cpu_stopper_thread+0x88/0x11c
|  smpboot_thread_fn+0x1ec/0x290
|  kthread+0x118/0x120
|  ret_from_fork+0x10/0x20

Since commit:

  ee017ee353506fce ("arm64/mm: avoid fixmap race condition when create pud mapping")

... once the kernel leave the SYSTEM_BOOTING state, the fixmap pagetable
entries are protected by the fixmap_lock mutex.

The new KPTI rewrite code uses __create_pgd_mapping() to create a
temporary pagetable. This happens in atomic context, after secondary
CPUs are brought up and the kernel has left the SYSTEM_BOOTING state.
Hence we try to acquire a mutex in atomic context, which is generally
unsound (though benign in this case as the mutex should be free and all
other CPUs are quiescent).

This patch avoids the issue by pulling the mutex out of alloc_init_pud()
and calling it at a higher level in the pagetable manipulation code.
This allows it to be used without locking where one CPU is known to be
in exclusive control of the machine, even after having left the
SYSTEM_BOOTING state.

Fixes: 47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled)"
Signed-off-by: Mark Rutland <mark.rutland at arm.com>
Cc: Ard Biesheuvel <ardb at kernel.org>
Cc: Catalin Marinas <catalin.marinas at arm.com>
Cc: Will Deacon <will at kernel.org>
---
 arch/arm64/mm/mmu.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e7ad44585f40a..eb489302c28a4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -331,12 +331,6 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	}
 	BUG_ON(p4d_bad(p4d));
 
-	/*
-	 * No need for locking during early boot. And it doesn't work as
-	 * expected with KASLR enabled.
-	 */
-	if (system_state != SYSTEM_BOOTING)
-		mutex_lock(&fixmap_lock);
 	pudp = pud_set_fixmap_offset(p4dp, addr);
 	do {
 		pud_t old_pud = READ_ONCE(*pudp);
@@ -368,15 +362,13 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
 	} while (pudp++, addr = next, addr != end);
 
 	pud_clear_fixmap();
-	if (system_state != SYSTEM_BOOTING)
-		mutex_unlock(&fixmap_lock);
 }
 
-static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
-				 unsigned long virt, phys_addr_t size,
-				 pgprot_t prot,
-				 phys_addr_t (*pgtable_alloc)(int),
-				 int flags)
+static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
+					unsigned long virt, phys_addr_t size,
+					pgprot_t prot,
+					phys_addr_t (*pgtable_alloc)(int),
+					int flags)
 {
 	unsigned long addr, end, next;
 	pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
@@ -400,8 +392,20 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
 	} while (pgdp++, addr = next, addr != end);
 }
 
+static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
+				 unsigned long virt, phys_addr_t size,
+				 pgprot_t prot,
+				 phys_addr_t (*pgtable_alloc)(int),
+				 int flags)
+{
+	mutex_lock(&fixmap_lock);
+	__create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
+				    pgtable_alloc, flags);
+	mutex_unlock(&fixmap_lock);
+}
+
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
-extern __alias(__create_pgd_mapping)
+extern __alias(__create_pgd_mapping_locked)
 void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
 			     phys_addr_t size, pgprot_t prot,
 			     phys_addr_t (*pgtable_alloc)(int), int flags);
-- 
2.30.2




More information about the linux-arm-kernel mailing list