[PATCH] arm64: mm: don't acquire mutex when rewriting swapper
Ard Biesheuvel
ardb at kernel.org
Tue Sep 20 07:25:33 PDT 2022
On Tue, 20 Sept 2022 at 15:47, Mark Rutland <mark.rutland at arm.com> wrote:
>
> Since commit:
>
> 47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled")
>
> ... when building with CONFIG_DEBUG_ATOMIC_SLEEP=y and booting under
> QEMU TCG with '-cpu max', there's a boot-time splat:
>
> | BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
> | in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 15, name: migration/0
> | preempt_count: 1, expected: 0
> | RCU nest depth: 0, expected: 0
> | no locks held by migration/0/15.
> | irq event stamp: 28
> | hardirqs last enabled at (27): [<ffff8000091ed180>] _raw_spin_unlock_irq+0x3c/0x7c
> | hardirqs last disabled at (28): [<ffff8000081b8d74>] multi_cpu_stop+0x150/0x18c
> | softirqs last enabled at (0): [<ffff80000809a314>] copy_process+0x594/0x1964
> | softirqs last disabled at (0): [<0000000000000000>] 0x0
> | CPU: 0 PID: 15 Comm: migration/0 Not tainted 6.0.0-rc3-00002-g419b42ff7eef #3
> | Hardware name: linux,dummy-virt (DT)
> | Stopper: multi_cpu_stop+0x0/0x18c <- stop_cpus.constprop.0+0xa0/0xfc
> | Call trace:
> | dump_backtrace.part.0+0xd0/0xe0
> | show_stack+0x1c/0x5c
> | dump_stack_lvl+0x88/0xb4
> | dump_stack+0x1c/0x38
> | __might_resched+0x180/0x230
> | __might_sleep+0x4c/0xa0
> | __mutex_lock+0x5c/0x450
> | mutex_lock_nested+0x30/0x40
> | create_kpti_ng_temp_pgd+0x4fc/0x6d0
> | kpti_install_ng_mappings+0x2b8/0x3b0
> | cpu_enable_non_boot_scope_capabilities+0x7c/0xd0
> | multi_cpu_stop+0xa0/0x18c
> | cpu_stopper_thread+0x88/0x11c
> | smpboot_thread_fn+0x1ec/0x290
> | kthread+0x118/0x120
> | ret_from_fork+0x10/0x20
>
> Since commit:
>
> ee017ee353506fce ("arm64/mm: avoid fixmap race condition when create pud mapping")
>
> ... once the kernel leaves the SYSTEM_BOOTING state, the fixmap pagetable
> entries are protected by the fixmap_lock mutex.
>
> The new KPTI rewrite code uses __create_pgd_mapping() to create a
> temporary pagetable. This happens in atomic context, after secondary
> CPUs are brought up and the kernel has left the SYSTEM_BOOTING state.
> Hence we try to acquire a mutex in atomic context, which is generally
> unsound (though benign in this case as the mutex should be free and all
> other CPUs are quiescent).
>
> This patch avoids the issue by pulling the mutex out of alloc_init_pud()
> and acquiring it at a higher level in the pagetable manipulation code.
> This allows the lower-level code to be used without locking where one CPU
> is known to be in exclusive control of the machine, even after having left
> the SYSTEM_BOOTING state.
>
> Fixes: 47546a1912fc4a03 ("arm64: mm: install KPTI nG mappings with MMU enabled")
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> Cc: Ard Biesheuvel <ardb at kernel.org>
> Cc: Catalin Marinas <catalin.marinas at arm.com>
> Cc: Will Deacon <will at kernel.org>
Reviewed-by: Ard Biesheuvel <ardb at kernel.org>
> ---
> arch/arm64/mm/mmu.c | 32 ++++++++++++++++++--------------
> 1 file changed, 18 insertions(+), 14 deletions(-)
>
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index e7ad44585f40a..eb489302c28a4 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -331,12 +331,6 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
> }
> BUG_ON(p4d_bad(p4d));
>
> - /*
> - * No need for locking during early boot. And it doesn't work as
> - * expected with KASLR enabled.
> - */
> - if (system_state != SYSTEM_BOOTING)
> - mutex_lock(&fixmap_lock);
> pudp = pud_set_fixmap_offset(p4dp, addr);
> do {
> pud_t old_pud = READ_ONCE(*pudp);
> @@ -368,15 +362,13 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
> } while (pudp++, addr = next, addr != end);
>
> pud_clear_fixmap();
> - if (system_state != SYSTEM_BOOTING)
> - mutex_unlock(&fixmap_lock);
> }
>
> -static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
> - unsigned long virt, phys_addr_t size,
> - pgprot_t prot,
> - phys_addr_t (*pgtable_alloc)(int),
> - int flags)
> +static void __create_pgd_mapping_locked(pgd_t *pgdir, phys_addr_t phys,
> + unsigned long virt, phys_addr_t size,
> + pgprot_t prot,
> + phys_addr_t (*pgtable_alloc)(int),
> + int flags)
> {
> unsigned long addr, end, next;
> pgd_t *pgdp = pgd_offset_pgd(pgdir, virt);
> @@ -400,8 +392,20 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
> } while (pgdp++, addr = next, addr != end);
> }
>
> +static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
> + unsigned long virt, phys_addr_t size,
> + pgprot_t prot,
> + phys_addr_t (*pgtable_alloc)(int),
> + int flags)
> +{
> + mutex_lock(&fixmap_lock);
> + __create_pgd_mapping_locked(pgdir, phys, virt, size, prot,
> + pgtable_alloc, flags);
> + mutex_unlock(&fixmap_lock);
> +}
> +
> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> -extern __alias(__create_pgd_mapping)
> +extern __alias(__create_pgd_mapping_locked)
> void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
> phys_addr_t size, pgprot_t prot,
> phys_addr_t (*pgtable_alloc)(int), int flags);
> --
> 2.30.2
>
More information about the linux-arm-kernel
mailing list