[PATCH] arm64/alternatives: use subsections for replacement sequences

Ard Biesheuvel ardb at kernel.org
Thu Jul 9 08:31:26 EDT 2020


On Thu, 9 Jul 2020 at 14:11, Alexandru Elisei <alexandru.elisei at arm.com> wrote:
>
> Hi,
>

Hi Alex,

Apologies for the breakage.


> I should post the entire boot log:
...
> [    0.002204] pc : work_pending+0x32c/0x348

This suggests that you ended executing directly from the alternatives
replacement section that gets appended to the end of work_pending (and
therefore gets mistaken for being part of it)

It appears that the following code in alternatives.c

static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
    unsigned long replptr;

    if (kernel_text_address(pc))
       return true;

returns true inadvertently for the branch in this piece of code in entry.S

alternative_if ARM64_HAS_IRQ_PRIO_MASKING
    ldr x20, [sp, #S_PMR_SAVE]
    msr_s SYS_ICC_PMR_EL1, x20
    mrs_s x21, SYS_ICC_CTLR_EL1
    tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE
    dsb sy // Ensure priority change is seen by redistributor
.L__skip_pmr_sync\@:


due to the fact that kernel_text_address() has no way of
distinguishing branches inside the subsection from branches that
require updating. So the alternatives patching code dutifully updates
the tbz opcode and points it to its original target in the subsection.

This is going to be rather tricky to fix, unless we special case
tbz/cbz branches and other branches with limited range that would
never have worked before anyway.

For now, better to just revert it and revisit it later.







> [    0.002210] lr : el1_irq+0xcc/0x180
> [    0.002214] sp : ffffd7c5dced3d90
> [    0.002220] pmr_save: 000000f0
> [    0.002224] x29: ffffd7c5dced3ec0 x28: ffffd7c5dcee2e00
> [    0.002231] x27: ffffd7c5dcee2e00 x26: ffff2842a2ce9000
> [    0.002242] x25: ffff800010000000 x24: 0000000000000001
> [    0.002244] x23: 3c2dd225ee77b65f x22: 2576d4c31e698712
> [    0.002254] x21: 0000000000000402 x20: 00000000000000f0
> [    0.002264] x19: ffffd7c5dced3d90 x18: 0000000000000020
> [    0.002274] x17: 0000000066f2d860 x16: 0000000000000001
> [    0.002274] x15: 0000000000000000 x14: 0000000000000000
> [    0.002284] x13: 00000000fa83b2da x12: 0000000000000000
> [    0.002295] x11: 0000000000000000 x10: 3c2dd225ee77b65f
> [    0.002306] x9 : 2576d4c31e698712 x8 : ffffd7c5dcee3bd8
> [    0.002314] x7 : 000000000000ba5e x6 : 00000000fffedb08
> [    0.002316] x5 : 00000000ffffffff x4 : ffff2842a2ce9000
> [    0.002327] x3 : 00000000000000f0 x2 : 00000000000000f0
> [    0.002338] x1 : ffff2842a2ce9000 x0 : 00000000000000f0
> [    0.002344] Call trace:
> [    0.002348]  work_pending+0x32c/0x348
> [    0.002354]  do_idle+0x230/0x30c
> [    0.002364]  cpu_startup_entry+0x24/0x80
> [    0.002370]  rest_init+0xd8/0xe8
> [    0.002380]  arch_call_rest_init+0x10/0x1c
> [    0.002384]  start_kernel+0x4b8/0x4f0
> [    0.002394] Code: d5033f9f d518d03f d503201f d503201f (a9440801)
> [    0.002404] ---[ end trace 08b34e49c88e0252 ]---
> [    0.002413] Kernel panic - not syncing: Attempted to kill the idle task!
> [    0.002414] SMP: stopping secondary CPUs
> [    0.002434] ---[ end Kernel panic - not syncing: Attempted to kill the idle
> task! ]---
>
> If it helps, I tried it with caches enabled and the logs were identical (checked
> with diff).
>
> Thanks,
>
> Alex
>
> On 7/9/20 11:57 AM, Alexandru Elisei wrote:
> > Hi,
> >
> > I think this patch breaks pseudo-NMIs. I bisected the following splat to this commit:
> >
> > [    0.002103] Unable to handle kernel NULL pointer dereference at virtual address
> > 0000000000000130
> > [    0.002124] Mem abort info:
> > [    0.002124]   ESR = 0x96000004
> > [    0.002135]   EC = 0x25: DABT (current EL), IL = 32 bits
> > [    0.002135]   SET = 0, FnV = 0
> > [    0.002146]   EA = 0, S1PTW = 0
> > [    0.002146] Data abort info:
> > [    0.002154]   ISV = 0, ISS = 0x00000004
> > [    0.002156]   CM = 0, WnR = 0
> > [    0.002164] [0000000000000130] user address but active_mm is swapper
> > [    0.002167] Internal error: Oops: 96000004 [#1] PREEMPT SMP
> > [    0.002174] Modules linked in:
> > [    0.002184] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
> > 5.8.0-rc1-00024-gf7b93d42945c-dirty #135
> > [    0.002188] Hardware name: FVP Base (DT)
> > [    0.002194] pstate: 000003c9 (nzcv DAIF -PAN -UAO BTYPE=--)
> > [    0.002204] pc : work_pending+0x32c/0x348
> > [    0.002210] lr : el1_irq+0xcc/0x180
> > [    0.002214] sp : ffffd7c5dced3d90
> > [    0.002220] pmr_save: 000000f0
> > [    0.002224] x29: ffffd7c5dced3ec0 x28: ffffd7c5dcee2e00
> > [    0.002231] x27: ffffd7c5dcee2e00 x26: ffff2842a2ce9000
> > [    0.002242] x25: ffff800010000000 x24: 0000000000000001
> > [    0.002244] x23: 3c2dd225ee77b65f x22: 2576d4c31e698712
> > [    0.002254] x21: 0000000000000402 x20: 00000000000000f0
> > [    0.002264] x19: ffffd7c5dced3d90 x18: 0000000000000020
> > [    0.002274] x17: 0000000066f2d860 x16: 0000000000000001
> > [    0.002274] x15: 0000000000000000 x14: 0000000000000000
> > [    0.002284] x13: 00000000fa83b2da x12: 0000000000000000
> > [    0.002295] x11: 0000000000000000 x10: 3c2dd225ee77b65f
> > [    0.002306] x9 : 2576d4c31e698712 x8 : ffffd7c5dcee3bd8
> > [    0.002314] x7 : 000000000000ba5e x6 : 00000000fffedb08
> > [    0.002316] x5 : 00000000ffffffff x4 : ffff2842a2ce9000
> > [    0.002327] x3 : 00000000000000f0 x2 : 00000000000000f0
> > [    0.002338] x1 : ffff2842a2ce9000 x0 : 00000000000000f0
> > [    0.002344] Call trace:
> > [    0.002348]  work_pending+0x32c/0x348
> > [    0.002354]  do_idle+0x230/0x30c
> > [    0.002364]  cpu_startup_entry+0x24/0x80
> > [    0.002370]  rest_init+0xd8/0xe8
> > [    0.002380]  arch_call_rest_init+0x10/0x1c
> > [    0.002384]  start_kernel+0x4b8/0x4f0
> > [    0.002394] Code: d5033f9f d518d03f d503201f d503201f (a9440801)
> > [    0.002404] ---[ end trace 08b34e49c88e0252 ]---
> > [    0.002413] Kernel panic - not syncing: Attempted to kill the idle task!
> > [    0.002414] SMP: stopping secondary CPUs
> > [    0.002434] ---[ end Kernel panic - not syncing: Attempted to kill the idle
> > task! ]---
> >
> > The config I used is defconfig + pseudo-NMIs enabled (CONFIG_ARM64_PSEUDO_NMI=y
> > or, in menuconfig, Kernel features -> Support for NMI-like interrupts). The
> > compiler that I used:
> >
> > $ aarch64-linux-gnu-gcc --version
> > aarch64-linux-gnu-gcc (GCC) 10.1.0
> > Copyright (C) 2020 Free Software Foundation, Inc.
> > This is free software; see the source for copying conditions.  There is NO
> > warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
> >
> > The kernel was run on the model. Command line to launch the model:
> >
> > $MODEL --application cluster0.*=$PAYLOAD --config-file $CONFIG \
> >     -C bp.pl011_uart0.out_file=$LOGFILE
> >
> > Config file:
> >
> > bp.dram_size=8
> > bp.pl011_uart0.unbuffered_output=1
> > bp.pl011_uart0.untimed_fifos=true
> > bp.refcounter.non_arch_start_at_default=1
> > bp.secure_memory=0
> > bp.smsc_91c111.enabled=0
> > bp.ve_sysregs.mmbSiteDefault=0
> > cache_state_modelled=false
> > cluster0.NUM_CORES=4
> > cluster0.amu_has_external_interface=1
> > cluster0.cpu0.CONFIG64=1
> > cluster0.cpu0.crypto_sha3=1
> > cluster0.cpu0.crypto_sha512=1
> > cluster0.cpu0.crypto_sm3=1
> > cluster0.cpu0.crypto_sm4=1
> > cluster0.cpu0.enable_crc32=1
> > cluster0.has_16k_granule=1
> > cluster0.has_4k_granule=1
> > cluster0.has_64k_granule=1
> > cluster0.has_amu=1
> > cluster0.has_arm_v8-1=1
> > cluster0.has_arm_v8-2=1
> > cluster0.has_arm_v8-3=1
> > cluster0.has_arm_v8-4=1
> > cluster0.has_arm_v8-5=1
> > cluster0.has_branch_target_exception=1
> > cluster0.has_ccidx=1
> > cluster0.has_data_alignment_flag=1
> > cluster0.has_fp16=1
> > cluster0.has_large_system_ext=1
> > cluster0.has_large_va=1
> > cluster0.has_ldm_stm_ordering_control=1
> > cluster0.has_lrcpc=1
> > cluster0.has_mpam=1
> > cluster0.has_ras=1
> > cluster0.has_ras_double_fault=1
> > cluster0.has_rndr=1
> > cluster0.has_el3=1                                    # (bool  , init-time)
> > default = '1'      : Implements EL3
> > gic_distributor.GITS_BASER0-type=1
> > gic_distributor.ITS-count=1
> > gic_distributor.ITS-hardware-collection-count=1
> > gic_distributor.direct-lpi-support=1
> > pctl.startup=0.*.*.*
> >
> > I wrapped the kernel with bootwrapper, with this patch on top to clear SCR_EL3.FIQ
> > and make pseudo-NMIs work:
> >
> > diff --git a/arch/aarch64/boot.S b/arch/aarch64/boot.S
> > index 74705cded338..21e72f336057 100644
> > --- a/arch/aarch64/boot.S
> > +++ b/arch/aarch64/boot.S
> > @@ -51,6 +51,7 @@ _start:
> >  #ifndef KERNEL_32
> >         orr     x0, x0, #(1 << 10)              // 64-bit EL2
> >  #endif
> > +       orr     x0, x0, #(1 << 2)               // Route FIQs to EL3
> >         msr     scr_el3, x0
> >
> >         msr     cptr_el3, xzr                   // Disable copro. traps to EL3
> > diff --git a/arch/aarch64/include/asm/gic-v3.h b/arch/aarch64/include/asm/gic-v3.h
> > index e743c02ffe5e..ea7862745872 100644
> > --- a/arch/aarch64/include/asm/gic-v3.h
> > +++ b/arch/aarch64/include/asm/gic-v3.h
> > @@ -32,4 +32,9 @@ static inline void gic_write_icc_ctlr(uint32_t val)
> >         asm volatile ("msr " ICC_CTLR_EL3 ", %0" : : "r" (val));
> >  }
> >
> > +static inline void gic_write_icc_pmr(uint64_t val)
> > +{
> > +       asm volatile ("msr " ICC_PMR_EL1 ", %0" : : "r" (val));
> > +}
> > +
> >  #endif
> > diff --git a/gic-v3.c b/gic-v3.c
> > index c2f66bae4c35..5aeb51a1a7be 100644
> > --- a/gic-v3.c
> > +++ b/gic-v3.c
> > @@ -122,6 +122,9 @@ void gic_secure_init(void)
> >         gic_write_icc_sre(sre);
> >         isb();
> >
> > +       gic_write_icc_pmr(0xff);
> > +       isb();
> > +
> >         gic_write_icc_ctlr(0);
> >         isb();
> >  }
> >
> > Bootwrapper command line:
> >
> > ./configure \
> >     --host=aarch64-linux-gnu \
> >     --with-kernel-dir=${HOST_KERNEL_DIR} \
> >     --with-dtb=${HOST_KERNEL_DIR}/arch/arm64/boot/dts/arm/base_gicv3.dtb \
> >     --with-cmdline="earlycon=pl011,0x1c090000 console=ttyAMA0
> > irqchip.gicv3_pseudo_nmi=1" \
> >     --enable-gicv3 \
> >     --enable-psci \
> >     --with-initrd=${HOST_BUILDROOT_DIR}/output/images/rootfs.cpio.gz
> >
> > I was also able to reproduce it on my rockpro64, but I was using the pseudo-NMI
> > series that allows them to be used on the board [1] and a modified dtb to remove
> > the big cores. I can post the log + whatever else is need to reproduce it, if
> > anyone thinks that would help.
> >
> > [1] https://lists.infradead.org/pipermail/linux-arm-kernel/2020-June/580143.html
> >
> > Thanks,
> >
> > Alex
> >
> > On 6/30/20 9:19 AM, Ard Biesheuvel wrote:
> >> When building very large kernels, the logic that emits replacement
> >> sequences for alternatives fails when relative branches are present
> >> in the code that is emitted into the .altinstr_replacement section
> >> and patched in at the original site and fixed up. The reason is that
> >> the linker will insert veneers if relative branches go out of range,
> >> and due to the relative distance of the .altinstr_replacement from
> >> the .text section where its branch targets usually live, veneers
> >> may be emitted at the end of the .altinstr_replacement section, with
> >> the relative branches in the sequence pointed at the veneers instead
> >> of the actual target.
> >>
> >> The alternatives patching logic will attempt to fix up the branch to
> >> point to its original target, which will be the veneer in this case,
> >> but given that the patch site is likely to be far away as well, it
> >> will be out of range and so patching will fail. There are other cases
> >> where these veneers are problematic, e.g., when the target of the
> >> branch is in .text while the patch site is in .init.text, in which
> >> case putting the replacement sequence inside .text may not help either.
> >>
> >> So let's use subsections to emit the replacement code as closely as
> >> possible to the patch site, to ensure that veneers are only likely to
> >> be emitted if they are required at the patch site as well, in which
> >> case they will be in range for the replacement sequence both before
> >> and after it is transported to the patch site.
> >>
> >> This will prevent alternative sequences in non-init code from being
> >> released from memory after boot, but this is tolerable given that the
> >> entire section is only 512 KB on an allyesconfig build (which weighs in
> >> at 500+ MB for the entire Image). Also, note that modules today carry
> >> the replacement sequences in non-init sections as well, and any of
> >> those that target init code will be emitted into init sections after
> >> this change.
> >>
> >> This fixes an early crash when booting an allyesconfig kernel on a
> >> system where any of the alternatives sequences containing relative
> >> branches are activated at boot (e.g., ARM64_HAS_PAN on TX2)
> >>
> >> Cc: Suzuki K Poulose <suzuki.poulose at arm.com>
> >> Cc: James Morse <james.morse at arm.com>
> >> Cc: Andre Przywara <andre.przywara at arm.com>
> >> Cc: Dave P Martin <dave.martin at arm.com>
> >> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> >> ---
> >>  arch/arm64/include/asm/alternative.h | 16 ++++++++--------
> >>  arch/arm64/kernel/vmlinux.lds.S      |  3 ---
> >>  2 files changed, 8 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
> >> index 5e5dc05d63a0..12f0eb56a1cc 100644
> >> --- a/arch/arm64/include/asm/alternative.h
> >> +++ b/arch/arm64/include/asm/alternative.h
> >> @@ -73,11 +73,11 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
> >>      ".pushsection .altinstructions,\"a\"\n"                         \
> >>      ALTINSTR_ENTRY(feature)                                         \
> >>      ".popsection\n"                                                 \
> >> -    ".pushsection .altinstr_replacement, \"a\"\n"                   \
> >> +    ".subsection 1\n"                                               \
> >>      "663:\n\t"                                                      \
> >>      newinstr "\n"                                                   \
> >>      "664:\n\t"                                                      \
> >> -    ".popsection\n\t"                                               \
> >> +    ".previous\n\t"                                                 \
> >>      ".org   . - (664b-663b) + (662b-661b)\n\t"                      \
> >>      ".org   . - (662b-661b) + (664b-663b)\n"                        \
> >>      ".endif\n"
> >> @@ -117,9 +117,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
> >>  662:        .pushsection .altinstructions, "a"
> >>      altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
> >>      .popsection
> >> -    .pushsection .altinstr_replacement, "ax"
> >> +    .subsection 1
> >>  663:        \insn2
> >> -664:        .popsection
> >> +664:        .previous
> >>      .org    . - (664b-663b) + (662b-661b)
> >>      .org    . - (662b-661b) + (664b-663b)
> >>      .endif
> >> @@ -160,7 +160,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
> >>      .pushsection .altinstructions, "a"
> >>      altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
> >>      .popsection
> >> -    .pushsection .altinstr_replacement, "ax"
> >> +    .subsection 1
> >>      .align 2        /* So GAS knows label 661 is suitably aligned */
> >>  661:
> >>  .endm
> >> @@ -179,9 +179,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
> >>  .macro alternative_else
> >>  662:
> >>      .if .Lasm_alt_mode==0
> >> -    .pushsection .altinstr_replacement, "ax"
> >> +    .subsection 1
> >>      .else
> >> -    .popsection
> >> +    .previous
> >>      .endif
> >>  663:
> >>  .endm
> >> @@ -192,7 +192,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
> >>  .macro alternative_endif
> >>  664:
> >>      .if .Lasm_alt_mode==0
> >> -    .popsection
> >> +    .previous
> >>      .endif
> >>      .org    . - (664b-663b) + (662b-661b)
> >>      .org    . - (662b-661b) + (664b-663b)
> >> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> >> index 6827da7f3aa5..5423ffe0a987 100644
> >> --- a/arch/arm64/kernel/vmlinux.lds.S
> >> +++ b/arch/arm64/kernel/vmlinux.lds.S
> >> @@ -165,9 +165,6 @@ SECTIONS
> >>              *(.altinstructions)
> >>              __alt_instructions_end = .;
> >>      }
> >> -    .altinstr_replacement : {
> >> -            *(.altinstr_replacement)
> >> -    }
> >>
> >>      . = ALIGN(SEGMENT_ALIGN);
> >>      __inittext_end = .;
> > _______________________________________________
> > linux-arm-kernel mailing list
> > linux-arm-kernel at lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel



More information about the linux-arm-kernel mailing list