[PATCH] arm64/alternatives: use subsections for replacement sequences

Alexandru Elisei alexandru.elisei at arm.com
Thu Jul 9 06:57:29 EDT 2020


Hi,

I think this patch breaks pseudo-NMIs. I bisected the following splat to this commit:

[    0.002103] Unable to handle kernel NULL pointer dereference at virtual address
0000000000000130
[    0.002124] Mem abort info:
[    0.002124]   ESR = 0x96000004
[    0.002135]   EC = 0x25: DABT (current EL), IL = 32 bits
[    0.002135]   SET = 0, FnV = 0
[    0.002146]   EA = 0, S1PTW = 0
[    0.002146] Data abort info:
[    0.002154]   ISV = 0, ISS = 0x00000004
[    0.002156]   CM = 0, WnR = 0
[    0.002164] [0000000000000130] user address but active_mm is swapper
[    0.002167] Internal error: Oops: 96000004 [#1] PREEMPT SMP
[    0.002174] Modules linked in:
[    0.002184] CPU: 0 PID: 0 Comm: swapper/0 Not tainted
5.8.0-rc1-00024-gf7b93d42945c-dirty #135
[    0.002188] Hardware name: FVP Base (DT)
[    0.002194] pstate: 000003c9 (nzcv DAIF -PAN -UAO BTYPE=--)
[    0.002204] pc : work_pending+0x32c/0x348
[    0.002210] lr : el1_irq+0xcc/0x180
[    0.002214] sp : ffffd7c5dced3d90
[    0.002220] pmr_save: 000000f0
[    0.002224] x29: ffffd7c5dced3ec0 x28: ffffd7c5dcee2e00
[    0.002231] x27: ffffd7c5dcee2e00 x26: ffff2842a2ce9000
[    0.002242] x25: ffff800010000000 x24: 0000000000000001
[    0.002244] x23: 3c2dd225ee77b65f x22: 2576d4c31e698712
[    0.002254] x21: 0000000000000402 x20: 00000000000000f0
[    0.002264] x19: ffffd7c5dced3d90 x18: 0000000000000020
[    0.002274] x17: 0000000066f2d860 x16: 0000000000000001
[    0.002274] x15: 0000000000000000 x14: 0000000000000000
[    0.002284] x13: 00000000fa83b2da x12: 0000000000000000
[    0.002295] x11: 0000000000000000 x10: 3c2dd225ee77b65f
[    0.002306] x9 : 2576d4c31e698712 x8 : ffffd7c5dcee3bd8
[    0.002314] x7 : 000000000000ba5e x6 : 00000000fffedb08
[    0.002316] x5 : 00000000ffffffff x4 : ffff2842a2ce9000
[    0.002327] x3 : 00000000000000f0 x2 : 00000000000000f0
[    0.002338] x1 : ffff2842a2ce9000 x0 : 00000000000000f0
[    0.002344] Call trace:
[    0.002348]  work_pending+0x32c/0x348
[    0.002354]  do_idle+0x230/0x30c
[    0.002364]  cpu_startup_entry+0x24/0x80
[    0.002370]  rest_init+0xd8/0xe8
[    0.002380]  arch_call_rest_init+0x10/0x1c
[    0.002384]  start_kernel+0x4b8/0x4f0
[    0.002394] Code: d5033f9f d518d03f d503201f d503201f (a9440801)
[    0.002404] ---[ end trace 08b34e49c88e0252 ]---
[    0.002413] Kernel panic - not syncing: Attempted to kill the idle task!
[    0.002414] SMP: stopping secondary CPUs
[    0.002434] ---[ end Kernel panic - not syncing: Attempted to kill the idle
task! ]---

The config I used is defconfig + pseudo-NMIs enabled (CONFIG_ARM64_PSEUDO_NMI=y
or, in menuconfig, Kernel features -> Support for NMI-like interrupts). The
compiler that I used:

$ aarch64-linux-gnu-gcc --version
aarch64-linux-gnu-gcc (GCC) 10.1.0
Copyright (C) 2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

The kernel was run on the model. Command line to launch the model:

$MODEL --application cluster0.*=$PAYLOAD --config-file $CONFIG \
    -C bp.pl011_uart0.out_file=$LOGFILE

Config file:

bp.dram_size=8
bp.pl011_uart0.unbuffered_output=1
bp.pl011_uart0.untimed_fifos=true
bp.refcounter.non_arch_start_at_default=1
bp.secure_memory=0
bp.smsc_91c111.enabled=0
bp.ve_sysregs.mmbSiteDefault=0
cache_state_modelled=false
cluster0.NUM_CORES=4
cluster0.amu_has_external_interface=1
cluster0.cpu0.CONFIG64=1
cluster0.cpu0.crypto_sha3=1
cluster0.cpu0.crypto_sha512=1
cluster0.cpu0.crypto_sm3=1
cluster0.cpu0.crypto_sm4=1
cluster0.cpu0.enable_crc32=1
cluster0.has_16k_granule=1
cluster0.has_4k_granule=1
cluster0.has_64k_granule=1
cluster0.has_amu=1
cluster0.has_arm_v8-1=1
cluster0.has_arm_v8-2=1
cluster0.has_arm_v8-3=1
cluster0.has_arm_v8-4=1
cluster0.has_arm_v8-5=1
cluster0.has_branch_target_exception=1
cluster0.has_ccidx=1
cluster0.has_data_alignment_flag=1
cluster0.has_fp16=1
cluster0.has_large_system_ext=1
cluster0.has_large_va=1
cluster0.has_ldm_stm_ordering_control=1
cluster0.has_lrcpc=1
cluster0.has_mpam=1
cluster0.has_ras=1
cluster0.has_ras_double_fault=1
cluster0.has_rndr=1
cluster0.has_el3=1                                    # (bool  , init-time)
default = '1'      : Implements EL3
gic_distributor.GITS_BASER0-type=1
gic_distributor.ITS-count=1
gic_distributor.ITS-hardware-collection-count=1
gic_distributor.direct-lpi-support=1
pctl.startup=0.*.*.*

I wrapped the kernel with bootwrapper, with this patch on top to clear SCR_EL3.FIQ
and make pseudo-NMIs work:

diff --git a/arch/aarch64/boot.S b/arch/aarch64/boot.S
index 74705cded338..21e72f336057 100644
--- a/arch/aarch64/boot.S
+++ b/arch/aarch64/boot.S
@@ -51,6 +51,7 @@ _start:
 #ifndef KERNEL_32
        orr     x0, x0, #(1 << 10)              // 64-bit EL2
 #endif
+       orr     x0, x0, #(1 << 2)               // Route FIQs to EL3
        msr     scr_el3, x0
 
        msr     cptr_el3, xzr                   // Disable copro. traps to EL3
diff --git a/arch/aarch64/include/asm/gic-v3.h b/arch/aarch64/include/asm/gic-v3.h
index e743c02ffe5e..ea7862745872 100644
--- a/arch/aarch64/include/asm/gic-v3.h
+++ b/arch/aarch64/include/asm/gic-v3.h
@@ -32,4 +32,9 @@ static inline void gic_write_icc_ctlr(uint32_t val)
        asm volatile ("msr " ICC_CTLR_EL3 ", %0" : : "r" (val));
 }
 
+static inline void gic_write_icc_pmr(uint64_t val)
+{
+       asm volatile ("msr " ICC_PMR_EL1 ", %0" : : "r" (val));
+}
+
 #endif
diff --git a/gic-v3.c b/gic-v3.c
index c2f66bae4c35..5aeb51a1a7be 100644
--- a/gic-v3.c
+++ b/gic-v3.c
@@ -122,6 +122,9 @@ void gic_secure_init(void)
        gic_write_icc_sre(sre);
        isb();
 
+       gic_write_icc_pmr(0xff);
+       isb();
+
        gic_write_icc_ctlr(0);
        isb();
 }

Bootwrapper command line:

./configure \
    --host=aarch64-linux-gnu \
    --with-kernel-dir=${HOST_KERNEL_DIR} \
    --with-dtb=${HOST_KERNEL_DIR}/arch/arm64/boot/dts/arm/base_gicv3.dtb \
    --with-cmdline="earlycon=pl011,0x1c090000 console=ttyAMA0
irqchip.gicv3_pseudo_nmi=1" \
    --enable-gicv3 \
    --enable-psci \
    --with-initrd=${HOST_BUILDROOT_DIR}/output/images/rootfs.cpio.gz

I was also able to reproduce it on my rockpro64, but I was using the pseudo-NMI
series that allows them to be used on the board [1] and a modified dtb to remove
the big cores. I can post the log + whatever else is need to reproduce it, if
anyone thinks that would help.

[1] https://lists.infradead.org/pipermail/linux-arm-kernel/2020-June/580143.html

Thanks,

Alex

On 6/30/20 9:19 AM, Ard Biesheuvel wrote:
> When building very large kernels, the logic that emits replacement
> sequences for alternatives fails when relative branches are present
> in the code that is emitted into the .altinstr_replacement section
> and patched in at the original site and fixed up. The reason is that
> the linker will insert veneers if relative branches go out of range,
> and due to the relative distance of the .altinstr_replacement from
> the .text section where its branch targets usually live, veneers
> may be emitted at the end of the .altinstr_replacement section, with
> the relative branches in the sequence pointed at the veneers instead
> of the actual target.
>
> The alternatives patching logic will attempt to fix up the branch to
> point to its original target, which will be the veneer in this case,
> but given that the patch site is likely to be far away as well, it
> will be out of range and so patching will fail. There are other cases
> where these veneers are problematic, e.g., when the target of the
> branch is in .text while the patch site is in .init.text, in which
> case putting the replacement sequence inside .text may not help either.
>
> So let's use subsections to emit the replacement code as closely as
> possible to the patch site, to ensure that veneers are only likely to
> be emitted if they are required at the patch site as well, in which
> case they will be in range for the replacement sequence both before
> and after it is transported to the patch site.
>
> This will prevent alternative sequences in non-init code from being
> released from memory after boot, but this is tolerable given that the
> entire section is only 512 KB on an allyesconfig build (which weighs in
> at 500+ MB for the entire Image). Also, note that modules today carry
> the replacement sequences in non-init sections as well, and any of
> those that target init code will be emitted into init sections after
> this change.
>
> This fixes an early crash when booting an allyesconfig kernel on a
> system where any of the alternatives sequences containing relative
> branches are activated at boot (e.g., ARM64_HAS_PAN on TX2)
>
> Cc: Suzuki K Poulose <suzuki.poulose at arm.com>
> Cc: James Morse <james.morse at arm.com>
> Cc: Andre Przywara <andre.przywara at arm.com>
> Cc: Dave P Martin <dave.martin at arm.com>
> Signed-off-by: Ard Biesheuvel <ardb at kernel.org>
> ---
>  arch/arm64/include/asm/alternative.h | 16 ++++++++--------
>  arch/arm64/kernel/vmlinux.lds.S      |  3 ---
>  2 files changed, 8 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
> index 5e5dc05d63a0..12f0eb56a1cc 100644
> --- a/arch/arm64/include/asm/alternative.h
> +++ b/arch/arm64/include/asm/alternative.h
> @@ -73,11 +73,11 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
>  	".pushsection .altinstructions,\"a\"\n"				\
>  	ALTINSTR_ENTRY(feature)						\
>  	".popsection\n"							\
> -	".pushsection .altinstr_replacement, \"a\"\n"			\
> +	".subsection 1\n"						\
>  	"663:\n\t"							\
>  	newinstr "\n"							\
>  	"664:\n\t"							\
> -	".popsection\n\t"						\
> +	".previous\n\t"							\
>  	".org	. - (664b-663b) + (662b-661b)\n\t"			\
>  	".org	. - (662b-661b) + (664b-663b)\n"			\
>  	".endif\n"
> @@ -117,9 +117,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
>  662:	.pushsection .altinstructions, "a"
>  	altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
>  	.popsection
> -	.pushsection .altinstr_replacement, "ax"
> +	.subsection 1
>  663:	\insn2
> -664:	.popsection
> +664:	.previous
>  	.org	. - (664b-663b) + (662b-661b)
>  	.org	. - (662b-661b) + (664b-663b)
>  	.endif
> @@ -160,7 +160,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
>  	.pushsection .altinstructions, "a"
>  	altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
>  	.popsection
> -	.pushsection .altinstr_replacement, "ax"
> +	.subsection 1
>  	.align 2	/* So GAS knows label 661 is suitably aligned */
>  661:
>  .endm
> @@ -179,9 +179,9 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
>  .macro alternative_else
>  662:
>  	.if .Lasm_alt_mode==0
> -	.pushsection .altinstr_replacement, "ax"
> +	.subsection 1
>  	.else
> -	.popsection
> +	.previous
>  	.endif
>  663:
>  .endm
> @@ -192,7 +192,7 @@ static inline void apply_alternatives_module(void *start, size_t length) { }
>  .macro alternative_endif
>  664:
>  	.if .Lasm_alt_mode==0
> -	.popsection
> +	.previous
>  	.endif
>  	.org	. - (664b-663b) + (662b-661b)
>  	.org	. - (662b-661b) + (664b-663b)
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index 6827da7f3aa5..5423ffe0a987 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -165,9 +165,6 @@ SECTIONS
>  		*(.altinstructions)
>  		__alt_instructions_end = .;
>  	}
> -	.altinstr_replacement : {
> -		*(.altinstr_replacement)
> -	}
>  
>  	. = ALIGN(SEGMENT_ALIGN);
>  	__inittext_end = .;



More information about the linux-arm-kernel mailing list