[bootwrapper PATCH v2 09/13] aarch64: move the bulk of EL3 initialization to C

Andre Przywara andre.przywara at arm.com
Mon Jan 17 06:31:04 PST 2022


On Fri, 14 Jan 2022 10:56:49 +0000
Mark Rutland <mark.rutland at arm.com> wrote:

Hi Mark,

> The majority of state that we initialize at EL3 is necessary for code at
> lower ELs to function, but isnt' necessary for the boot-wrapper itself.
> Given that, it would be better to write this in C where it can be
> written mode clearly, and where it will be possible to add logging/debug
> logic.

Ah, thanks, that looks much nicer and easier to read now, also is more
robust, as keeping register values alive for more than a few assembly
lines always scares me.

> This patch migrates the AArch64 EL3 initialization to C.
> 
> There should be no functional change as a result of this patch.

I compared the removed assembly code against to added C code, and also
checked the register bits against the ARM ARM.
Two (and a half) things stood out, see below:

> 
> Signed-off-by: Mark Rutland <mark.rutland at arm.com>
> ---
>  Makefile.am                    |  2 +-
>  arch/aarch64/boot.S            | 92 +---------------------------------
>  arch/aarch64/include/asm/cpu.h | 36 ++++++++++++-
>  arch/aarch64/init.c            | 69 +++++++++++++++++++++++++
>  4 files changed, 107 insertions(+), 92 deletions(-)
> 
> diff --git a/Makefile.am b/Makefile.am
> index 885a77c..a598b95 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -153,7 +153,7 @@ $(COMMON_SRC):
>  %.o: %.S Makefile | $(ARCH_SRC)
>  	$(CC) $(CPPFLAGS) -D__ASSEMBLY__ $(CFLAGS) $(DEFINES) -c -o $@ $<
>  
> -%.o: %.c Makefile | $(COMMON_SRC)
> +%.o: %.c Makefile | $(ARCH_SRC) $(COMMON_SRC)
>  	$(CC) $(CPPFLAGS) $(CFLAGS) $(DEFINES) -c -o $@ $<
>  
>  model.lds: $(LD_SCRIPT) Makefile
> diff --git a/arch/aarch64/boot.S b/arch/aarch64/boot.S
> index 17b4a75..c0ec518 100644
> --- a/arch/aarch64/boot.S
> +++ b/arch/aarch64/boot.S
> @@ -45,96 +45,6 @@ reset_at_el3:
>  	msr	sctlr_el3, x0
>  	isb
>  
> -	mov	x0, #0x30			// RES1
> -	orr	x0, x0, #(1 << 0)		// Non-secure EL1
> -	orr	x0, x0, #(1 << 8)		// HVC enable
> -
> -	/* Enable pointer authentication if present */
> -	mrs	x1, id_aa64isar1_el1
> -	/* We check for APA+API and GPA+GPI */
> -	ldr	x2, =((0xff << 24) | (0xff << 4))
> -	and	x1, x1, x2
> -	cbz	x1, 1f
> -
> -	orr	x0, x0, #(1 << 16)		// AP key enable
> -	orr	x0, x0, #(1 << 17)		// AP insn enable
> -1:
> -	/* Enable TME if present */
> -	mrs	x1, id_aa64isar0_el1
> -	ubfx	x1, x1, #24, #4
> -	cbz	x1, 1f
> -
> -	orr	x0, x0, #(1 << 34)		// TME enable
> -1:
> -	/* Enable FGT if present */
> -	mrs	x1, id_aa64mmfr0_el1
> -	ubfx	x1, x1, #56, #4
> -	cbz	x1, 1f
> -
> -	orr	x0, x0, #(1 << 27)		// FGT enable
> -1:
> -	/* Enable ECV2 if present (allows CNTPOFF_EL2) */
> -	mrs	x1, id_aa64mmfr0_el1
> -	ubfx	x1, x1, #60, #4
> -	cmp	x1, #2
> -	b.lt	1f
> -
> -	orr	x0, x0, #(1 << 28)		// ECV enable
> -1:
> -	/* Enable MTE if present */
> -	mrs	x10, id_aa64pfr1_el1
> -	ubfx	x10, x10, #8, #4
> -	cmp	x10, #2
> -	b.lt	1f
> -
> -	orr	x0, x0, #(1 << 26)		// ATA enable
> -1:
> -#ifndef KERNEL_32
> -	orr	x0, x0, #(1 << 10)		// 64-bit EL2
> -#endif
> -	msr	scr_el3, x0
> -
> -	msr	cptr_el3, xzr			// Disable copro. traps to EL3
> -
> -	mov	x0, xzr
> -	mrs	x1, id_aa64dfr0_el1
> -	ubfx	x1, x1, #32, #4
> -	cbz	x1, 1f
> -
> -	// Enable SPE for the non-secure world.
> -	orr	x0, x0, #(0x3 << 12)
> -
> -	// Do not trap PMSNEVFR_EL1 if present
> -	cmp	x1, #3
> -	b.lt	1f
> -	orr	x0, x0, #(1 << 36)
> -
> -1:	mrs	x1, id_aa64dfr0_el1
> -	ubfx	x1, x1, #44, #4
> -	cbz	x1, 1f
> -
> -	// Enable TRBE for the non-secure world.
> -	ldr	x1, =(0x3 << 24)
> -	orr	x0, x0, x1
> -
> -1:	msr	mdcr_el3, x0			// Disable traps to EL3
> -
> -	mrs	x0, id_aa64pfr0_el1
> -	ubfx	x0, x0, #32, #4			// SVE present?
> -	cbz	x0, 1f				// Skip SVE init if not
> -
> -	mrs	x0, cptr_el3
> -	orr	x0, x0, #CPTR_EL3_EZ		// enable SVE
> -	msr	cptr_el3, x0
> -	isb
> -
> -	mov	x0, #ZCR_EL3_LEN_MASK		// SVE: Enable full vector len
> -	msr	ZCR_EL3, x0			// for EL2.
> -
> -1:
> -	ldr	x0, =COUNTER_FREQ
> -	msr	cntfrq_el0, x0
> -
>  	cpuid	x0, x1
>  	bl	find_logical_id
>  	cmp	x0, #MPIDR_INVALID
> @@ -143,6 +53,8 @@ reset_at_el3:
>  
>  	bl	cpu_init_bootwrapper
>  
> +	bl	cpu_init_el3
> +
>  	bl	gic_secure_init
>  
>  	b	start_el3
> diff --git a/arch/aarch64/include/asm/cpu.h b/arch/aarch64/include/asm/cpu.h
> index 1e9141a..e078ae4 100644
> --- a/arch/aarch64/include/asm/cpu.h
> +++ b/arch/aarch64/include/asm/cpu.h
> @@ -31,7 +31,41 @@
>  #define SCTLR_EL1_RES1		(BIT(29) | BIT(28) | BIT(23) | BIT(22) | \
>  				 BIT(11) | BIT(8) | BIT(7) | BIT(4))
>  
> -#define HCR_EL2_RES1		(BIT(1))
> +#define ZCR_EL3				s3_6_c1_c2_0
> +#define ZCR_EL3_LEN			BITS(3, 1)

The (current) actual length field should be BITS(3, 0), no?

> +
> +#define MDCR_EL3_NSPB_NS_NOTRAP		(UL(3) << 12)
> +#define MDCR_EL3_NSTB_NS_NOTRAP		(UL(3) << 24)
> +#define MDCR_EL3_ENPMSN			BIT(36)
> +
> +#define SCR_EL3_RES1			BITS(5, 4)
> +#define SCR_EL3_NS			BIT(0)
> +#define SCR_EL3_HCE			BIT(8)
> +#define SCR_EL3_RW			BIT(10)
> +#define SCR_EL3_APK			BIT(16)
> +#define SCR_EL3_API			BIT(17)
> +#define SCR_EL3_ATA			BIT(26)
> +#define SCR_EL3_FGTEN			BIT(27)
> +#define SCR_EL3_ECVEN			BIT(28)
> +#define SCR_EL3_TME			BIT(34)
> +
> +#define HCR_EL2_RES1			BIT(1)
> +
> +#define ID_AA64DFR0_EL1_PMSVER		BITS(35, 32)
> +#define ID_AA64DFR0_EL1_TRACEBUFFER	BITS(47, 44)
> +
> +#define ID_AA64ISAR0_EL1_TME		BITS(27, 24)
> +
> +#define ID_AA64ISAR1_EL1_APA		BITS(7, 4)
> +#define ID_AA64ISAR1_EL1_API		BITS(11, 8)
> +#define ID_AA64ISAR1_EL1_GPA		BITS(27, 24)
> +#define ID_AA64ISAR1_EL1_GPI		BITS(31, 28)
> +
> +#define ID_AA64MMFR0_EL1_FGT		BITS(59, 56)
> +#define ID_AA64MMFR0_EL1_ECV		BITS(63, 60)
> +
> +#define ID_AA64PFR1_EL1_MTE		BITS(11, 8)
> +#define ID_AA64PFR0_EL1_SVE		BITS(35, 32)
>  
>  /*
>   * Initial register values required for the boot-wrapper to run out-of-reset.
> diff --git a/arch/aarch64/init.c b/arch/aarch64/init.c
> index 82816e7..ded9871 100644
> --- a/arch/aarch64/init.c
> +++ b/arch/aarch64/init.c
> @@ -8,6 +8,7 @@
>   */
>  #include <cpu.h>
>  #include <platform.h>
> +#include <stdbool.h>
>  
>  void announce_arch(void)
>  {
> @@ -17,3 +18,71 @@ void announce_arch(void)
>  	print_char('0' + el);
>  	print_string("\r\n");
>  }
> +
> +static inline bool kernel_is_32bit(void)
> +{
> +#ifdef KERNEL_32
> +	return true;
> +#else
> +	return false;
> +#endif
> +}
> +
> +static inline bool cpu_has_pauth(void)
> +{
> +	const unsigned long id_pauth = ID_AA64ISAR1_EL1_APA |
> +				       ID_AA64ISAR1_EL1_API |
> +				       ID_AA64ISAR1_EL1_GPA |
> +				       ID_AA64ISAR1_EL1_GPI;
> +
> +	return mrs(ID_AA64ISAR1_EL1) & id_pauth;
> +}
> +
> +void cpu_init_el3(void)
> +{
> +	unsigned long scr = SCR_EL3_RES1 | SCR_EL3_NS | SCR_EL3_HCE;
> +	unsigned long mdcr = 0;
> +	unsigned long cptr = 0;
> +
> +	if (cpu_has_pauth())
> +		scr |= SCR_EL3_APK | SCR_EL3_API;
> +
> +	if (mrs_field(ID_AA64ISAR0_EL1, TME))
> +		scr |= SCR_EL3_TME;
> +
> +	if (mrs_field(ID_AA64MMFR0_EL1, FGT))
> +		scr |= SCR_EL3_FGTEN;
> +
> +	if (mrs_field(ID_AA64MMFR0_EL1, ECV) >= 2)
> +		scr |= SCR_EL3_ECVEN;
> +
> +	if (mrs_field(ID_AA64PFR1_EL1, MTE))

The assembly code checked for >=2, which seems correct to me, as
SCR_EL3_ATA is about MTE2?

> +		scr |= SCR_EL3_ATA;
> +
> +	if (!kernel_is_32bit())
> +		scr |= SCR_EL3_RW;
> +
> +	msr(SCR_EL3, scr);
> +
> +	msr(CPTR_EL3, cptr);
> +
> +	if (mrs_field(ID_AA64DFR0_EL1, PMSVER))
> +		mdcr |= MDCR_EL3_NSPB_NS_NOTRAP;
> +
> +	if (mrs_field(ID_AA64DFR0_EL1, PMSVER) >= 3)
> +		mdcr |= MDCR_EL3_ENPMSN;
> +
> +	if (mrs_field(ID_AA64DFR0_EL1, TRACEBUFFER))
> +		mdcr |= MDCR_EL3_NSTB_NS_NOTRAP;
> +
> +	msr(MDCR_EL3, mdcr);
> +
> +	if (mrs_field(ID_AA64PFR0_EL1, SVE)) {
> +		cptr |= CPTR_EL3_EZ;
> +		msr(CPTR_EL3, cptr);
> +		isb();
> +		msr(ZCR_EL3, ZCR_EL3_LEN);

So when comparing this to the other uses of XXX_EL3_YYY, they typically
describe a mask, but here we seems to abuse this as a value? And apart
from bit 0 missing from it (as noted above), the existing code writes 0x1ff
into that register, presumable to cover future vector length extensions
beyond 2048 bits (which those RAZ/WI fields in bits[8:4] seem to suggest).
So shall we define ZCR_EL3_MAX_VEC_LEN to 0x1ff above, and then use that?
Or ignore the crystal ball, and just stick with 2048 bits, by writing 0xf?

The rest passed the checks.

Cheers,
Andre

> +	}
> +
> +	msr(CNTFRQ_EL0, COUNTER_FREQ);
> +}




More information about the linux-arm-kernel mailing list