[PATCH v3 1/2] firmware: Use lla to access all global symbols
Anup Patel
Anup.Patel at wdc.com
Fri Mar 19 09:40:50 GMT 2021
> -----Original Message-----
> From: opensbi <opensbi-bounces at lists.infradead.org> On Behalf Of Vincent
> Chen
> Sent: 17 March 2021 06:47
> To: opensbi at lists.infradead.org
> Cc: Vincent Chen <vincent.chen at sifive.com>
> Subject: [PATCH v3 1/2] firmware: Use lla to access all global symbols
>
> When OpenSBI is compiled with -fPIE, the assembler translates "la"
> into a GOT reference pattern, which costs an additional load instruction
> when obtaining the symbol address. However, if the symbol lies within
> the positive or negative 2GB range of the PC, we can use "lla"
> instead of "la" to avoid unneeded GOT references. This patch assumes that
> the OpenSBI image excluding the payload does not exceed 2GB. Based on
> this assumption, all "la" instructions are replaced by "lla" to avoid
> performance degradation when compiling with -fPIE.
>
> Signed-off-by: Vincent Chen <vincent.chen at sifive.com>
> Reviewed-by: Anup Patel <anup.patel at wdc.com>
Applied this patch to the riscv/opensbi repo.
Thanks,
Anup
>
> ---
> firmware/fw_base.S | 88 +++++++++++++++++++++----------------------
> firmware/fw_dynamic.S | 18 ++++-----
> firmware/fw_jump.S | 2 +-
> firmware/fw_payload.S | 2 +-
> firmware/payloads/test_head.S | 18 ++++-----
> 5 files changed, 64 insertions(+), 64 deletions(-)
>
> diff --git a/firmware/fw_base.S b/firmware/fw_base.S
> index ab33e11..6cc5f88 100644
> --- a/firmware/fw_base.S
> +++ b/firmware/fw_base.S
> @@ -57,39 +57,39 @@ _start:
> bne a0, a6, _wait_relocate_copy_done
> _try_lottery:
> /* Jump to relocation wait loop if we don't get relocation lottery */
> - la a6, _relocate_lottery
> + lla a6, _relocate_lottery
> li a7, 1
> amoadd.w a6, a7, (a6)
> bnez a6, _wait_relocate_copy_done
>
> /* Save load address */
> - la t0, _load_start
> - la t1, _start
> + lla t0, _load_start
> + lla t1, _start
> REG_S t1, 0(t0)
>
> /* Relocate if load address != link address */
> _relocate:
> - la t0, _link_start
> + lla t0, _link_start
> REG_L t0, 0(t0)
> - la t1, _link_end
> + lla t1, _link_end
> REG_L t1, 0(t1)
> - la t2, _load_start
> + lla t2, _load_start
> REG_L t2, 0(t2)
> sub t3, t1, t0
> add t3, t3, t2
> beq t0, t2, _relocate_done
> - la t4, _relocate_done
> + lla t4, _relocate_done
> sub t4, t4, t2
> add t4, t4, t0
> blt t2, t0, _relocate_copy_to_upper
> _relocate_copy_to_lower:
> ble t1, t2, _relocate_copy_to_lower_loop
> - la t3, _relocate_lottery
> + lla t3, _relocate_lottery
> BRANGE t2, t1, t3, _start_hang
> - la t3, _boot_status
> + lla t3, _boot_status
> BRANGE t2, t1, t3, _start_hang
> - la t3, _relocate
> - la t5, _relocate_done
> + lla t3, _relocate
> + lla t5, _relocate_done
> BRANGE t2, t1, t3, _start_hang
> BRANGE t2, t1, t5, _start_hang
> BRANGE t3, t5, t2, _start_hang
> @@ -102,12 +102,12 @@ _relocate_copy_to_lower_loop:
> jr t4
> _relocate_copy_to_upper:
> ble t3, t0, _relocate_copy_to_upper_loop
> - la t2, _relocate_lottery
> + lla t2, _relocate_lottery
> BRANGE t0, t3, t2, _start_hang
> - la t2, _boot_status
> + lla t2, _boot_status
> BRANGE t0, t3, t2, _start_hang
> - la t2, _relocate
> - la t5, _relocate_done
> + lla t2, _relocate
> + lla t5, _relocate_done
> BRANGE t0, t3, t2, _start_hang
> BRANGE t0, t3, t5, _start_hang
> BRANGE t2, t5, t0, _start_hang
> @@ -119,12 +119,12 @@ _relocate_copy_to_upper_loop:
> blt t0, t1, _relocate_copy_to_upper_loop
> jr t4
> _wait_relocate_copy_done:
> - la t0, _start
> - la t1, _link_start
> + lla t0, _start
> + lla t1, _link_start
> REG_L t1, 0(t1)
> beq t0, t1, _wait_for_boot_hart
> - la t2, _boot_status
> - la t3, _wait_for_boot_hart
> + lla t2, _boot_status
> + lla t3, _wait_for_boot_hart
> sub t3, t3, t0
> add t3, t3, t1
> 1:
> @@ -143,10 +143,10 @@ _relocate_done:
> * Mark relocate copy done
> * Use _boot_status copy relative to the load address
> */
> - la t0, _boot_status
> - la t1, _link_start
> + lla t0, _boot_status
> + lla t1, _link_start
> REG_L t1, 0(t1)
> - la t2, _load_start
> + lla t2, _load_start
> REG_L t2, 0(t2)
> sub t0, t0, t1
> add t0, t0, t2
> @@ -161,19 +161,19 @@ _relocate_done:
> call _reset_regs
>
> /* Zero-out BSS */
> - la s4, _bss_start
> - la s5, _bss_end
> + lla s4, _bss_start
> + lla s5, _bss_end
> _bss_zero:
> REG_S zero, (s4)
> add s4, s4, __SIZEOF_POINTER__
> blt s4, s5, _bss_zero
>
> /* Setup temporary trap handler */
> - la s4, _start_hang
> + lla s4, _start_hang
> csrw CSR_MTVEC, s4
>
> /* Setup temporary stack */
> - la s4, _fw_end
> + lla s4, _fw_end
> li s5, (SBI_SCRATCH_SIZE * 2)
> add sp, s4, s5
>
> @@ -184,7 +184,7 @@ _bss_zero:
>
> #ifdef FW_FDT_PATH
> /* Override previous arg1 */
> - la a1, fw_fdt_bin
> + lla a1, fw_fdt_bin
> #endif
>
> /*
> @@ -202,7 +202,7 @@ _bss_zero:
> * s7 -> HART Count
> * s8 -> HART Stack Size
> */
> - la a4, platform
> + lla a4, platform
> #if __riscv_xlen == 64
> lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
> lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
> @@ -212,7 +212,7 @@ _bss_zero:
> #endif
>
> /* Setup scratch space for all the HARTs*/
> - la tp, _fw_end
> + lla tp, _fw_end
> mul a5, s7, s8
> add tp, tp, a5
> /* Keep a copy of tp */
> @@ -230,8 +230,8 @@ _scratch_init:
>
> /* Initialize scratch space */
> /* Store fw_start and fw_size in scratch space */
> - la a4, _fw_start
> - la a5, _fw_end
> + lla a4, _fw_start
> + lla a5, _fw_end
> mul t0, s7, s8
> add a5, a5, t0
> sub a5, a5, a4
> @@ -253,16 +253,16 @@ _scratch_init:
> REG_S a0, SBI_SCRATCH_NEXT_MODE_OFFSET(tp)
> MOV_3R a0, s0, a1, s1, a2, s2
> /* Store warm_boot address in scratch space */
> - la a4, _start_warm
> + lla a4, _start_warm
> REG_S a4, SBI_SCRATCH_WARMBOOT_ADDR_OFFSET(tp)
> /* Store platform address in scratch space */
> - la a4, platform
> + lla a4, platform
> REG_S a4, SBI_SCRATCH_PLATFORM_ADDR_OFFSET(tp)
> /* Store hartid-to-scratch function address in scratch space */
> - la a4, _hartid_to_scratch
> + lla a4, _hartid_to_scratch
> REG_S a4, SBI_SCRATCH_HARTID_TO_SCRATCH_OFFSET(tp)
> /* Store trap-exit function address in scratch space */
> - la a4, _trap_exit
> + lla a4, _trap_exit
> REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(tp)
> /* Clear tmp0 in scratch space */
> REG_S zero, SBI_SCRATCH_TMP0_OFFSET(tp)
> @@ -343,7 +343,7 @@ _fdt_reloc_done:
>
> /* mark boot hart done */
> li t0, BOOT_STATUS_BOOT_HART_DONE
> - la t1, _boot_status
> + lla t1, _boot_status
> REG_S t0, 0(t1)
> fence rw, rw
> j _start_warm
> @@ -351,7 +351,7 @@ _fdt_reloc_done:
> /* waiting for boot hart to be done (_boot_status == 2) */
> _wait_for_boot_hart:
> li t0, BOOT_STATUS_BOOT_HART_DONE
> - la t1, _boot_status
> + lla t1, _boot_status
> REG_L t1, 0(t1)
> /* Reduce the bus traffic so that boot hart may proceed faster */
> nop
> @@ -369,7 +369,7 @@ _start_warm:
> csrw CSR_MIP, zero
>
> /* Find HART count and HART stack size */
> - la a4, platform
> + lla a4, platform
> #if __riscv_xlen == 64
> lwu s7, SBI_PLATFORM_HART_COUNT_OFFSET(a4)
> lwu s8, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(a4)
> @@ -400,7 +400,7 @@ _start_warm:
> 3: bge s6, s7, _start_hang
>
> /* Find the scratch space based on HART index */
> - la tp, _fw_end
> + lla tp, _fw_end
> mul a5, s7, s8
> add tp, tp, a5
> mul a5, s8, s6
> @@ -415,13 +415,13 @@ _start_warm:
> add sp, tp, zero
>
> /* Setup trap handler */
> - la a4, _trap_handler
> + lla a4, _trap_handler
> #if __riscv_xlen == 32
> csrr a5, CSR_MISA
> srli a5, a5, ('H' - 'A')
> andi a5, a5, 0x1
> beq a5, zero, _skip_trap_handler_rv32_hyp
> - la a4, _trap_handler_rv32_hyp
> + lla a4, _trap_handler_rv32_hyp
> _skip_trap_handler_rv32_hyp:
> #endif
> csrw CSR_MTVEC, a4
> @@ -432,7 +432,7 @@ _skip_trap_handler_rv32_hyp:
> srli a5, a5, ('H' - 'A')
> andi a5, a5, 0x1
> beq a5, zero, _skip_trap_exit_rv32_hyp
> - la a4, _trap_exit_rv32_hyp
> + lla a4, _trap_exit_rv32_hyp
> csrr a5, CSR_MSCRATCH
> REG_S a4, SBI_SCRATCH_TRAP_EXIT_OFFSET(a5)
> _skip_trap_exit_rv32_hyp:
> @@ -468,7 +468,7 @@ _hartid_to_scratch:
> * t1 -> HART Stack End
> * t2 -> Temporary
> */
> - la t2, platform
> + lla t2, platform
> #if __riscv_xlen == 64
> lwu t0, SBI_PLATFORM_HART_STACK_SIZE_OFFSET(t2)
> lwu t2, SBI_PLATFORM_HART_COUNT_OFFSET(t2)
> @@ -478,7 +478,7 @@ _hartid_to_scratch:
> #endif
> sub t2, t2, a1
> mul t2, t2, t0
> - la t1, _fw_end
> + lla t1, _fw_end
> add t1, t1, t2
> li t2, SBI_SCRATCH_SIZE
> sub a0, t1, t2
> diff --git a/firmware/fw_dynamic.S b/firmware/fw_dynamic.S
> index 8b56947..0705e63 100644
> --- a/firmware/fw_dynamic.S
> +++ b/firmware/fw_dynamic.S
> @@ -54,7 +54,7 @@ fw_boot_hart:
> */
> fw_save_info:
> /* Save next arg1 in 'a1' */
> - la a4, _dynamic_next_arg1
> + lla a4, _dynamic_next_arg1
> REG_S a1, (a4)
>
> /* Sanity checks */
> @@ -66,13 +66,13 @@ fw_save_info:
> bgt a3, a4, _bad_dynamic_info
>
> /* Save version == 0x1 fields */
> - la a4, _dynamic_next_addr
> + lla a4, _dynamic_next_addr
> REG_L a3, FW_DYNAMIC_INFO_NEXT_ADDR_OFFSET(a2)
> REG_S a3, (a4)
> - la a4, _dynamic_next_mode
> + lla a4, _dynamic_next_mode
> REG_L a3, FW_DYNAMIC_INFO_NEXT_MODE_OFFSET(a2)
> REG_S a3, (a4)
> - la a4, _dynamic_options
> + lla a4, _dynamic_options
> REG_L a3, FW_DYNAMIC_INFO_OPTIONS_OFFSET(a2)
> REG_S a3, (a4)
>
> @@ -80,7 +80,7 @@ fw_save_info:
> li a4, 0x2
> REG_L a3, FW_DYNAMIC_INFO_VERSION_OFFSET(a2)
> blt a3, a4, 2f
> - la a4, _dynamic_boot_hart
> + lla a4, _dynamic_boot_hart
> REG_L a3, FW_DYNAMIC_INFO_BOOT_HART_OFFSET(a2)
> REG_S a3, (a4)
> 2:
> @@ -96,7 +96,7 @@ fw_save_info:
> * The next arg1 should be returned in 'a0'.
> */
> fw_next_arg1:
> - la a0, _dynamic_next_arg1
> + lla a0, _dynamic_next_arg1
> REG_L a0, (a0)
> ret
>
> @@ -108,7 +108,7 @@ fw_next_arg1:
> * The next address should be returned in 'a0'.
> */
> fw_next_addr:
> - la a0, _dynamic_next_addr
> + lla a0, _dynamic_next_addr
> REG_L a0, (a0)
> ret
>
> @@ -120,7 +120,7 @@ fw_next_addr:
> * The next address should be returned in 'a0'
> */
> fw_next_mode:
> - la a0, _dynamic_next_mode
> + lla a0, _dynamic_next_mode
> REG_L a0, (a0)
> ret
>
> @@ -133,7 +133,7 @@ fw_next_mode:
> * The next address should be returned in 'a0'.
> */
> fw_options:
> - la a0, _dynamic_options
> + lla a0, _dynamic_options
> REG_L a0, (a0)
> ret
>
> diff --git a/firmware/fw_jump.S b/firmware/fw_jump.S
> index 8553f8c..5b24f8b 100644
> --- a/firmware/fw_jump.S
> +++ b/firmware/fw_jump.S
> @@ -59,7 +59,7 @@ fw_next_arg1:
> * The next address should be returned in 'a0'.
> */
> fw_next_addr:
> - la a0, _jump_addr
> + lla a0, _jump_addr
> REG_L a0, (a0)
> ret
>
> diff --git a/firmware/fw_payload.S b/firmware/fw_payload.S
> index 1ef121e..c53a3bb 100644
> --- a/firmware/fw_payload.S
> +++ b/firmware/fw_payload.S
> @@ -59,7 +59,7 @@ fw_next_arg1:
> * The next address should be returned in 'a0'.
> */
> fw_next_addr:
> - la a0, payload_bin
> + lla a0, payload_bin
> ret
>
> .section .entry, "ax", %progbits
> diff --git a/firmware/payloads/test_head.S b/firmware/payloads/test_head.S
> index 840013e..4852f71 100644
> --- a/firmware/payloads/test_head.S
> +++ b/firmware/payloads/test_head.S
> @@ -28,20 +28,20 @@
> .globl _start
> _start:
> /* Pick one hart to run the main boot sequence */
> - la a3, _hart_lottery
> + lla a3, _hart_lottery
> li a2, 1
> amoadd.w a3, a2, (a3)
> bnez a3, _start_hang
>
> /* Save a0 and a1 */
> - la a3, _boot_a0
> + lla a3, _boot_a0
> REG_S a0, 0(a3)
> - la a3, _boot_a1
> + lla a3, _boot_a1
> REG_S a1, 0(a3)
>
> /* Zero-out BSS */
> - la a4, _bss_start
> - la a5, _bss_end
> + lla a4, _bss_start
> + lla a5, _bss_end
> _bss_zero:
> REG_S zero, (a4)
> add a4, a4, __SIZEOF_POINTER__
> @@ -53,18 +53,18 @@ _start_warm:
> csrw CSR_SIP, zero
>
> /* Setup exception vectors */
> - la a3, _start_hang
> + lla a3, _start_hang
> csrw CSR_STVEC, a3
>
> /* Setup stack */
> - la a3, _payload_end
> + lla a3, _payload_end
> li a4, 0x2000
> add sp, a3, a4
>
> /* Jump to C main */
> - la a3, _boot_a0
> + lla a3, _boot_a0
> REG_L a0, 0(a3)
> - la a3, _boot_a1
> + lla a3, _boot_a1
> REG_L a1, 0(a3)
> call test_main
>
> --
> 2.7.4
>
>
> --
> opensbi mailing list
> opensbi at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi
More information about the opensbi
mailing list