[PATCH] RISC-V: avoid using t6/x31 in hand asm
Deepak Gupta
debug at rivosinc.com
Thu Jun 26 15:51:01 PDT 2025
On Thu, Jun 26, 2025 at 02:44:55PM -0700, Vineet Gupta wrote:
>This is technically NFC as nothing functional changes, except for usage
>of different regs in some of the asm code.
>
>This paves the way for using t6 as a global reg for, say, hoisting the percpu
>base address (with further compiler toggles such as -ffixed-t6).
>
>Lightly tested on QEMU: survives boot/hackbench (thx Atish for the
>testing).
Do we care about 32-bit? I see that the BPF JIT for 32-bit uses t6 in
codegen. It doesn't look like the 64-bit JIT uses t6 in codegen (but take a
closer look). CC: bjorn
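
For context, the "global reg" usage the commit message alludes to would look
roughly like this (a minimal sketch; it assumes a kernel-wide -ffixed-t6
build, and the variable/helper names are made up for illustration):

  /* t6 (x31) reserved kernel-wide; the compiler never allocates it. */
  register unsigned long percpu_base asm("t6");

  /* Hypothetical accessor: a per-CPU access becomes base + offset. */
  static inline void *percpu_ptr(unsigned long offset)
  {
          return (void *)(percpu_base + offset);
  }

With -ffixed-t6 the compiler keeps its hands off the register, so the only
remaining users to audit are hand-written asm files like the ones touched
below.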
>
>Cc: Yunhui Cui <cuiyunhui at bytedance.com>
>Cc: Atish Patra <atish.patra at linux.dev>
>Signed-off-by: Vineet Gupta <vineetg at rivosinc.com>
>---
> arch/riscv/kernel/entry.S | 14 +++----
> arch/riscv/kernel/mcount.S | 4 +-
> arch/riscv/lib/memcpy.S | 66 ++++++++++++++++-----------------
> arch/riscv/lib/memmove.S | 20 +++++-----
> arch/riscv/lib/strncmp.S | 6 +--
> arch/riscv/lib/uaccess.S | 24 ++++++------
> arch/riscv/lib/uaccess_vector.S | 12 +++---
> 7 files changed, 73 insertions(+), 73 deletions(-)
>
>diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
>index 75656afa2d6b..0b5791feae5f 100644
>--- a/arch/riscv/kernel/entry.S
>+++ b/arch/riscv/kernel/entry.S
>@@ -285,13 +285,13 @@ ASM_NOKPROBE(ret_from_exception)
> #ifdef CONFIG_VMAP_STACK
> SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
> /* we reach here from kernel context, sscratch must be 0 */
>- csrrw x31, CSR_SCRATCH, x31
>- asm_per_cpu sp, overflow_stack, x31
>- li x31, OVERFLOW_STACK_SIZE
>- add sp, sp, x31
>- /* zero out x31 again and restore x31 */
>- xor x31, x31, x31
>- csrrw x31, CSR_SCRATCH, x31
>+ csrrw x30, CSR_SCRATCH, x30
>+ asm_per_cpu sp, overflow_stack, x30
>+ li x30, OVERFLOW_STACK_SIZE
>+ add sp, sp, x30
>+ /* zero out x30 again and restore x30 */
>+ xor x30, x30, x30
>+ csrrw x30, CSR_SCRATCH, x30
>
> addi sp, sp, -(PT_SIZE_ON_STACK)
>
>diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
>index da4a4000e57e..d7e4303a18a4 100644
>--- a/arch/riscv/kernel/mcount.S
>+++ b/arch/riscv/kernel/mcount.S
>@@ -89,8 +89,8 @@ SYM_FUNC_START(_mcount)
>
> la t3, ftrace_graph_entry
> REG_L t2, 0(t3)
>- la t6, ftrace_graph_entry_stub
>- bne t2, t6, .Ldo_ftrace_graph_caller
>+ la t5, ftrace_graph_entry_stub
>+ bne t2, t5, .Ldo_ftrace_graph_caller
> #endif
> la t3, ftrace_trace_function
> REG_L t5, 0(t3)
>diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
>index 44e009ec5fef..37ebe9f2b355 100644
>--- a/arch/riscv/lib/memcpy.S
>+++ b/arch/riscv/lib/memcpy.S
>@@ -8,13 +8,13 @@
>
> /* void *memcpy(void *, const void *, size_t) */
> SYM_FUNC_START(__memcpy)
>- move t6, a0 /* Preserve return value */
>+ move t5, a0 /* Preserve return value */
>
> /* Defer to byte-oriented copy for small sizes */
> sltiu a3, a2, 128
> bnez a3, 4f
> /* Use word-oriented copy only if low-order bits match */
>- andi a3, t6, SZREG-1
>+ andi a3, a0, SZREG-1
> andi a4, a1, SZREG-1
> bne a3, a4, 4f
>
>@@ -30,8 +30,8 @@ SYM_FUNC_START(__memcpy)
> 1:
> lb a5, 0(a1)
> addi a1, a1, 1
>- sb a5, 0(t6)
>- addi t6, t6, 1
>+ sb a5, 0(t5)
>+ addi t5, t5, 1
> bltu a1, a3, 1b
> sub a2, a2, a4 /* Update count */
>
>@@ -49,31 +49,31 @@ SYM_FUNC_START(__memcpy)
> REG_L t2, 6*SZREG(a1)
> REG_L t3, 7*SZREG(a1)
> REG_L t4, 8*SZREG(a1)
>- REG_L t5, 9*SZREG(a1)
>- REG_S a4, 0(t6)
>- REG_S a5, SZREG(t6)
>- REG_S a6, 2*SZREG(t6)
>- REG_S a7, 3*SZREG(t6)
>- REG_S t0, 4*SZREG(t6)
>- REG_S t1, 5*SZREG(t6)
>- REG_S t2, 6*SZREG(t6)
>- REG_S t3, 7*SZREG(t6)
>- REG_S t4, 8*SZREG(t6)
>- REG_S t5, 9*SZREG(t6)
>- REG_L a4, 10*SZREG(a1)
>- REG_L a5, 11*SZREG(a1)
>- REG_L a6, 12*SZREG(a1)
>- REG_L a7, 13*SZREG(a1)
>- REG_L t0, 14*SZREG(a1)
>- REG_L t1, 15*SZREG(a1)
>+ REG_S a4, 0(t5)
>+ REG_S a5, SZREG(t5)
>+ REG_S a6, 2*SZREG(t5)
>+ REG_S a7, 3*SZREG(t5)
>+ REG_S t0, 4*SZREG(t5)
>+ REG_S t1, 5*SZREG(t5)
>+ REG_S t2, 6*SZREG(t5)
>+ REG_S t3, 7*SZREG(t5)
>+ REG_S t4, 8*SZREG(t5)
>+ REG_L a4, 9*SZREG(a1)
>+ REG_L a5, 10*SZREG(a1)
>+ REG_L a6, 11*SZREG(a1)
>+ REG_L a7, 12*SZREG(a1)
>+ REG_L t0, 13*SZREG(a1)
>+ REG_L t1, 14*SZREG(a1)
>+ REG_L t2, 15*SZREG(a1)
> addi a1, a1, 16*SZREG
>- REG_S a4, 10*SZREG(t6)
>- REG_S a5, 11*SZREG(t6)
>- REG_S a6, 12*SZREG(t6)
>- REG_S a7, 13*SZREG(t6)
>- REG_S t0, 14*SZREG(t6)
>- REG_S t1, 15*SZREG(t6)
>- addi t6, t6, 16*SZREG
>+ REG_S a4, 9*SZREG(t5)
>+ REG_S a5, 10*SZREG(t5)
>+ REG_S a6, 11*SZREG(t5)
>+ REG_S a7, 12*SZREG(t5)
>+ REG_S t0, 13*SZREG(t5)
>+ REG_S t1, 14*SZREG(t5)
>+ REG_S t2, 15*SZREG(t5)
>+ addi t5, t5, 16*SZREG
> bltu a1, a3, 3b
> andi a2, a2, (16*SZREG)-1 /* Update count */
>
>@@ -83,15 +83,15 @@ SYM_FUNC_START(__memcpy)
> add a3, a1, a2
>
> /* Use word-oriented copy if co-aligned to word boundary */
>- or a5, a1, t6
>+ or a5, a1, t5
> or a5, a5, a3
> andi a5, a5, 3
> bnez a5, 5f
> 7:
> lw a4, 0(a1)
> addi a1, a1, 4
>- sw a4, 0(t6)
>- addi t6, t6, 4
>+ sw a4, 0(t5)
>+ addi t5, t5, 4
> bltu a1, a3, 7b
>
> ret
>@@ -99,8 +99,8 @@ SYM_FUNC_START(__memcpy)
> 5:
> lb a4, 0(a1)
> addi a1, a1, 1
>- sb a4, 0(t6)
>- addi t6, t6, 1
>+ sb a4, 0(t5)
>+ addi t5, t5, 1
> bltu a1, a3, 5b
> 6:
> ret
>diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
>index cb3e2e7ef0ba..5fbd84c2cf18 100644
>--- a/arch/riscv/lib/memmove.S
>+++ b/arch/riscv/lib/memmove.S
>@@ -35,7 +35,7 @@ SYM_FUNC_START(__memmove)
> * Forward Copy: t3 - Index counter of dest
> * Reverse Copy: t4 - Index counter of dest
> * Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
>- * Both Copy Modes: t6 - Non-Inclusive last multibyte/aligned of dest
>+ * Both Copy Modes: a3 - Non-Inclusive last multibyte/aligned of dest
> * Both Copy Modes: t0 - Link / Temporary for load-store
> * Both Copy Modes: t1 - Temporary for load-store
> * Both Copy Modes: t2 - Temporary for load-store
>@@ -47,7 +47,7 @@ SYM_FUNC_START(__memmove)
>
> /*
> * Solve for some register values now.
>- * Byte copy does not need t5 or t6.
>+ * Byte copy does not need t5 or a3.
> */
> mv t3, a0
> add t4, a0, a2
>@@ -62,10 +62,10 @@ SYM_FUNC_START(__memmove)
> beqz t0, .Lbyte_copy
>
> /*
>- * Now solve for t5 and t6.
>+ * Now solve for t5 and a3.
> */
> andi t5, t3, -SZREG
>- andi t6, t4, -SZREG
>+ andi a3, t4, -SZREG
> /*
> * If dest(Register t3) rounded down to the nearest naturally
> * aligned SZREG address, does not equal dest, then add SZREG
>@@ -99,7 +99,7 @@ SYM_FUNC_START(__memmove)
> slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
> sub a5, a1, t3 /* Find the difference between src and dest */
> andi a1, a1, -SZREG /* Align the src pointer */
>- addi a2, t6, SZREG /* The other breakpoint for the unrolled loop*/
>+ addi a2, a3, SZREG /* The other breakpoint for the unrolled loop*/
>
> /*
> * Compute The Inverse Shift
>@@ -147,9 +147,9 @@ SYM_FUNC_START(__memmove)
> or t2, t1, t2
> REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)
>
>- bne t3, t6, 1b
>+ bne t3, a3, 1b
> 2:
>- mv t3, t6 /* Fix the dest pointer in case the loop was broken */
>+ mv t3, a3 /* Fix the dest pointer in case the loop was broken */
>
> add a1, t3, a5 /* Restore the src pointer */
> j .Lbyte_copy_forward /* Copy any remaining bytes */
>@@ -232,7 +232,7 @@ SYM_FUNC_START(__memmove)
> addi a1, a1, SZREG
> addi t3, t3, SZREG
> REG_S t1, (-1 * SZREG)(t3)
>- bne t3, t6, 1b
>+ bne t3, a3, 1b
>
> j .Lbyte_copy_forward /* Copy any remaining bytes */
>
>@@ -269,13 +269,13 @@ SYM_FUNC_START(__memmove)
> jalr zero, 0x0(t0) /* Return to multibyte copy loop */
>
> .Lbyte_copy_until_aligned_reverse:
>- beq t4, t6, 2f
>+ beq t4, a3, 2f
> 1:
> lb t1, -1(a4)
> addi a4, a4, -1
> addi t4, t4, -1
> sb t1, 0(t4)
>- bne t4, t6, 1b
>+ bne t4, a3, 1b
> 2:
> jalr zero, 0x0(t0) /* Return to multibyte copy loop */
>
>diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
>index 062000c468c8..536d8ba8caa1 100644
>--- a/arch/riscv/lib/strncmp.S
>+++ b/arch/riscv/lib/strncmp.S
>@@ -63,7 +63,7 @@ strncmp_zbb:
> * a2 - number of characters to compare
> *
> * Clobbers
>- * t0, t1, t2, t3, t4, t5, t6
>+ * t0, t1, t2, t3, t4, t5
> */
>
> or t2, a0, a1
>@@ -73,12 +73,12 @@ strncmp_zbb:
> bnez t2, 3f
>
> /* Adjust limit for fast-path. */
>- andi t6, t4, -SZREG
>+ andi t0, t4, -SZREG
>
> /* Main loop for aligned string. */
> .p2align 3
> 1:
>- bge a0, t6, 3f
>+ bge a0, t0, 3f
> REG_L t0, 0(a0)
> REG_L t1, 0(a1)
> orc.b t3, t0
>diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
>index 4efea1b3326c..25fdedc0efdb 100644
>--- a/arch/riscv/lib/uaccess.S
>+++ b/arch/riscv/lib/uaccess.S
>@@ -27,16 +27,16 @@ EXPORT_SYMBOL(__asm_copy_from_user)
>
> SYM_FUNC_START(fallback_scalar_usercopy)
> /* Enable access to user memory */
>- li t6, SR_SUM
>- csrs CSR_STATUS, t6
>- mv t6, ra
>+ li a7, SR_SUM
>+ csrs CSR_STATUS, a7
>+ mv a7, ra
>
> call fallback_scalar_usercopy_sum_enabled
>
> /* Disable access to user memory */
>- mv ra, t6
>- li t6, SR_SUM
>- csrc CSR_STATUS, t6
>+ mv ra, a7
>+ li a7, SR_SUM
>+ csrc CSR_STATUS, a7
> ret
> SYM_FUNC_END(fallback_scalar_usercopy)
>
>@@ -117,7 +117,7 @@ SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled)
> fixup REG_L a4, 0(a1), 10f
> fixup REG_L a5, SZREG(a1), 10f
> fixup REG_L a6, 2*SZREG(a1), 10f
>- fixup REG_L a7, 3*SZREG(a1), 10f
>+ fixup REG_L a2, 3*SZREG(a1), 10f
> fixup REG_L t1, 4*SZREG(a1), 10f
> fixup REG_L t2, 5*SZREG(a1), 10f
> fixup REG_L t3, 6*SZREG(a1), 10f
>@@ -125,7 +125,7 @@ SYM_FUNC_START(fallback_scalar_usercopy_sum_enabled)
> fixup REG_S a4, 0(a0), 10f
> fixup REG_S a5, SZREG(a0), 10f
> fixup REG_S a6, 2*SZREG(a0), 10f
>- fixup REG_S a7, 3*SZREG(a0), 10f
>+ fixup REG_S a2, 3*SZREG(a0), 10f
> fixup REG_S t1, 4*SZREG(a0), 10f
> fixup REG_S t2, 5*SZREG(a0), 10f
> fixup REG_S t3, 6*SZREG(a0), 10f
>@@ -217,8 +217,8 @@ SYM_FUNC_END(fallback_scalar_usercopy_sum_enabled)
> SYM_FUNC_START(__clear_user)
>
> /* Enable access to user memory */
>- li t6, SR_SUM
>- csrs CSR_STATUS, t6
>+ li t5, SR_SUM
>+ csrs CSR_STATUS, t5
>
> add a3, a0, a1
> addi t0, a0, SZREG-1
>@@ -240,7 +240,7 @@ SYM_FUNC_START(__clear_user)
>
> 3:
> /* Disable access to user memory */
>- csrc CSR_STATUS, t6
>+ csrc CSR_STATUS, t5
> li a0, 0
> ret
> 4: /* Edge case: unalignment */
>@@ -257,7 +257,7 @@ SYM_FUNC_START(__clear_user)
> /* Exception fixup code */
> 11:
> /* Disable access to user memory */
>- csrc CSR_STATUS, t6
>+ csrc CSR_STATUS, t5
> sub a0, a3, a0
> ret
> SYM_FUNC_END(__clear_user)
>diff --git a/arch/riscv/lib/uaccess_vector.S b/arch/riscv/lib/uaccess_vector.S
>index 03b5560609a2..401fde708c77 100644
>--- a/arch/riscv/lib/uaccess_vector.S
>+++ b/arch/riscv/lib/uaccess_vector.S
>@@ -22,16 +22,16 @@
>
> SYM_FUNC_START(__asm_vector_usercopy)
> /* Enable access to user memory */
>- li t6, SR_SUM
>- csrs CSR_STATUS, t6
>- mv t6, ra
>+ li a7, SR_SUM
>+ csrs CSR_STATUS, a7
>+ mv a7, ra
>
> call __asm_vector_usercopy_sum_enabled
>
> /* Disable access to user memory */
>- mv ra, t6
>- li t6, SR_SUM
>- csrc CSR_STATUS, t6
>+ mv ra, a7
>+ li a7, SR_SUM
>+ csrc CSR_STATUS, a7
> ret
> SYM_FUNC_END(__asm_vector_usercopy)
>
>--
>2.43.0
>
>