[PATCH v2 8/8] [NOT-FOR-UPSTREAM] Test program for misaligned load/store
Anup Patel
apatel at ventanamicro.com
Tue Jun 16 01:23:49 PDT 2026
On Fri, Jun 5, 2026 at 8:43 PM Bo Gan <ganboing at gmail.com> wrote:
>
> Build: gcc -o test-misaligned-ldst test-misaligned-ldst.c ldst.S
>
> Failure observed before the fix on QEMU with 64-bit Linux and 32-bit test
>
> ...
> loadfp 64, insn cfldsp, n=64, off=1, cmp=788796a5b4c3d2e1
> loadfp 64, insn cfldsp, n=64, off=2, cmp=69788796a5b4c3d2
> loadfp 64, insn cfldsp, n=64, off=3, cmp=5a69788796a5b4c3
> loadfp 64, insn cfldsp, n=64, off=4, cmp=4b5a69788796a5b4
> loadfp 64, insn cfldsp, n=64, off=5, cmp=3c4b5a69788796a5
> loadfp 64, insn cfldsp, n=64, off=6, cmp=2d3c4b5a69788796
> loadfp 64, insn cfldsp, n=64, off=7, cmp=1e2d3c4b5a697887
> loadfp 32, insn cflw, n=32, off=1, cmp=ffffffffb4c3d2e1
> ./test32: failed at load_f 1 4 0: 0
>
> With the patch series:
>
> ...
> loadfp 64, insn cfldsp, n=64, off=1, cmp=788796a5b4c3d2e1
> loadfp 64, insn cfldsp, n=64, off=2, cmp=69788796a5b4c3d2
> loadfp 64, insn cfldsp, n=64, off=3, cmp=5a69788796a5b4c3
> loadfp 64, insn cfldsp, n=64, off=4, cmp=4b5a69788796a5b4
> loadfp 64, insn cfldsp, n=64, off=5, cmp=3c4b5a69788796a5
> loadfp 64, insn cfldsp, n=64, off=6, cmp=2d3c4b5a69788796
> loadfp 64, insn cfldsp, n=64, off=7, cmp=1e2d3c4b5a697887
> loadfp 32, insn cflw, n=32, off=1, cmp=ffffffffb4c3d2e1
> loadfp 32, insn cflw, n=32, off=2, cmp=ffffffffa5b4c3d2
> loadfp 32, insn cflw, n=32, off=3, cmp=ffffffff96a5b4c3
> loadfp 32, insn cflw, n=32, off=5, cmp=ffffffff788796a5
> loadfp 32, insn cflw, n=32, off=6, cmp=ffffffff69788796
> loadfp 32, insn cflw, n=32, off=7, cmp=ffffffff5a697887
> loadfp 32, insn cflw, n=32, off=9, cmp=ffffffff3c4b5a69
> loadfp 32, insn cflw, n=32, off=10, cmp=ffffffff2d3c4b5a
> loadfp 32, insn cflw, n=32, off=11, cmp=ffffffff1e2d3c4b
> loadfp 32, insn cflwsp, n=64, off=1, cmp=ffffffffb4c3d2e1
> loadfp 32, insn cflwsp, n=64, off=2, cmp=ffffffffa5b4c3d2
> loadfp 32, insn cflwsp, n=64, off=3, cmp=ffffffff96a5b4c3
> loadfp 32, insn cflwsp, n=64, off=5, cmp=ffffffff788796a5
> loadfp 32, insn cflwsp, n=64, off=6, cmp=ffffffff69788796
> loadfp 32, insn cflwsp, n=64, off=7, cmp=ffffffff5a697887
> loadfp 32, insn cflwsp, n=64, off=9, cmp=ffffffff3c4b5a69
> loadfp 32, insn cflwsp, n=64, off=10, cmp=ffffffff2d3c4b5a
> loadfp 32, insn cflwsp, n=64, off=11, cmp=ffffffff1e2d3c4b
>
> <no failure>
> ---
> tests/ldst.S | 134 +++++++++++++++++++++++++++
> tests/ldst.h | 170 +++++++++++++++++++++++++++++++++++
> tests/test-misaligned-ldst.c | 154 +++++++++++++++++++++++++++++++
> 3 files changed, 458 insertions(+)
> create mode 100644 tests/ldst.S
> create mode 100644 tests/ldst.h
> create mode 100644 tests/test-misaligned-ldst.c
Instead of test-misaligned-ldst being a user-space test, I
suggest adding it as another bare-metal payload app
under firmware/payloads, so that we can use FW_PAYLOAD_PATH
to select this bare-metal app instead of the default test.bin.
Regards,
Anup
>
> diff --git a/tests/ldst.S b/tests/ldst.S
> new file mode 100644
> index 00000000..66f88e42
> --- /dev/null
> +++ b/tests/ldst.S
> @@ -0,0 +1,134 @@
> +.altmacro
> +
> +.macro mem_repeat name:req, ldst:req, op:req, rx:req, ry:req, len:req, signext=0, step=1, n=4096, max=2048
> +.globl mem_\name
> +.type mem_\name, @function
> +mem_\name\():
> +.set i, 0
> +.rept \n
> +.set j, \max - \n * \step + i * \step
> +1:
> + mem_\ldst \op \rx \ry j
> + ret
> +.ifne . - mem_\name - (. - 1b) * (i + 1)
> +.error "mem_\name is not aligned"
> +.endif
> +.set i, i + 1
> +.endr
> +.size mem_\name, . - mem_\name
> +
> +.align 3
> +.globl mem_\name\()_desc
> +.type mem_\name\()_desc @object
> +mem_\name\()_desc:
> + .byte \len, \signext, \step, (. - mem_\name) / \n
> + .half \max - \n * \step, \n
> +#if __riscv_xlen == 32
> + .word mem_\name
> +#elif __riscv_xlen == 64
> + .dword mem_\name
> +#endif
> +.size mem_\name\()_desc, . - mem_\name\()_desc
> +.endm
> +
> +.macro mem_load op rs rd imm
> + mv \rs, a0
> + \op \rd, \imm(\rs)
> + mv a0, \rd
> +.endm
> +
> +.macro mem_fpld op rs rd imm
> + mv \rs, a0
> + \op \rd, \imm(\rs)
> + fmv.d fa0, \rd
> +.endm
> +
> +.macro mem_store op rs1 rs2 imm
> + mv \rs2, a1
> + mv \rs1, a0
> + \op \rs2, \imm(\rs1)
> +.endm
> +
> +.macro mem_fpst op rs1 rs2 imm
> + fmv.d \rs2, fa0
> + mv \rs1, a0
> + \op \rs2, \imm(\rs1)
> +.endm
> +
> +.macro mem_ldsp op tmp rd imm
> + mv \tmp, sp
> + mv sp, a0
> + \op \rd, \imm(sp)
> + mv sp, \tmp
> + mv a0, \rd
> +.endm
> +
> +.macro mem_fpldsp op tmp rd imm
> + mv \tmp, sp
> + mv sp, a0
> + \op \rd, \imm(sp)
> + mv sp, \tmp
> + fmv.d fa0, \rd
> +.endm
> +
> +.macro mem_stsp op tmp rs2 imm
> + mv \tmp, sp
> + mv sp, a0
> + mv \rs2, a1
> + \op \rs2, \imm(sp)
> + mv sp, \tmp
> +.endm
> +
> +.macro mem_fpstsp op tmp rs2 imm
> + mv \tmp, sp
> + mv sp, a0
> + fmv.d \rs2, fa0
> + \op \rs2, \imm(sp)
> + mv sp, \tmp
> +.endm
> +
> +mem_repeat lb load lb t0 a1 1 1
> +mem_repeat lbu load lbu t1 a2 1
> +mem_repeat sb store sb t2 a3 1
> +mem_repeat lh load lh t4 a5 2 1
> +mem_repeat lhu load lhu t5 a6 2
> +mem_repeat sh store sh t6 a7 2
> +mem_repeat lw load lw a7 t6 4 1
> +#if __riscv_xlen == 64
> +mem_repeat lwu load lwu a6 t5 4
> +#endif
> +mem_repeat sw store sw a5 t4 4
> +#if __riscv_xlen == 64
> +mem_repeat ld load ld a4 t3 8 1
> +mem_repeat sd store sd a3 t2 8
> +#endif
> +mem_repeat flw fpld flw t3 ft8 4
> +mem_repeat fsw fpst fsw t4 ft9 4
> +mem_repeat fld fpld fld t5 ft10 8
> +mem_repeat fsd fpst fsd t6 ft11 8
> +#ifdef __riscv_zcb
> +mem_repeat clbu load c.lbu a5 a1 1 0 1 4 4
> +mem_repeat csb store c.sb a4 a2 1 0 1 4 4
> +mem_repeat clh load c.lh a3 a3 2 1 2 2 4
> +mem_repeat clhu load c.lhu a2 a4 2 0 2 2 4
> +mem_repeat csh store c.sh a1 a5 2 0 2 2 4
> +#endif
> +mem_repeat clw load c.lw a5 a1 4 1 4 32 128
> +mem_repeat csw store c.sw a4 a2 4 0 4 32 128
> +mem_repeat clwsp ldsp lw t0 t6 4 1 4 64 256
> +mem_repeat cswsp stsp sw t1 t5 4 0 4 64 256
> +mem_repeat cfld fpld c.fld a3 fa2 8 0 8 32 256
> +mem_repeat cfsd fpst c.fsd a2 fa3 8 0 8 32 256
> +mem_repeat cfldsp fpldsp fld t2 ft10 8 0 8 64 512
> +mem_repeat cfsdsp fpstsp fsd t3 ft11 8 0 8 64 512
> +#if __riscv_xlen == 32
> +mem_repeat cflw fpld c.flw a5 fa4 4 0 4 32 128
> +mem_repeat cfsw fpst c.fsw a4 fa5 4 0 4 32 128
> +mem_repeat cflwsp fpldsp flw t3 ft11 4 0 4 64 256
> +mem_repeat cfswsp fpstsp fsw t4 ft10 4 0 4 64 256
> +#elif __riscv_xlen == 64
> +mem_repeat cld load c.ld a5 a4 8 1 8 32 256
> +mem_repeat csd store c.sd a4 a5 8 0 8 32 256
> +mem_repeat cldsp ldsp ld t3 t6 8 1 8 64 512
> +mem_repeat csdsp stsp sd t4 t5 8 0 8 64 512
> +#endif
> diff --git a/tests/ldst.h b/tests/ldst.h
> new file mode 100644
> index 00000000..3d4b6062
> --- /dev/null
> +++ b/tests/ldst.h
> @@ -0,0 +1,170 @@
> +#include <inttypes.h>
> +
> +typedef struct {
> + uint8_t len;
> + uint8_t signext;
> + uint8_t imm_step;
> + uint8_t isize;
> + int16_t imm_base;
> + uint16_t n;
> + void *fp;
> +} mem_op_desc;
> +
> +typedef union {
> + unsigned long u_long;
> + long i_long;
> + uint32_t u32[2];
> + int32_t i32[2];
> + uint64_t u64;
> + int64_t i64;
> + float f32[2];
> + double f64;
> + uint8_t bytes[8];
> +} ldst_val;
> +
> +extern mem_op_desc mem_lb_desc;
> +extern mem_op_desc mem_lbu_desc;
> +extern mem_op_desc mem_sb_desc;
> +extern mem_op_desc mem_lh_desc;
> +extern mem_op_desc mem_lhu_desc;
> +extern mem_op_desc mem_sh_desc;
> +extern mem_op_desc mem_lw_desc;
> +extern mem_op_desc mem_lwu_desc;
> +extern mem_op_desc mem_sw_desc;
> +extern mem_op_desc mem_ld_desc;
> +extern mem_op_desc mem_sd_desc;
> +extern mem_op_desc mem_flw_desc;
> +extern mem_op_desc mem_fsw_desc;
> +extern mem_op_desc mem_fld_desc;
> +extern mem_op_desc mem_fsd_desc;
> +extern mem_op_desc mem_clbu_desc;
> +extern mem_op_desc mem_csb_desc;
> +extern mem_op_desc mem_clh_desc;
> +extern mem_op_desc mem_clhu_desc;
> +extern mem_op_desc mem_csh_desc;
> +extern mem_op_desc mem_clw_desc;
> +extern mem_op_desc mem_csw_desc;
> +extern mem_op_desc mem_clwsp_desc;
> +extern mem_op_desc mem_cswsp_desc;
> +extern mem_op_desc mem_cfld_desc;
> +extern mem_op_desc mem_cfsd_desc;
> +extern mem_op_desc mem_cfldsp_desc;
> +extern mem_op_desc mem_cfsdsp_desc;
> +extern mem_op_desc mem_cflw_desc;
> +extern mem_op_desc mem_cfsw_desc;
> +extern mem_op_desc mem_cflwsp_desc;
> +extern mem_op_desc mem_cfswsp_desc;
> +extern mem_op_desc mem_cld_desc;
> +extern mem_op_desc mem_csd_desc;
> +extern mem_op_desc mem_cldsp_desc;
> +extern mem_op_desc mem_csdsp_desc;
> +
> +typedef struct ldst_func {
> + const char *name;
> + const mem_op_desc *desc;
> +} ldst_func;
> +
> +#define DEF_LDST_FUNC(x) { #x, &mem_ ## x ## _desc }
> +
> +static const ldst_func load_funcs[] = {
> + DEF_LDST_FUNC(lb),
> + DEF_LDST_FUNC(lbu),
> + DEF_LDST_FUNC(lh),
> + DEF_LDST_FUNC(lhu),
> + DEF_LDST_FUNC(lw),
> +#ifdef __riscv_zcb
> + DEF_LDST_FUNC(clbu),
> + DEF_LDST_FUNC(clh),
> + DEF_LDST_FUNC(clhu),
> +#endif
> + DEF_LDST_FUNC(clw),
> + DEF_LDST_FUNC(clwsp),
> +#if __riscv_xlen == 64
> + DEF_LDST_FUNC(lwu),
> + DEF_LDST_FUNC(ld),
> + DEF_LDST_FUNC(cld),
> + DEF_LDST_FUNC(cldsp),
> +#endif
> +};
> +
> +static const ldst_func store_funcs[] = {
> + DEF_LDST_FUNC(sb),
> + DEF_LDST_FUNC(sh),
> + DEF_LDST_FUNC(sw),
> +#ifdef __riscv_zcb
> + DEF_LDST_FUNC(csb),
> + DEF_LDST_FUNC(csh),
> +#endif
> + DEF_LDST_FUNC(csw),
> + DEF_LDST_FUNC(cswsp),
> +#if __riscv_xlen == 64
> + DEF_LDST_FUNC(sd),
> + DEF_LDST_FUNC(csd),
> + DEF_LDST_FUNC(csdsp),
> +#endif
> +};
> +
> +static const ldst_func loadfp_funcs[] = {
> +#if __riscv_xlen == 32
> + DEF_LDST_FUNC(cflw),
> + DEF_LDST_FUNC(cflwsp),
> +#endif
> + DEF_LDST_FUNC(flw),
> + DEF_LDST_FUNC(fld),
> + DEF_LDST_FUNC(cfld),
> + DEF_LDST_FUNC(cfldsp),
> +};
> +
> +static const ldst_func storefp_funcs[] = {
> +#if __riscv_xlen == 32
> + DEF_LDST_FUNC(cfsw),
> + DEF_LDST_FUNC(cfswsp),
> +#endif
> + DEF_LDST_FUNC(fsw),
> + DEF_LDST_FUNC(fsd),
> + DEF_LDST_FUNC(cfsd),
> + DEF_LDST_FUNC(cfsdsp),
> +};
> +
> +static inline unsigned long load_i(const void *p, unsigned func, unsigned sel)
> +{
> + typedef unsigned long (*func_i)(const void *p);
> + const mem_op_desc *desc = load_funcs[func].desc;
> +
> + long imm = (long)desc->imm_base + desc->imm_step * sel;
> + func_i f = desc->fp + desc->isize * sel;
> +
> + return f(p - imm);
> +}
> +
> +static inline void store_i(void *p, unsigned long val, unsigned func, unsigned sel)
> +{
> + typedef void (*func_i)(void *p, unsigned long val);
> + const mem_op_desc *desc = store_funcs[func].desc;
> +
> + long imm = (long)desc->imm_base + desc->imm_step * sel;
> + func_i f = desc->fp + desc->isize * sel;
> + f(p - imm, val);
> +}
> +
> +static inline double load_f(const void *p, unsigned func, unsigned sel)
> +{
> + typedef double (*func_f)(const void *p);
> + const mem_op_desc *desc = loadfp_funcs[func].desc;
> +
> + long imm = (long)desc->imm_base + desc->imm_step * sel;
> + func_f f = desc->fp + desc->isize * sel;
> +
> + return f(p - imm);
> +}
> +
> +static inline void store_f(void *p, double val, unsigned func, unsigned sel)
> +{
> + typedef void (*func_f)(void *p, double val);
> + const mem_op_desc *desc = storefp_funcs[func].desc;
> +
> + long imm = (long)desc->imm_base + desc->imm_step * sel;
> + func_f f = desc->fp + desc->isize * sel;
> +
> + f(p - imm, val);
> +}
> diff --git a/tests/test-misaligned-ldst.c b/tests/test-misaligned-ldst.c
> new file mode 100644
> index 00000000..ccbd0d36
> --- /dev/null
> +++ b/tests/test-misaligned-ldst.c
> @@ -0,0 +1,154 @@
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <stdint.h>
> +#include <error.h>
> +#include "ldst.h"
> +
> +#define ARR_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
> +
> +static const uint8_t patt[16] = {
> + 0xf0, 0xe1, 0xd2, 0xc3, 0xb4, 0xa5, 0x96, 0x87,
> + 0x78, 0x69, 0x5a, 0x4b, 0x3c, 0x2d, 0x1e, 0x0f,
> +};
> +
> +static const uint64_t load_val[16] = {
> + 0x8796a5b4c3d2e1f0ULL,
> + 0x788796a5b4c3d2e1ULL,
> + 0x69788796a5b4c3d2ULL,
> + 0x5a69788796a5b4c3ULL,
> + 0x4b5a69788796a5b4ULL,
> + 0x3c4b5a69788796a5ULL,
> + 0x2d3c4b5a69788796ULL,
> + 0x1e2d3c4b5a697887ULL,
> + 0x0f1e2d3c4b5a6978ULL,
> + 0x0f1e2d3c4b5a69ULL,
> + 0x0f1e2d3c4b5aULL,
> + 0x0f1e2d3c4bULL,
> + 0x0f1e2d3cULL,
> + 0x0f1e2dULL,
> + 0x0f1eULL,
> + 0x0fULL,
> +};
> +
> +int main()
> +{
> + unsigned i, j, k;
> + //ldst_val val;
> +
> + // Test int read
> + for (j = 0; j < ARR_SIZE(load_funcs); ++j) {
> + const mem_op_desc *desc = load_funcs[j].desc;
> + unsigned shift = 8 * (sizeof(long) - desc->len);
> +
> + for (i = 0; i < ARR_SIZE(patt) - desc->len; ++i) {
> + unsigned long expected;
> + if (desc->len != 1 && i % desc->len == 0)
> + continue;
> +
> + expected = load_val[i];
> + if (desc->signext)
> + expected = (long)(expected << shift) >> shift;
> + else
> + expected = (expected << shift) >> shift;
> +
> + printf("load %c%u, insn %s, n=%u, off=%u, cmp=%lx\n",
> + desc->signext ? 'i' : 'u',
> + desc->len * 8, load_funcs[j].name, desc->n, i, expected);
> + fflush(stdout);
> + for (k = 0; k < desc->n; ++k) {
> + unsigned long read = load_i(&patt[i], j, k);
> + if (read != expected)
> + error(1, 0, "failed at load_i %u %u %u: %lx",
> + i, j, k, read);
> + }
> + }
> + }
> + // Test fp load
> + for (j = 0; j < ARR_SIZE(loadfp_funcs); ++j) {
> + const mem_op_desc *desc = loadfp_funcs[j].desc;
> +
> + for (i = 0; i < ARR_SIZE(patt) - desc->len; ++i) {
> + ldst_val expected, read;
> +
> + expected.u64 = load_val[i];
> + if (desc->len == 4) // float
> + expected.i32[1] = -1;
> +
> + if (i % desc->len == 0)
> + continue;
> +
> + printf("loadfp %u, insn %s, n=%u, off=%u, cmp=%" PRIx64 "\n",
> + desc->len * 8, loadfp_funcs[j].name, desc->n, i, expected.u64);
> + fflush(stdout);
> + for (k = 0; k < desc->n; ++k) {
> + read.f64 = load_f(&patt[i], j, k);
> + if (read.u64 != expected.u64)
> + error(1, 0, "failed at load_f %u %u %u: %" PRIx64,
> + i, j, k, read.u64);
> + }
> + }
> + }
> + // Test int store
> + for (j = 0; j < ARR_SIZE(store_funcs); ++j) {
> + const mem_op_desc *desc = store_funcs[j].desc;
> +
> + for (i = 0; i < ARR_SIZE(patt) - desc->len; ++i) {
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Woverflow"
> + ldst_val val = { 0x8899aabbccddeeffULL };
> +#pragma GCC diagnostic pop
> + unsigned end = i + desc->len;
> +
> + if (desc->len != 1 && i % desc->len == 0)
> + continue;
> +
> + memcpy(val.bytes, &patt[i], desc->len);
> + printf("store %u, insn %s, n=%u, off=%u, src=%lx\n",
> + desc->len * 8, store_funcs[j].name, desc->n, i, val.u_long);
> + fflush(stdout);
> +
> + for (k = 0; k < desc->n; ++k) {
> + uint8_t buff[ARR_SIZE(patt)] = {};
> +
> + memcpy(buff, patt, i);
> + memcpy(&buff[end], &patt[end], ARR_SIZE(patt) - end);
> + store_i(&buff[i], val.u_long, j, k);
> + if (memcmp(buff, patt, sizeof(buff)))
> + error(1, 0, "faild at store_i %u %u %u", i, j, k);
> + }
> +
> + }
> + }
> + // Test fp store
> + for (j = 0; j < ARR_SIZE(storefp_funcs); ++j) {
> + const mem_op_desc *desc = storefp_funcs[j].desc;
> +
> + for (i = 0; i < ARR_SIZE(patt) - desc->len; ++i) {
> + ldst_val val;
> + unsigned end = i + desc->len;
> +
> + val.u32[0] = 0xccddeeffUL;
> + val.u32[1] = 0x8899aabbUL;
> +
> + if (i % desc->len == 0)
> + continue;
> +
> + memcpy(val.bytes, &patt[i], desc->len);
> + printf("storefp %u, insn %s, n=%u, off=%u, src=%" PRIx64 "\n",
> + desc->len * 8, storefp_funcs[j].name, desc->n, i, val.u64);
> + fflush(stdout);
> +
> + for (k = 0; k < desc->n; ++k) {
> + uint8_t buff[ARR_SIZE(patt)] = {};
> +
> + memcpy(buff, patt, i);
> + memcpy(&buff[end], &patt[end], ARR_SIZE(patt) - end);
> + store_f(&buff[i], val.f64, j, k);
> + if (memcmp(buff, patt, sizeof(buff)))
> + error(1, 0, "faild at store_f %u %u %u", i, j, k);
> + }
> + }
> + }
> + return 0;
> +}
> --
> 2.34.1
>
>
> --
> opensbi mailing list
> opensbi at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/opensbi
More information about the opensbi
mailing list