[PATCH 1/1] lib: sbi_misaligned_ldst: Add handling of vector load/store

Clément Léger cleger at rivosinc.com
Thu Dec 5 03:42:13 PST 2024



On 03/12/2024 16:28, Greentime Hu wrote:
> Clément Léger <cleger at rivosinc.com> wrote on Tue, Dec 3, 2024 at 4:22 PM:
>>
>>
>>
>> On 03/12/2024 07:02, Nylon Chen wrote:
>>> Clément Léger <cleger at rivosinc.com> wrote on Fri, Nov 29, 2024 at 10:50 PM:
>>>>
>>>> Hey Nylon,
>>>>
>>>> Do you plan to backport this patch to Linux as well ?
>>> Hi Clément,
>>>
>>> Are you asking about the RISC-V Non-MMU related changes in this patch?
>>
>> Hi Nylon,
>>
>> I meant the whole series (i.e. handling misaligned vector accesses in the kernel).
>>
>> Thanks !
>>
> Hi Clément,
> 
> I guess Nylon is trying to ask whether we should delegate the unaligned
> access exception types to S-mode, since we are handling them in M-mode
> and that is transparent to S-mode, or whether we should just port it to
> M-mode Linux (the non-MMU case).

Hi,

Linux is already able to emulate misaligned accesses by itself in S-mode,
as well as in M-mode! The SBI 3.0 firmware features (FWFT) extension
allows S-mode to request that misaligned exceptions be delegated to
S-mode. The rationale is that it's better to have a known behavior in the
kernel and be able to catch any emulated misaligned access to improve
application performance, rather than emulating them transparently. This
also avoids relying on potentially unknown SBI behavior (some
implementations might not emulate misaligned accesses).
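
For reference, here is a minimal sketch of what such a delegation request
could look like from S-mode, assuming the FWFT ("firmware features")
extension and feature IDs from the SBI v3.0 specification; the helper
names below are illustrative and not taken from the patch under review:

/* Sketch only: request misaligned load/store exception delegation
 * to S-mode via the SBI FWFT extension (assumed IDs from SBI v3.0). */
#define SBI_EXT_FWFT                    0x46574654  /* "FWFT" */
#define SBI_FWFT_SET                    0x0
#define SBI_FWFT_MISALIGNED_EXC_DELEG   0x0

struct sbiret {
        long error;
        long value;
};

static struct sbiret sbi_ecall(unsigned long ext, unsigned long fid,
                               unsigned long a0, unsigned long a1,
                               unsigned long a2)
{
        register unsigned long r_a0 asm("a0") = a0;
        register unsigned long r_a1 asm("a1") = a1;
        register unsigned long r_a2 asm("a2") = a2;
        register unsigned long r_a6 asm("a6") = fid;
        register unsigned long r_a7 asm("a7") = ext;

        /* Standard SBI calling convention: EID in a7, FID in a6,
         * error returned in a0, value in a1. */
        asm volatile ("ecall"
                      : "+r" (r_a0), "+r" (r_a1)
                      : "r" (r_a2), "r" (r_a6), "r" (r_a7)
                      : "memory");

        return (struct sbiret){ .error = r_a0, .value = r_a1 };
}

/* Returns 0 (SBI_SUCCESS) if the SBI implementation accepted the
 * request, i.e. misaligned exceptions now trap directly to S-mode. */
static int request_misaligned_delegation(void)
{
        struct sbiret ret = sbi_ecall(SBI_EXT_FWFT, SBI_FWFT_SET,
                                      SBI_FWFT_MISALIGNED_EXC_DELEG,
                                      1 /* enable */, 0 /* flags */);
        return ret.error;
}

If the call fails (e.g. the extension is not implemented), the kernel can
simply keep relying on the transparent M-mode emulation described above.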

So my question was really about porting vector misaligned access
emulation code to S-mode :)

Thanks !

Clément

> 
>>>
>>> Thanks
>>>
>>> Nylon
>>>>
>>>> Thanks !
>>>>
>>>> Clément
>>>>
>>>> On 16/10/2024 12:35, Nylon Chen wrote:
>>>>> Add exception handling vector instructions from
>>>>>
>>>>> the vector extension to the sbi_misaligned_ldst library.
>>>>>
>>>>> This implementation references the misaligned_vec_ldst
>>>>> implementation in the riscv-pk project.
>>>>>
>>>>> Co-developed-by: Zong Li <zong.li at sifive.com>
>>>>> Signed-off-by: Zong Li <zong.li at sifive.com>
>>>>> Signed-off-by: Nylon Chen <nylon.chen at sifive.com>
>>>>> Reviewed-by: Andy Chiu <andy.chiu at sifive.com>
>>>>> Reviewed-by: Anup Patel <anup at brainfault.org>
>>>>> ---
>>>>>  Makefile                     |  15 +-
>>>>>  include/sbi/riscv_encoding.h | 385 ++++++++++++++++++++++++++++++++++-
>>>>>  include/sbi/sbi_trap_ldst.h  |   9 +
>>>>>  lib/sbi/Kconfig              |   4 +
>>>>>  lib/sbi/objects.mk           |   1 +
>>>>>  lib/sbi/sbi_trap_ldst.c      |  23 ++-
>>>>>  lib/sbi/sbi_trap_v_ldst.c    | 341 +++++++++++++++++++++++++++++++
>>>>>  7 files changed, 766 insertions(+), 12 deletions(-)
>>>>>  create mode 100644 lib/sbi/sbi_trap_v_ldst.c
>>>>>
>>>>> diff --git a/Makefile b/Makefile
>>>>> index f0012f6..20cae08 100644
>>>>> --- a/Makefile
>>>>> +++ b/Makefile
>>>>> @@ -189,6 +189,9 @@ CC_SUPPORT_STRICT_ALIGN := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib
>>>>>  # Check whether the assembler and the compiler support the Zicsr and Zifencei extensions
>>>>>  CC_SUPPORT_ZICSR_ZIFENCEI := $(shell $(CC) $(CLANG_TARGET) $(RELAX_FLAG) -nostdlib -march=rv$(OPENSBI_CC_XLEN)imafd_zicsr_zifencei -x c /dev/null -o /dev/null 2>&1 | grep "zicsr\|zifencei" > /dev/null && echo n || echo y)
>>>>>
>>>>> +# Check whether the assembler and the compiler support the Vector extension
>>>>> +CC_SUPPORT_VECT := $(shell echo | $(CC) -dM -E -march=rv$(OPENSBI_CC_XLEN)gv - | grep -q riscv.*vector && echo y || echo n)
>>>>> +
>>>>>  ifneq ($(OPENSBI_LD_PIE),y)
>>>>>  $(error Your linker does not support creating PIEs, opensbi requires this.)
>>>>>  endif
>>>>> @@ -294,10 +297,16 @@ ifndef PLATFORM_RISCV_ABI
>>>>>  endif
>>>>>  ifndef PLATFORM_RISCV_ISA
>>>>>    ifneq ($(PLATFORM_RISCV_TOOLCHAIN_DEFAULT), 1)
>>>>> -    ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
>>>>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc_zicsr_zifencei
>>>>> +    PLATFORM_RISCV_ISA := rv$(PLATFORM_RISCV_XLEN)imafdc
>>>>> +ifdef CONFIG_SUPPORT_VECTOR
>>>>> +    ifeq ($(CC_SUPPORT_VECT), y)
>>>>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)v
>>>>>      else
>>>>> -      PLATFORM_RISCV_ISA = rv$(PLATFORM_RISCV_XLEN)imafdc
>>>>> +      $(warning Vector extension requested but not supported by the compiler)
>>>>> +    endif
>>>>> +endif
>>>>> +    ifeq ($(CC_SUPPORT_ZICSR_ZIFENCEI), y)
>>>>> +      PLATFORM_RISCV_ISA := $(PLATFORM_RISCV_ISA)_zicsr_zifencei
>>>>>      endif
>>>>>    else
>>>>>      PLATFORM_RISCV_ISA = $(OPENSBI_CC_ISA)
>>>>> diff --git a/include/sbi/riscv_encoding.h b/include/sbi/riscv_encoding.h
>>>>> index 980abdb..bf58a8d 100644
>>>>> --- a/include/sbi/riscv_encoding.h
>>>>> +++ b/include/sbi/riscv_encoding.h
>>>>> @@ -756,6 +756,12 @@
>>>>>  #define CSR_MVIPH                    0x319
>>>>>  #define CSR_MIPH                     0x354
>>>>>
>>>>> +/* Vector extension registers */
>>>>> +#define CSR_VSTART                     0x8
>>>>> +#define CSR_VL                         0xc20
>>>>> +#define CSR_VTYPE                      0xc21
>>>>> +#define CSR_VLENB                      0xc22
>>>>> +
>>>>>  /* ===== Trap/Exception Causes ===== */
>>>>>
>>>>>  #define CAUSE_MISALIGNED_FETCH               0x0
>>>>> @@ -883,11 +889,364 @@
>>>>>  #define INSN_MASK_FENCE_TSO          0xffffffff
>>>>>  #define INSN_MATCH_FENCE_TSO         0x8330000f
>>>>>
>>>>> +#define INSN_MASK_VECTOR_UNIT_STRIDE         0xfdf0707f
>>>>> +#define INSN_MASK_VECTOR_FAULT_ONLY_FIRST    0xfdf0707f
>>>>> +#define INSN_MASK_VECTOR_STRIDE              0xfc00707f
>>>>> +#define INSN_MASK_VECTOR_WHOLE_REG           0xfff0707f
>>>>> +#define INSN_MASK_VECTOR_INDEXED             0xfc00707f
>>>>> +
>>>>> +#define INSN_MATCH_VLUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000007 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VSUXSEG(n, bits) ((((n) - 1) << 29) | 0x04000027 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VLOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000007 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VSOXSEG(n, bits) ((((n) - 1) << 29) | 0x0c000027 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VLSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000007 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VSSSEG(n, bits)  ((((n) - 1) << 29) | 0x08000027 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VSSEG(n, bits)   ((((n) - 1) << 29) | 0x00004027 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VLSEG(n, bits)   ((((n) - 1) << 29) | 0x00004007 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +#define INSN_MATCH_VLSEGFF(n, bits) ((((n) - 1) << 29) | 0x1000007 | \
>>>>> +             ((bits) == 16 ? 5 : (bits) == 32 ? 6 : 7) << 12)
>>>>> +
>>>>> +#define INSN_MATCH_VLE16V              0x00005007
>>>>> +#define INSN_MATCH_VLE32V              0x00006007
>>>>> +#define INSN_MATCH_VLE64V              0x00007007
>>>>> +#define INSN_MATCH_VSE16V              0x00005027
>>>>> +#define INSN_MATCH_VSE32V              0x00006027
>>>>> +#define INSN_MATCH_VSE64V              0x00007027
>>>>> +#define INSN_MATCH_VLSE16V             0x08005007
>>>>> +#define INSN_MATCH_VLSE32V             0x08006007
>>>>> +#define INSN_MATCH_VLSE64V             0x08007007
>>>>> +#define INSN_MATCH_VSSE16V             0x08005027
>>>>> +#define INSN_MATCH_VSSE32V             0x08006027
>>>>> +#define INSN_MATCH_VSSE64V             0x08007027
>>>>> +#define INSN_MATCH_VLOXEI16V           0x0c005007
>>>>> +#define INSN_MATCH_VLOXEI32V           0x0c006007
>>>>> +#define INSN_MATCH_VLOXEI64V           0x0c007007
>>>>> +#define INSN_MATCH_VSOXEI16V           0x0c005027
>>>>> +#define INSN_MATCH_VSOXEI32V           0x0c006027
>>>>> +#define INSN_MATCH_VSOXEI64V           0x0c007027
>>>>> +#define INSN_MATCH_VLUXEI16V           0x04005007
>>>>> +#define INSN_MATCH_VLUXEI32V           0x04006007
>>>>> +#define INSN_MATCH_VLUXEI64V           0x04007007
>>>>> +#define INSN_MATCH_VSUXEI16V           0x04005027
>>>>> +#define INSN_MATCH_VSUXEI32V           0x04006027
>>>>> +#define INSN_MATCH_VSUXEI64V           0x04007027
>>>>> +#define INSN_MATCH_VLE16FFV            0x01005007
>>>>> +#define INSN_MATCH_VLE32FFV            0x01006007
>>>>> +#define INSN_MATCH_VLE64FFV            0x01007007
>>>>> +#define INSN_MATCH_VL1RE8V             0x02800007
>>>>> +#define INSN_MATCH_VL1RE16V            0x02805007
>>>>> +#define INSN_MATCH_VL1RE32V            0x02806007
>>>>> +#define INSN_MATCH_VL1RE64V            0x02807007
>>>>> +#define INSN_MATCH_VL2RE8V             0x22800007
>>>>> +#define INSN_MATCH_VL2RE16V            0x22805007
>>>>> +#define INSN_MATCH_VL2RE32V            0x22806007
>>>>> +#define INSN_MATCH_VL2RE64V            0x22807007
>>>>> +#define INSN_MATCH_VL4RE8V             0x62800007
>>>>> +#define INSN_MATCH_VL4RE16V            0x62805007
>>>>> +#define INSN_MATCH_VL4RE32V            0x62806007
>>>>> +#define INSN_MATCH_VL4RE64V            0x62807007
>>>>> +#define INSN_MATCH_VL8RE8V             0xe2800007
>>>>> +#define INSN_MATCH_VL8RE16V            0xe2805007
>>>>> +#define INSN_MATCH_VL8RE32V            0xe2806007
>>>>> +#define INSN_MATCH_VL8RE64V            0xe2807007
>>>>> +#define INSN_MATCH_VS1RV               0x02800027
>>>>> +#define INSN_MATCH_VS2RV               0x22800027
>>>>> +#define INSN_MATCH_VS4RV               0x62800027
>>>>> +#define INSN_MATCH_VS8RV               0xe2800027
>>>>> +
>>>>> +#define INSN_MASK_VECTOR_LOAD_STORE  0x7f
>>>>> +#define INSN_MATCH_VECTOR_LOAD               0x07
>>>>> +#define INSN_MATCH_VECTOR_STORE              0x27
>>>>> +
>>>>> +#define IS_VECTOR_LOAD_STORE(insn) \
>>>>> +     ((((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_LOAD) || \
>>>>> +     (((insn) & INSN_MASK_VECTOR_LOAD_STORE) == INSN_MATCH_VECTOR_STORE))
>>>>> +
>>>>> +#define IS_VECTOR_INSN_MATCH(insn, match, mask) \
>>>>> +     (((insn) & (mask)) == ((match) & (mask)))
>>>>> +
>>>>> +#define IS_UNIT_STRIDE_MATCH(insn, match) \
>>>>> +     IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_UNIT_STRIDE)
>>>>> +
>>>>> +#define IS_STRIDE_MATCH(insn, match) \
>>>>> +     IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_STRIDE)
>>>>> +
>>>>> +#define IS_INDEXED_MATCH(insn, match) \
>>>>> +     IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_INDEXED)
>>>>> +
>>>>> +#define IS_FAULT_ONLY_FIRST_MATCH(insn, match) \
>>>>> +     IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_FAULT_ONLY_FIRST)
>>>>> +
>>>>> +#define IS_WHOLE_REG_MATCH(insn, match) \
>>>>> +     IS_VECTOR_INSN_MATCH(insn, match, INSN_MASK_VECTOR_WHOLE_REG)
>>>>> +
>>>>> +#define IS_UNIT_STRIDE_LOAD(insn) ( \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE16V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE32V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLE64V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(2, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(3, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(4, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(5, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(6, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(7, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VLSEG(8, 64)))
>>>>> +
>>>>> +#define IS_UNIT_STRIDE_STORE(insn) ( \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE16V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE32V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSE64V) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 16)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 32)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(2, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(3, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(4, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(5, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(6, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(7, 64)) || \
>>>>> +     IS_UNIT_STRIDE_MATCH(insn, INSN_MATCH_VSSEG(8, 64)))
>>>>> +
>>>>> +#define IS_STRIDE_LOAD(insn) ( \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE16V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE32V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSE64V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(2, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(3, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(4, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(5, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(6, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(7, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VLSSEG(8, 64)))
>>>>> +
>>>>> +#define IS_STRIDE_STORE(insn) ( \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE16V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE32V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSE64V) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 16)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 32)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(2, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(3, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(4, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(5, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(6, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(7, 64)) || \
>>>>> +     IS_STRIDE_MATCH(insn, INSN_MATCH_VSSSEG(8, 64)))
>>>>> +
>>>>> +#define IS_INDEXED_LOAD(insn) ( \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI16V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI32V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXEI64V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI16V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI32V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXEI64V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(2, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(3, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(4, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(5, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(6, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(7, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLUXSEG(8, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(2, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(3, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(4, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(5, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(6, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(7, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VLOXSEG(8, 64)))
>>>>> +
>>>>> +#define IS_INDEXED_STORE(insn) ( \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI16V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI32V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXEI64V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI16V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI32V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXEI64V) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(2, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(3, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(4, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(5, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(6, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(7, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSUXSEG(8, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 16)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 32)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(2, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(3, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(4, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(5, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(6, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(7, 64)) || \
>>>>> +     IS_INDEXED_MATCH(insn, INSN_MATCH_VSOXSEG(8, 64)))
>>>>> +
>>>>> +#define IS_FAULT_ONLY_FIRST_LOAD(insn) ( \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE16FFV) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE32FFV) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLE64FFV) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 16)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 32)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(2, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(3, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(4, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(5, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(6, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(7, 64)) || \
>>>>> +     IS_FAULT_ONLY_FIRST_MATCH(insn, INSN_MATCH_VLSEGFF(8, 64)))
>>>>> +
>>>>> +     #define IS_WHOLE_REG_LOAD(insn) ( \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE8V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE16V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE32V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL1RE64V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE8V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE16V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE32V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL2RE64V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE8V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE16V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE32V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL4RE64V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE8V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE16V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE32V) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VL8RE64V))
>>>>> +
>>>>> +#define IS_WHOLE_REG_STORE(insn) ( \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS1RV) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS2RV) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS4RV) || \
>>>>> +     IS_WHOLE_REG_MATCH(insn, INSN_MATCH_VS8RV))
>>>>> +
>>>>> +
>>>>>  #if __riscv_xlen == 64
>>>>>
>>>>>  /* 64-bit read for VS-stage address translation (RV64) */
>>>>>  #define INSN_PSEUDO_VS_LOAD          0x00003000
>>>>> -
>>>>>  /* 64-bit write for VS-stage address translation (RV64) */
>>>>>  #define INSN_PSEUDO_VS_STORE 0x00003020
>>>>>
>>>>> @@ -903,6 +1262,12 @@
>>>>>  #error "Unexpected __riscv_xlen"
>>>>>  #endif
>>>>>
>>>>> +#define VM_MASK                              0x1
>>>>> +#define VIEW_MASK                            0x3
>>>>> +#define VSEW_MASK                            0x3
>>>>> +#define VLMUL_MASK                           0x7
>>>>> +#define VD_MASK                              0x1f
>>>>> +#define VS2_MASK                             0x1f
>>>>>  #define INSN_16BIT_MASK                      0x3
>>>>>  #define INSN_32BIT_MASK                      0x1c
>>>>>
>>>>> @@ -921,6 +1286,12 @@
>>>>>  #endif
>>>>>  #define REGBYTES                     (1 << LOG_REGBYTES)
>>>>>
>>>>> +#define SH_VSEW                      3
>>>>> +#define SH_VIEW                      12
>>>>> +#define SH_VD                                7
>>>>> +#define SH_VS2                               20
>>>>> +#define SH_VM                                25
>>>>> +#define SH_MEW                               28
>>>>>  #define SH_RD                                7
>>>>>  #define SH_RS1                               15
>>>>>  #define SH_RS2                               20
>>>>> @@ -974,6 +1345,18 @@
>>>>>  #define IMM_S(insn)                  (((s32)(insn) >> 25 << 5) | \
>>>>>                                        (s32)(((insn) >> 7) & 0x1f))
>>>>>
>>>>> +#define IS_MASKED(insn)              (((insn >> SH_VM) & VM_MASK) == 0)
>>>>> +#define GET_VD(insn)                 ((insn >> SH_VD) & VD_MASK)
>>>>> +#define GET_VS2(insn)                        ((insn >> SH_VS2) & VS2_MASK)
>>>>> +#define GET_VIEW(insn)                       (((insn) >> SH_VIEW) & VIEW_MASK)
>>>>> +#define GET_MEW(insn)                        (((insn) >> SH_MEW) & 1)
>>>>> +#define GET_VSEW(vtype)              (((vtype) >> SH_VSEW) & VSEW_MASK)
>>>>> +#define GET_VLMUL(vtype)             ((vtype) & VLMUL_MASK)
>>>>> +#define GET_LEN(view)                        (1UL << (view))
>>>>> +#define GET_NF(insn)                 (1 + ((insn >> 29) & 7))
>>>>> +#define GET_VEMUL(vlmul, view, vsew) ((vlmul + view - vsew) & 7)
>>>>> +#define GET_EMUL(vemul)              (1UL << ((vemul) >= 4 ? 0 : (vemul)))
>>>>> +
>>>>>  #define MASK_FUNCT3                  0x7000
>>>>>  #define MASK_RS1                     0xf8000
>>>>>  #define MASK_CSR                     0xfff00000
>>>>> diff --git a/include/sbi/sbi_trap_ldst.h b/include/sbi/sbi_trap_ldst.h
>>>>> index 8aee316..a6a6c75 100644
>>>>> --- a/include/sbi/sbi_trap_ldst.h
>>>>> +++ b/include/sbi/sbi_trap_ldst.h
>>>>> @@ -28,4 +28,13 @@ int sbi_load_access_handler(struct sbi_trap_context *tcntx);
>>>>>
>>>>>  int sbi_store_access_handler(struct sbi_trap_context *tcntx);
>>>>>
>>>>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>>> +                              ulong addr_offset);
>>>>> +
>>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>>> +                              struct sbi_trap_context *tcntx);
>>>>> +
>>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>>> +                              struct sbi_trap_context *tcntx);
>>>>> +
>>>>>  #endif
>>>>> diff --git a/lib/sbi/Kconfig b/lib/sbi/Kconfig
>>>>> index bd8ba2b..f4427fd 100644
>>>>> --- a/lib/sbi/Kconfig
>>>>> +++ b/lib/sbi/Kconfig
>>>>> @@ -66,4 +66,8 @@ config SBI_ECALL_SSE
>>>>>       bool "SSE extension"
>>>>>       default y
>>>>>
>>>>> +config SUPPORT_VECTOR
>>>>> +       bool "Enable vector extension"
>>>>> +       default n
>>>>> +
>>>>>  endmenu
>>>>> diff --git a/lib/sbi/objects.mk b/lib/sbi/objects.mk
>>>>> index 535aa70..9dbe218 100644
>>>>> --- a/lib/sbi/objects.mk
>>>>> +++ b/lib/sbi/objects.mk
>>>>> @@ -89,6 +89,7 @@ libsbi-objs-y += sbi_timer.o
>>>>>  libsbi-objs-y += sbi_tlb.o
>>>>>  libsbi-objs-y += sbi_trap.o
>>>>>  libsbi-objs-y += sbi_trap_ldst.o
>>>>> +libsbi-objs-y += sbi_trap_v_ldst.o
>>>>>  libsbi-objs-y += sbi_unpriv.o
>>>>>  libsbi-objs-y += sbi_expected_trap.o
>>>>>  libsbi-objs-y += sbi_cppc.o
>>>>> diff --git a/lib/sbi/sbi_trap_ldst.c b/lib/sbi/sbi_trap_ldst.c
>>>>> index f9c0241..d3a4d24 100644
>>>>> --- a/lib/sbi/sbi_trap_ldst.c
>>>>> +++ b/lib/sbi/sbi_trap_ldst.c
>>>>> @@ -32,7 +32,7 @@ typedef int (*sbi_trap_ld_emulator)(int rlen, union sbi_ldst_data *out_val,
>>>>>  typedef int (*sbi_trap_st_emulator)(int wlen, union sbi_ldst_data in_val,
>>>>>                                   struct sbi_trap_context *tcntx);
>>>>>
>>>>> -static ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>>> +ulong sbi_misaligned_tinst_fixup(ulong orig_tinst, ulong new_tinst,
>>>>>                                       ulong addr_offset)
>>>>>  {
>>>>>       if (new_tinst == INSN_PSEUDO_VS_LOAD ||
>>>>> @@ -52,7 +52,7 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>>       ulong insn, insn_len;
>>>>>       union sbi_ldst_data val = { 0 };
>>>>>       struct sbi_trap_info uptrap;
>>>>> -     int rc, fp = 0, shift = 0, len = 0;
>>>>> +     int rc, fp = 0, shift = 0, len = 0, vector = 0;
>>>>>
>>>>>       if (orig_trap->tinst & 0x1) {
>>>>>               /*
>>>>> @@ -144,6 +144,9 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>>               len = 2;
>>>>>               shift = 8 * (sizeof(ulong) - len);
>>>>>               insn = RVC_RS2S(insn) << SH_RD;
>>>>> +     } else if (IS_VECTOR_LOAD_STORE(insn)) {
>>>>> +             vector = 1;
>>>>> +             emu = sbi_misaligned_v_ld_emulator;
>>>>>       } else {
>>>>>               return sbi_trap_redirect(regs, orig_trap);
>>>>>       }
>>>>> @@ -152,14 +155,16 @@ static int sbi_trap_emulate_load(struct sbi_trap_context *tcntx,
>>>>>       if (rc <= 0)
>>>>>               return rc;
>>>>>
>>>>> -     if (!fp)
>>>>> -             SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>>>>> +     if (!vector) {
>>>>> +             if (!fp)
>>>>> +                     SET_RD(insn, regs, ((long)(val.data_ulong << shift)) >> shift);
>>>>>  #ifdef __riscv_flen
>>>>> -     else if (len == 8)
>>>>> -             SET_F64_RD(insn, regs, val.data_u64);
>>>>> -     else
>>>>> -             SET_F32_RD(insn, regs, val.data_ulong);
>>>>> +             else if (len == 8)
>>>>> +                     SET_F64_RD(insn, regs, val.data_u64);
>>>>> +             else
>>>>> +                     SET_F32_RD(insn, regs, val.data_ulong);
>>>>>  #endif
>>>>> +     }
>>>>>
>>>>>       regs->mepc += insn_len;
>>>>>
>>>>> @@ -248,6 +253,8 @@ static int sbi_trap_emulate_store(struct sbi_trap_context *tcntx,
>>>>>       } else if ((insn & INSN_MASK_C_SH) == INSN_MATCH_C_SH) {
>>>>>               len             = 2;
>>>>>               val.data_ulong = GET_RS2S(insn, regs);
>>>>> +     } else if (IS_VECTOR_LOAD_STORE(insn)) {
>>>>> +             emu = sbi_misaligned_v_st_emulator;
>>>>>       } else {
>>>>>               return sbi_trap_redirect(regs, orig_trap);
>>>>>       }
>>>>> diff --git a/lib/sbi/sbi_trap_v_ldst.c b/lib/sbi/sbi_trap_v_ldst.c
>>>>> new file mode 100644
>>>>> index 0000000..9ca16d5
>>>>> --- /dev/null
>>>>> +++ b/lib/sbi/sbi_trap_v_ldst.c
>>>>> @@ -0,0 +1,341 @@
>>>>> +/*
>>>>> + * SPDX-License-Identifier: BSD-2-Clause
>>>>> + *
>>>>> + * Copyright (c) 2024 SiFive Inc.
>>>>> + *
>>>>> + * Authors:
>>>>> + *   Andrew Waterman <andrew at sifive.com>
>>>>> + *   Nylon Chen <nylon.chen at sifive.com>
>>>>> + *   Zong Li <nylon.chen at sifive.com>
>>>>> + */
>>>>> +
>>>>> +#include <sbi/riscv_asm.h>
>>>>> +#include <sbi/riscv_encoding.h>
>>>>> +#include <sbi/sbi_error.h>
>>>>> +#include <sbi/sbi_trap_ldst.h>
>>>>> +#include <sbi/sbi_trap.h>
>>>>> +#include <sbi/sbi_unpriv.h>
>>>>> +#include <sbi/sbi_trap.h>
>>>>> +
>>>>> +#ifdef CONFIG_SUPPORT_VECTOR
>>>>> +#define VLEN_MAX 65536
>>>>> +
>>>>> +static inline void set_vreg(ulong vlenb, ulong which, ulong pos, ulong size, const uint8_t *bytes)
>>>>> +{
>>>>> +     pos += (which % 8) * vlenb;
>>>>> +     bytes -= pos;
>>>>> +
>>>>> +     asm volatile (
>>>>> +             "       .option push\n\t"
>>>>> +             "       .option arch, +v\n\t"
>>>>> +             "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>>>>> +             "       .option pop\n\t"
>>>>> +             :: "r" (pos + size));
>>>>> +
>>>>> +     csr_write(CSR_VSTART, pos);
>>>>> +
>>>>> +     switch (which / 8) {
>>>>> +     case 0:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vle8.v v0,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 1:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vle8.v v8,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 2:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vle8.v v16,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 3:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vle8.v v24,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     default:
>>>>> +             break;
>>>>> +     }
>>>>> +}
>>>>> +
>>>>> +static inline void get_vreg(ulong vlenb, ulong which, ulong pos, ulong size, uint8_t *bytes)
>>>>> +{
>>>>> +     pos += (which % 8) * vlenb;
>>>>> +     bytes -= pos;
>>>>> +
>>>>> +     asm volatile (
>>>>> +             "       .option push\n\t"
>>>>> +             "       .option arch, +v\n\t"
>>>>> +             "       vsetvli x0, %0, e8, m8, tu, ma\n\t"
>>>>> +             "       .option pop\n\t"
>>>>> +             :: "r" (pos + size));
>>>>> +
>>>>> +     csr_write(CSR_VSTART, pos);
>>>>> +
>>>>> +     switch (which / 8) {
>>>>> +     case 0:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vse8.v v0,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 1:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vse8.v v8,  (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 2:
>>>>> +             asm volatile (
>>>>> +                     "       .option push\n\t"
>>>>> +                     "       .option arch, +v\n\t"
>>>>> +                     "       vse8.v v16, (%0)\n\t"
>>>>> +                     "       .option pop\n\t"
>>>>> +                     :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     case 3:
>>>>> +             asm volatile (
>>>>> +                             ".option push\n\t"
>>>>> +                             ".option arch, +v\n\t"
>>>>> +                             "vse8.v v24, (%0)\n\t"
>>>>> +                             ".option pop\n\t"
>>>>> +                             :: "r" (bytes) : "memory");
>>>>> +             break;
>>>>> +     default:
>>>>> +             break;
>>>>> +     }
>>>>> +}
>>>>> +
>>>>> +static inline void vsetvl(ulong vl, ulong vtype)
>>>>> +{
>>>>> +     asm volatile (
>>>>> +             "       .option push\n\t"
>>>>> +             "       .option arch, +v\n\t"
>>>>> +             "       vsetvl x0, %0, %1\n\t"
>>>>> +             "       .option pop\n\t"
>>>>> +                     :: "r" (vl), "r" (vtype));
>>>>> +}
>>>>> +
>>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>>> +                              struct sbi_trap_context *tcntx)
>>>>> +{
>>>>> +     const struct sbi_trap_info *orig_trap = &tcntx->trap;
>>>>> +     struct sbi_trap_regs *regs = &tcntx->regs;
>>>>> +     struct sbi_trap_info uptrap;
>>>>> +     ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>>>>> +     ulong vl = csr_read(CSR_VL);
>>>>> +     ulong vtype = csr_read(CSR_VTYPE);
>>>>> +     ulong vlenb = csr_read(CSR_VLENB);
>>>>> +     ulong vstart = csr_read(CSR_VSTART);
>>>>> +     ulong base = GET_RS1(insn, regs);
>>>>> +     ulong stride = GET_RS2(insn, regs);
>>>>> +     ulong vd = GET_VD(insn);
>>>>> +     ulong vs2 = GET_VS2(insn);
>>>>> +     ulong view = GET_VIEW(insn);
>>>>> +     ulong vsew = GET_VSEW(vtype);
>>>>> +     ulong vlmul = GET_VLMUL(vtype);
>>>>> +     bool illegal = GET_MEW(insn);
>>>>> +     bool masked = IS_MASKED(insn);
>>>>> +     uint8_t mask[VLEN_MAX / 8];
>>>>> +     uint8_t bytes[8 * sizeof(uint64_t)];
>>>>> +     ulong len = GET_LEN(view);
>>>>> +     ulong nf = GET_NF(insn);
>>>>> +     ulong vemul = GET_VEMUL(vlmul, view, vsew);
>>>>> +     ulong emul = GET_EMUL(vemul);
>>>>> +
>>>>> +     if (IS_UNIT_STRIDE_LOAD(insn) || IS_FAULT_ONLY_FIRST_LOAD(insn)) {
>>>>> +             stride = nf * len;
>>>>> +     } else if (IS_WHOLE_REG_LOAD(insn)) {
>>>>> +             vl = (nf * vlenb) >> view;
>>>>> +             nf = 1;
>>>>> +             vemul = 0;
>>>>> +             emul = 1;
>>>>> +             stride = nf * len;
>>>>> +     } else if (IS_INDEXED_LOAD(insn)) {
>>>>> +             len = 1 << vsew;
>>>>> +             vemul = (vlmul + vsew - vsew) & 7;
>>>>> +             emul = 1 << ((vemul & 4) ? 0 : vemul);
>>>>> +             stride = nf * len;
>>>>> +     }
>>>>> +
>>>>> +     if (illegal || vlenb > VLEN_MAX / 8) {
>>>>> +             struct sbi_trap_info trap = {
>>>>> +                     uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
>>>>> +                     uptrap.tval = insn,
>>>>> +             };
>>>>> +             return sbi_trap_redirect(regs, &trap);
>>>>> +     }
>>>>> +
>>>>> +     if (masked)
>>>>> +             get_vreg(vlenb, 0, 0, vlenb, mask);
>>>>> +
>>>>> +     do {
>>>>> +             if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>>>>> +                     // compute element address
>>>>> +                     ulong addr = base + vstart * stride;
>>>>> +
>>>>> +                     if (IS_INDEXED_LOAD(insn)) {
>>>>> +                             ulong offset = 0;
>>>>> +
>>>>> +                             get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>>>>> +                             addr = base + offset;
>>>>> +                     }
>>>>> +
>>>>> +                     csr_write(CSR_VSTART, vstart);
>>>>> +
>>>>> +                     // obtain load data from memory
>>>>> +                     for (ulong seg = 0; seg < nf; seg++) {
>>>>> +                             for (ulong i = 0; i < len; i++) {
>>>>> +                                     bytes[seg * len + i] =
>>>>> +                                             sbi_load_u8((void *)(addr + seg * len + i),
>>>>> +                                                         &uptrap);
>>>>> +
>>>>> +                                     if (uptrap.cause) {
>>>>> +                                             if (IS_FAULT_ONLY_FIRST_LOAD(insn) && vstart != 0) {
>>>>> +                                                     vl = vstart;
>>>>> +                                                     break;
>>>>> +                                             }
>>>>> +                                             vsetvl(vl, vtype);
>>>>> +                                             uptrap.tinst = sbi_misaligned_tinst_fixup(
>>>>> +                                                     orig_trap->tinst, uptrap.tinst, i);
>>>>> +                                             return sbi_trap_redirect(regs, &uptrap);
>>>>> +                                     }
>>>>> +                             }
>>>>> +                     }
>>>>> +
>>>>> +                     // write load data to regfile
>>>>> +                     for (ulong seg = 0; seg < nf; seg++)
>>>>> +                             set_vreg(vlenb, vd + seg * emul, vstart * len,
>>>>> +                                      len, &bytes[seg * len]);
>>>>> +             }
>>>>> +     } while (++vstart < vl);
>>>>> +
>>>>> +     // restore clobbered vl/vtype
>>>>> +     vsetvl(vl, vtype);
>>>>> +
>>>>> +     return vl;
>>>>> +}
>>>>> +
>>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>>> +                              struct sbi_trap_context *tcntx)
>>>>> +{
>>>>> +     const struct sbi_trap_info *orig_trap = &tcntx->trap;
>>>>> +     struct sbi_trap_regs *regs = &tcntx->regs;
>>>>> +     struct sbi_trap_info uptrap;
>>>>> +     ulong insn = sbi_get_insn(regs->mepc, &uptrap);
>>>>> +     ulong vl = csr_read(CSR_VL);
>>>>> +     ulong vtype = csr_read(CSR_VTYPE);
>>>>> +     ulong vlenb = csr_read(CSR_VLENB);
>>>>> +     ulong vstart = csr_read(CSR_VSTART);
>>>>> +     ulong base = GET_RS1(insn, regs);
>>>>> +     ulong stride = GET_RS2(insn, regs);
>>>>> +     ulong vd = GET_VD(insn);
>>>>> +     ulong vs2 = GET_VS2(insn);
>>>>> +     ulong view = GET_VIEW(insn);
>>>>> +     ulong vsew = GET_VSEW(vtype);
>>>>> +     ulong vlmul = GET_VLMUL(vtype);
>>>>> +     bool illegal = GET_MEW(insn);
>>>>> +     bool masked = IS_MASKED(insn);
>>>>> +     uint8_t mask[VLEN_MAX / 8];
>>>>> +     uint8_t bytes[8 * sizeof(uint64_t)];
>>>>> +     ulong len = GET_LEN(view);
>>>>> +     ulong nf = GET_NF(insn);
>>>>> +     ulong vemul = GET_VEMUL(vlmul, view, vsew);
>>>>> +     ulong emul = GET_EMUL(vemul);
>>>>> +
>>>>> +     if (IS_UNIT_STRIDE_STORE(insn)) {
>>>>> +             stride = nf * len;
>>>>> +     } else if (IS_WHOLE_REG_STORE(insn)) {
>>>>> +             vl = (nf * vlenb) >> view;
>>>>> +             nf = 1;
>>>>> +             vemul = 0;
>>>>> +             emul = 1;
>>>>> +             stride = nf * len;
>>>>> +     } else if (IS_INDEXED_STORE(insn)) {
>>>>> +             len = 1 << vsew;
>>>>> +             vemul = (vlmul + vsew - vsew) & 7;
>>>>> +             emul = 1 << ((vemul & 4) ? 0 : vemul);
>>>>> +             stride = nf * len;
>>>>> +     }
>>>>> +
>>>>> +     if (illegal || vlenb > VLEN_MAX / 8) {
>>>>> +             struct sbi_trap_info trap = {
>>>>> +                     uptrap.cause = CAUSE_ILLEGAL_INSTRUCTION,
>>>>> +                     uptrap.tval = insn,
>>>>> +             };
>>>>> +             return sbi_trap_redirect(regs, &trap);
>>>>> +     }
>>>>> +
>>>>> +     if (masked)
>>>>> +             get_vreg(vlenb, 0, 0, vlenb, mask);
>>>>> +
>>>>> +     do {
>>>>> +             if (!masked || ((mask[vstart / 8] >> (vstart % 8)) & 1)) {
>>>>> +                     // compute element address
>>>>> +                     ulong addr = base + vstart * stride;
>>>>> +
>>>>> +                     if (IS_INDEXED_STORE(insn)) {
>>>>> +                             ulong offset = 0;
>>>>> +
>>>>> +                             get_vreg(vlenb, vs2, vstart << view, 1 << view, (uint8_t *)&offset);
>>>>> +                             addr = base + offset;
>>>>> +                     }
>>>>> +
>>>>> +                     // obtain store data from regfile
>>>>> +                     for (ulong seg = 0; seg < nf; seg++)
>>>>> +                             get_vreg(vlenb, vd + seg * emul, vstart * len,
>>>>> +                                      len, &bytes[seg * len]);
>>>>> +
>>>>> +                     csr_write(CSR_VSTART, vstart);
>>>>> +
>>>>> +                     // write store data to memory
>>>>> +                     for (ulong seg = 0; seg < nf; seg++) {
>>>>> +                             for (ulong i = 0; i < len; i++) {
>>>>> +                                     sbi_store_u8((void *)(addr + seg * len + i),
>>>>> +                                                  bytes[seg * len + i], &uptrap);
>>>>> +                                     if (uptrap.cause) {
>>>>> +                                             vsetvl(vl, vtype);
>>>>> +                                             uptrap.tinst = sbi_misaligned_tinst_fixup(
>>>>> +                                                     orig_trap->tinst, uptrap.tinst, i);
>>>>> +                                             return sbi_trap_redirect(regs, &uptrap);
>>>>> +                                     }
>>>>> +                             }
>>>>> +                     }
>>>>> +             }
>>>>> +     } while (++vstart < vl);
>>>>> +
>>>>> +     // restore clobbered vl/vtype
>>>>> +     vsetvl(vl, vtype);
>>>>> +
>>>>> +     return vl;
>>>>> +}
>>>>> +#else
>>>>> +int sbi_misaligned_v_ld_emulator(int rlen, union sbi_ldst_data *out_val,
>>>>> +                              struct sbi_trap_context *tcntx)
>>>>> +{
>>>>> +     return 0;
>>>>> +}
>>>>> +int sbi_misaligned_v_st_emulator(int wlen, union sbi_ldst_data in_val,
>>>>> +                              struct sbi_trap_context *tcntx)
>>>>> +{
>>>>> +     return 0;
>>>>> +}
>>>>> +#endif /* CONFIG_SUPPORT_VECTOR  */
>>>>
>>



