[PATCH 04/15] arc: TCG and decoder glue code and helpers

Richard Henderson richard.henderson at linaro.org
Tue Dec 1 16:35:19 EST 2020


On 11/11/20 10:17 AM, cupertinomiranda at gmail.com wrote:
> From: Cupertino Miranda <cmiranda at synopsys.com>
> 
> Signed-off-by: Cupertino Miranda <cmiranda at synopsys.com>
> ---
>  target/arc/extra_mapping.def   |  40 ++
>  target/arc/helper.c            | 293 +++++++++++++
>  target/arc/helper.h            |  46 ++
>  target/arc/op_helper.c         | 749 +++++++++++++++++++++++++++++++++
>  target/arc/semfunc_mapping.def | 329 +++++++++++++++
>  5 files changed, 1457 insertions(+)
>  create mode 100644 target/arc/extra_mapping.def
>  create mode 100644 target/arc/helper.c
>  create mode 100644 target/arc/helper.h
>  create mode 100644 target/arc/op_helper.c
>  create mode 100644 target/arc/semfunc_mapping.def

Not an ideal patch split, since nothing uses the def files at this point.  But
looking forward to the end product, they seem to be exactly what I was talking
about re string manipulation vs patch 3.

In particular, arc_map_opcode should be done at qemu build/compile time.  And
not for nothing, a linear search through strings during translation is really
beyond the pale.

> +#if defined(CONFIG_USER_ONLY)
> +
> +void arc_cpu_do_interrupt(CPUState *cs)
> +{
> +    ARCCPU *cpu = ARC_CPU(cs);
> +    CPUARCState *env = &cpu->env;
> +
> +    cs->exception_index = -1;
> +    CPU_ILINK(env) = env->pc;
> +}

There are no user-only interrupts.

> +    /*
> +     * NOTE: Special LP_END exception. Immediatelly return code execution to

Immediately.

> +    /* 15. The PC is set with the appropriate exception vector. */
> +    env->pc = cpu_ldl_code(env, env->intvec + offset);
> +    CPU_PCL(env) = env->pc & 0xfffffffe;

You should be using address_space_ldl, and handling any error.  As it is, if
this load fails you'll get another interrupt, bringing you right back here, etc.

> +DEF_HELPER_1(debug, void, env)
> +DEF_HELPER_2(norm, i32, env, i32)
> +DEF_HELPER_2(normh, i32, env, i32)
> +DEF_HELPER_2(ffs, i32, env, i32)
> +DEF_HELPER_2(fls, i32, env, i32)
> +DEF_HELPER_2(lr, tl, env, i32)
> +DEF_HELPER_3(sr, void, env, i32, i32)
> +DEF_HELPER_2(halt, noreturn, env, i32)
> +DEF_HELPER_1(rtie, void, env)
> +DEF_HELPER_1(flush, void, env)
> +DEF_HELPER_4(raise_exception, noreturn, env, i32, i32, i32)
> +DEF_HELPER_2(zol_verify, void, env, i32)
> +DEF_HELPER_2(fake_exception, void, env, i32)
> +DEF_HELPER_2(set_status32, void, env, i32)
> +DEF_HELPER_1(get_status32, i32, env)
> +DEF_HELPER_3(carry_add_flag, i32, i32, i32, i32)
> +DEF_HELPER_3(overflow_add_flag, i32, i32, i32, i32)
> +DEF_HELPER_3(overflow_sub_flag, i32, i32, i32, i32)
> +
> +DEF_HELPER_2(enter, void, env, i32)
> +DEF_HELPER_2(leave, void, env, i32)
> +
> +DEF_HELPER_3(mpymu, i32, env, i32, i32)
> +DEF_HELPER_3(mpym, i32, env, i32, i32)
> +
> +DEF_HELPER_3(repl_mask, i32, i32, i32, i32)

Use DEF_HELPER_FLAGS_* when possible.

> +target_ulong helper_norm(CPUARCState *env, uint32_t src1)
> +{
> +    int i;
> +    int32_t tmp = (int32_t) src1;
> +    if (tmp == 0 || tmp == -1) {
> +        return 0;
> +    }
> +    for (i = 0; i <= 31; i++) {
> +        if ((tmp >> i) == 0) {
> +            break;
> +        }
> +        if ((tmp >> i) == -1) {
> +            break;
> +        }
> +    }
> +    return i;
> +}

The spec says 0/-1 -> 0x1f, not 0.

That said, this appears to be clrsb32(src1), which could also be expanded
inline with two uses of tcg_gen_clz_i32.

> +target_ulong helper_normh(CPUARCState *env, uint32_t src1)
> +{
> +    int i;
> +    for (i = 0; i <= 15; i++) {
> +        if (src1 >> i == 0) {
> +            break;
> +        }
> +        if (src1 >> i == -1) {
> +            break;
> +        }
> +    }
> +    return i;
> +}

Similarly.


> +
> +target_ulong helper_ffs(CPUARCState *env, uint32_t src)
> +{
> +    int i;
> +    if (src == 0) {
> +        return 31;
> +    }
> +    for (i = 0; i <= 31; i++) {
> +        if (((src >> i) & 1) != 0) {
> +            break;
> +        }
> +    }
> +    return i;
> +}

This should use tcg_gen_ctz_i32.

> +target_ulong helper_fls(CPUARCState *env, uint32_t src)
> +{
> +    int i;
> +    if (src == 0) {
> +        return 0;
> +    }
> +
> +    for (i = 31; i >= 0; i--) {
> +        if (((src >> i) & 1) != 0) {
> +            break;
> +        }
> +    }
> +    return i;
> +}

This should use tcg_gen_clz_i32.

> +
> +static void report_aux_reg_error(uint32_t aux)
> +{
> +    if (((aux >= ARC_BCR1_START) && (aux <= ARC_BCR1_END)) ||
> +        ((aux >= ARC_BCR2_START) && (aux <= ARC_BCR2_END))) {
> +        qemu_log_mask(LOG_UNIMP, "Undefined BCR 0x%03x\n", aux);
> +    }
> +
> +    error_report("Undefined AUX register 0x%03x, aborting", aux);
> +    exit(EXIT_FAILURE);
> +}

Do not exit on failure, or error_report for that matter -- raise an exception.
 Or...

> +void helper_sr(CPUARCState *env, uint32_t val, uint32_t aux)
> +{
> +    struct arc_aux_reg_detail *aux_reg_detail =
> +        arc_aux_reg_struct_for_address(aux, ARC_OPCODE_ARCv2HS);
> +
> +    if (aux_reg_detail == NULL) {
> +        report_aux_reg_error(aux);
> +    }

... on the off-chance the above is a qemu bug and shouldn't happen, just
g_assert(aux_reg_detail != NULL).

> +static target_ulong get_identity(CPUARCState *env)
> +{
> +    target_ulong chipid = 0xffff, arcnum = 0, arcver, res;
> +
> +    switch (env->family) {
> +    case ARC_OPCODE_ARC700:
> +        arcver = 0x34;
> +        break;
> +
> +    case ARC_OPCODE_ARCv2EM:
> +        arcver = 0x44;
> +        break;
> +
> +    case ARC_OPCODE_ARCv2HS:
> +        arcver = 0x54;
> +        break;
> +
> +    default:
> +        arcver = 0;
> +
> +    }
> +
> +    /* TODO: in SMP, arcnum depends on the cpu instance. */
> +    res = ((chipid & 0xFFFF) << 16) | ((arcnum & 0xFF) << 8) | (arcver & 0xFF);
> +    return res;
> +}

Perhaps this should just be a constant on ArcCPU?

> +target_ulong helper_lr(CPUARCState *env, uint32_t aux)
> +{
> +    target_ulong result = 0;
> +
> +    struct arc_aux_reg_detail *aux_reg_detail =
> +        arc_aux_reg_struct_for_address(aux, ARC_OPCODE_ARCv2HS);
> +
> +    if (aux_reg_detail == NULL) {
> +        report_aux_reg_error(aux);
> +    }

Similar commentary for helper_sr.

> +void QEMU_NORETURN helper_halt(CPUARCState *env, uint32_t npc)
> +{
> +    CPUState *cs = env_cpu(env);
> +    if (env->stat.Uf) {
> +        cs->exception_index = EXCP_PRIVILEGEV;
> +        env->causecode = 0;
> +        env->param = 0;
> +         /* Restore PC such that we point at the faulty instruction.  */
> +        env->eret = env->pc;

Any reason not to handle Uf at translate time?  Or at least create a single
helper function for that here.  But it seems like translate will have to do a
lot of priv checking anyway and will already have that handy.

> +void helper_flush(CPUARCState *env)
> +{
> +    tb_flush((CPUState *)env_archcpu(env));
> +}

env_cpu(env), no cast required.

> +void QEMU_NORETURN helper_raise_exception(CPUARCState *env,
> +                                          uint32_t index,
> +                                          uint32_t causecode,
> +                                          uint32_t param)
> +{
> +    CPUState *cs = env_cpu(env);
> +    /* Cannot restore state here. */
> +    /* cpu_restore_state(cs, GETPC(), true); */

Not a very good comment, because you *could* restore state here, but some user
of the function wants to record different state.

Alternately, the function is being used incorrectly, e.g.

> +void helper_zol_verify(CPUARCState *env, uint32_t npc)
> +{
> +    if (npc == env->lpe) {
> +        if (env->r[60] > 1) {
> +            env->r[60] -= 1;
> +            helper_raise_exception(env, (uint32_t) EXCP_LPEND_REACHED, 0,
> +                                   env->lps);

... here, which would have needed to pass in GETPC from here, and not use the
value from the inner call.

In general, you really shouldn't make calls from one helper_* to another,
because that way lies confusion.

The comment should be cleaned up to indicate the actual constraints.

> +uint32_t helper_carry_add_flag(uint32_t dest, uint32_t b, uint32_t c)
> +{
> +    uint32_t t1, t2, t3;
> +
> +    t1 = b & c;
> +    t2 = b & (~dest);
> +    t3 = c & (~dest);
> +    t1 = t1 | t2 | t3;
> +    return (t1 >> 31) & 1;
> +}
> +
> +uint32_t helper_overflow_add_flag(uint32_t dest, uint32_t b, uint32_t c)
> +{
> +    dest >>= 31;
> +    b >>= 31;
> +    c >>= 31;
> +    if ((dest == 0 && b == 1 && c == 1)
> +        || (dest == 1 && b == 0 && c == 0)) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +uint32_t helper_overflow_sub_flag(uint32_t dest, uint32_t b, uint32_t c)
> +{
> +    dest >>= 31;
> +    b >>= 31;
> +    c >>= 31;
> +    if ((dest == 1 && b == 0 && c == 1)
> +        || (dest == 0 && b == 1 && c == 0)) {
> +        return 1;
> +    } else {
> +        return 0;
> +    }
> +}

There's nothing in here that can't be done with tcg_gen_add2_i32 and
tcg_gen_sub2_i32.

> +uint32_t helper_repl_mask(uint32_t dest, uint32_t src, uint32_t mask)
> +{
> +    uint32_t ret = dest & (~mask);
> +    ret |= (src & mask);
> +
> +    return ret;
> +}

    tcg_gen_and_i32(tmp, src, mask);
    tcg_gen_andc_i32(dest, dest, mask);
    tcg_gen_or_i32(dest, dest, tmp);

> +uint32_t helper_mpymu(CPUARCState *env, uint32_t b, uint32_t c)
> +{
> +    uint64_t _b = (uint64_t) b;
> +    uint64_t _c = (uint64_t) c;
> +
> +    return (uint32_t) ((_b * _c) >> 32);
> +}

tcg_gen_mulu2_i32(tmp, dest, b, c);

> +uint32_t helper_mpym(CPUARCState *env, uint32_t b, uint32_t c)
> +{
> +    int64_t _b = (int64_t) ((int32_t) b);
> +    int64_t _c = (int64_t) ((int32_t) c);
> +
> +    /*
> +     * fprintf(stderr, "B = 0x%llx, C = 0x%llx, result = 0x%llx\n",
> +     *         _b, _c, _b * _c);
> +     */
> +    return (_b * _c) >> 32;

tcg_gen_muls2_i32(tmp, dest, b, c);

> +void helper_enter(CPUARCState *env, uint32_t u6)
> +{
> +    /* nothing to do? then bye-bye! */
> +    if (!u6) {
> +        return;
> +    }
> +
> +    uint8_t regs       = u6 & 0x0f; /* u[3:0] determines registers to save */
> +    bool    save_fp    = u6 & 0x10; /* u[4] indicates if fp must be saved  */
> +    bool    save_blink = u6 & 0x20; /* u[5] indicates saving of blink      */
> +    uint8_t stack_size = 4 * (regs + save_fp + save_blink);
> +
> +    /* number of regs to be saved must be sane */
> +    check_enter_leave_nr_regs(env, regs, GETPC());

Both of these checks could be translate time.

> +    /* this cannot be executed in a delay/execution slot */
> +    check_delay_or_execution_slot(env, GETPC());

As could this.

> +    /* stack must be a multiple of 4 (32 bit aligned) */
> +    check_addr_is_word_aligned(env, CPU_SP(env) - stack_size, GETPC());
> +
> +    uint32_t tmp_sp = CPU_SP(env);
> +
> +    if (save_fp) {
> +        tmp_sp -= 4;
> +        cpu_stl_data(env, tmp_sp, CPU_FP(env));
> +    }

And what if these stores raise an exception?  I doubt you're going to get an
exception at the correct pc.

> +void helper_leave(CPUARCState *env, uint32_t u7)

Similarly.  I think that both of these could be implemented entirely in
translate, which is what

> +    bool restore_fp    = u7 & 0x10; /* u[4] indicates if fp must be saved  */
> +    bool restore_blink = u7 & 0x20; /* u[5] indicates saving of blink      */
> +    bool jump_to_blink = u7 & 0x40; /* u[6] should we jump to blink?       */

these bits strongly imply.


r~



More information about the linux-snps-arc mailing list