[PATCH 05/15] arc: TCG instruction generator and hand-definitions

Cupertino Miranda Cupertino.Miranda at synopsys.com
Fri Jan 15 12:11:34 EST 2021


> On 11/11/20 10:17 AM, cupertinomiranda at gmail.com wrote:
>> +/*
>> + * The macro to add boiler plate code for conditional execution.
>> + * It will add tcg_gen codes only if there is a condition to
>> + * be checked (ctx->insn.cc != 0). This macro assumes that there
>> + * is a "ctx" variable of type "DisasCtxt *" in context. Remember
>> + * to pair it with CC_EPILOGUE macro.
>> + */
>> +#define CC_PROLOGUE                                   \
>> +  TCGv cc = tcg_temp_local_new();                     \
>> +  TCGLabel *done = gen_new_label();                   \
>> +  do {                                                \
>> +    if (ctx->insn.cc) {                               \
>> +        arc_gen_verifyCCFlag(ctx, cc);                \
>> +        tcg_gen_brcondi_tl(TCG_COND_NE, cc, 1, done); \
>> +    }                                                 \
>> +  } while (0)
>> +
>> +/*
>> + * The finishing counter part of CC_PROLUGE. This is supposed
>> + * to be put at the end of the function using it.
>> + */
>> +#define CC_EPILOGUE          \
>> +    if (ctx->insn.cc) {      \
>> +        gen_set_label(done); \
>> +    }                        \
>> +    tcg_temp_free(cc)
> 
> Why would this need to be boiler-plate?  Why would this not simply exist in
> exactly one location?
> 
> You don't need a tcg_temp_local_new, because the cc value is not used past the
> branch.  You should free the temp immediately after the branch.
> 

I wonder if what you thought was to move those macros to functions and 
call it when CC_PROLOGUE and CC_EPILOGUE are used.
I think the macros were choosen due to the sharing of the 'cc' and 
'done' variables in both CC_PROLOGUE AND CC_EPILOGUE.

>> +void gen_goto_tb(DisasContext *ctx, int n, TCGv dest)
>> +{
>> +    tcg_gen_mov_tl(cpu_pc, dest);
>> +    tcg_gen_andi_tl(cpu_pcl, dest, 0xfffffffc);
>> +    if (ctx->base.singlestep_enabled) {
>> +        gen_helper_debug(cpu_env);
>> +    }
>> +    tcg_gen_exit_tb(NULL, 0);
> 
> Missing else.  This is dead code for single-step.
Goes a little above my knowledge of QEMU internals to be honest.
Do you have a recommendation what we should be doing here ?

> 
>> +void arc_translate_init(void)
>> +{
>> +    int i;
>> +    static int init_not_done = 1;
>> +
>> +    if (init_not_done == 0) {
>> +        return;
>> +    }
> 
> Useless.  This will only be called once.
> 
>> +#define ARC_REG_OFFS(x) offsetof(CPUARCState, x)
>> +
>> +#define NEW_ARC_REG(x) \
>> +        tcg_global_mem_new_i32(cpu_env, offsetof(CPUARCState, x), #x)
>> +
>> +    cpu_S1f = NEW_ARC_REG(macmod.S1);
>> +    cpu_S2f = NEW_ARC_REG(macmod.S2);
>> +    cpu_CSf = NEW_ARC_REG(macmod.CS);
>> +
>> +    cpu_Zf  = NEW_ARC_REG(stat.Zf);
>> +    cpu_Lf  = NEW_ARC_REG(stat.Lf);
>> +    cpu_Nf  = NEW_ARC_REG(stat.Nf);
>> +    cpu_Cf  = NEW_ARC_REG(stat.Cf);
>> +    cpu_Vf  = NEW_ARC_REG(stat.Vf);
>> +    cpu_Uf  = NEW_ARC_REG(stat.Uf);
>> +    cpu_DEf = NEW_ARC_REG(stat.DEf);
>> +    cpu_ESf = NEW_ARC_REG(stat.ESf);
>> +    cpu_AEf = NEW_ARC_REG(stat.AEf);
>> +    cpu_Hf  = NEW_ARC_REG(stat.Hf);
>> +    cpu_IEf = NEW_ARC_REG(stat.IEf);
>> +    cpu_Ef  = NEW_ARC_REG(stat.Ef);
>> +
>> +    cpu_is_delay_slot_instruction = NEW_ARC_REG(stat.is_delay_slot_instruction);
>> +
>> +    cpu_l1_Zf = NEW_ARC_REG(stat_l1.Zf);
>> +    cpu_l1_Lf = NEW_ARC_REG(stat_l1.Lf);
>> +    cpu_l1_Nf = NEW_ARC_REG(stat_l1.Nf);
>> +    cpu_l1_Cf = NEW_ARC_REG(stat_l1.Cf);
>> +    cpu_l1_Vf = NEW_ARC_REG(stat_l1.Vf);
>> +    cpu_l1_Uf = NEW_ARC_REG(stat_l1.Uf);
>> +    cpu_l1_DEf = NEW_ARC_REG(stat_l1.DEf);
>> +    cpu_l1_AEf = NEW_ARC_REG(stat_l1.AEf);
>> +    cpu_l1_Hf = NEW_ARC_REG(stat_l1.Hf);
>> +
>> +    cpu_er_Zf = NEW_ARC_REG(stat_er.Zf);
>> +    cpu_er_Lf = NEW_ARC_REG(stat_er.Lf);
>> +    cpu_er_Nf = NEW_ARC_REG(stat_er.Nf);
>> +    cpu_er_Cf = NEW_ARC_REG(stat_er.Cf);
>> +    cpu_er_Vf = NEW_ARC_REG(stat_er.Vf);
>> +    cpu_er_Uf = NEW_ARC_REG(stat_er.Uf);
>> +    cpu_er_DEf = NEW_ARC_REG(stat_er.DEf);
>> +    cpu_er_AEf = NEW_ARC_REG(stat_er.AEf);
>> +    cpu_er_Hf = NEW_ARC_REG(stat_er.Hf);
>> +
>> +    cpu_eret = NEW_ARC_REG(eret);
>> +    cpu_erbta = NEW_ARC_REG(erbta);
>> +    cpu_ecr = NEW_ARC_REG(ecr);
>> +    cpu_efa = NEW_ARC_REG(efa);
>> +    cpu_bta = NEW_ARC_REG(bta);
>> +    cpu_lps = NEW_ARC_REG(lps);
>> +    cpu_lpe = NEW_ARC_REG(lpe);
>> +    cpu_pc = NEW_ARC_REG(pc);
>> +    cpu_npc = NEW_ARC_REG(npc);
>> +
>> +    cpu_bta_l1 = NEW_ARC_REG(bta_l1);
>> +    cpu_bta_l2 = NEW_ARC_REG(bta_l2);
>> +
>> +    cpu_intvec = NEW_ARC_REG(intvec);
>> +
>> +    for (i = 0; i < 64; i++) {
>> +        char name[16];
>> +
>> +        sprintf(name, "r[%d]", i);
>> +
>> +        cpu_r[i] = tcg_global_mem_new_i32(cpu_env,
>> +                                          ARC_REG_OFFS(r[i]),
>> +                                          strdup(name));
>> +    }
>> +
>> +    cpu_gp     = cpu_r[26];
>> +    cpu_fp     = cpu_r[27];
>> +    cpu_sp     = cpu_r[28];
>> +    cpu_ilink1 = cpu_r[29];
>> +    cpu_ilink2 = cpu_r[30];
>> +    cpu_blink  = cpu_r[31];
>> +    cpu_acclo  = cpu_r[58];
>> +    cpu_acchi  = cpu_r[59];
>> +    cpu_lpc    = cpu_r[60];
>> +    cpu_limm   = cpu_r[62];
>> +    cpu_pcl    = cpu_r[63];
> 
> You shouldn't need two pointers to these.  Either use cpu_r[PCL] (preferred) or
> #define cpu_pcl cpu_r[63].
I will change it to macros instead, if you don't mind.
> You could put all of these into a const static table.
What do you mean, can we make the effect of tcg_global_mem_new_i32 as 
constant ?

>> +static void init_constants(void)
>> +{
>> +#define SEMANTIC_FUNCTION(...)
>> +#define MAPPING(...)
>> +#define CONSTANT(NAME, MNEMONIC, OP_NUM, VALUE) \
>> +  add_constant_operand(MAP_##MNEMONIC##_##NAME, OP_NUM, VALUE);
>> +#include "target/arc/semfunc_mapping.def"
>> +#include "target/arc/extra_mapping.def"
>> +#undef MAPPING
>> +#undef CONSTANT
>> +#undef SEMANTIC_FUNCTION
>> +}
> 
> Ew.  Yet another thing that can be done at build time.
As far as I remember it, there was no way I could generate this table 
using the C pre-processor. Do you suggest to make this table using an 
external tool ?


> 
>> +            int32_t limm = operand.value;
>> +            if (operand.type & ARC_OPERAND_LIMM) {
>> +                limm = ctx->insn.limm;
>> +                tcg_gen_movi_tl(cpu_limm, limm);
>> +                ret = cpu_r[62];
>> +            } else {
>> +                ret = tcg_const_local_i32(limm);
>> +            }
>> +        }
>> +    }
>> +
>> +  return ret;
> 
> Why are you using locals for everything?  Is it because you have no proper
> control over your use of branching?

Initially we though locals the good way to define temporaries. :-(
What should be the best ? We will need to change a lot of code for this.

> 
>> +    qemu_log_mask(CPU_LOG_TB_IN_ASM,
>> +                  "CPU in sleep mode, waiting for an IRQ.\n");
> 
> Incorrect level at which to log this.
> 
> You wanted the logging at runtime, not translate. Which suggests you'd be
> better off moving this entire function to a helper.
> 
>> +/* Return from exception. */
>> +static void gen_rtie(DisasContext *ctx)
>> +{
>> +    tcg_gen_movi_tl(cpu_pc, ctx->cpc);
>> +    gen_helper_rtie(cpu_env);
>> +    tcg_gen_mov_tl(cpu_pc, cpu_pcl);
>> +    gen_goto_tb(ctx, 1, cpu_pc);
>> +}
> 
> You must return to the main loop here, not goto_tb.  You must return to the
> main loop every time your interrupt mask changes, so that pending interrupts
> may be accepted.
> 
"gen_goto_tb" calls in the end "tcg_gen_exit_tb(NULL, 0)", is it not the 
same ?
We need to investigate this implementation further. A quick change to 
gen_rtie broke linux booting.
Can you recomend some target that implements the loop exit on rtie as 
you suggest ?



More information about the linux-snps-arc mailing list