Message ID | 20200825205950.730499-65-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/microblaze improvements | expand |
On Tue, Aug 25, 2020 at 01:59:37PM -0700, Richard Henderson wrote: > Split this out of the normal branch instructions, as it requires > special handling. End the TB only for an instruction barrier. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/microblaze/insns.decode | 2 + > target/microblaze/translate.c | 81 ++++++++++++++++++---------------- > 2 files changed, 45 insertions(+), 38 deletions(-) > > diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode > index 53da2b75aa..77b073be9e 100644 > --- a/target/microblaze/insns.decode > +++ b/target/microblaze/insns.decode > @@ -124,6 +124,8 @@ lwea 110010 ..... ..... ..... 0001 000 0000 @typea > lwx 110010 ..... ..... ..... 1000 000 0000 @typea > lwi 111010 ..... ..... ................ @typeb > > +mbar 101110 imm:5 00010 0000 0000 0000 0100 > + > mul 010000 ..... ..... ..... 000 0000 0000 @typea > mulh 010000 ..... ..... ..... 000 0000 0001 @typea > mulhu 010000 ..... ..... ..... 000 0000 0011 @typea > diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c > index fc1c661368..a391e80fb9 100644 > --- a/target/microblaze/translate.c > +++ b/target/microblaze/translate.c > @@ -1166,6 +1166,48 @@ static bool trans_brki(DisasContext *dc, arg_typeb_br *arg) > return true; > } > > +static bool trans_mbar(DisasContext *dc, arg_mbar *arg) > +{ > + int mbar_imm = arg->imm; > + > + /* > + * Instruction access memory barrier. > + * End the TB so that we recognize self-modified code immediately. > + */ > + if (mbar_imm & 1) { > + dc->cpustate_changed = 1; > + } This should be (mbar_imm & 1) == 0 But even with that fixed it fails some of our tests because interrupts that should become visible within a couple of cycles after a memory data barrier can now be delayed longer. I think we should always break the TB. > + /* Data access memory barrier. */ > + if (mbar_imm & 2) { > + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); > + } This should be (mbar_imm & 2) == 0 > + > + /* Sleep. */ > + if (mbar_imm & 16) { > + TCGv_i32 tmp_1; > + > + if (trap_userspace(dc, true)) { > + /* Sleep is a privileged instruction. */ > + return true; > + } > + > + t_sync_flags(dc); > + > + tmp_1 = tcg_const_i32(1); > + tcg_gen_st_i32(tmp_1, cpu_env, > + -offsetof(MicroBlazeCPU, env) > + +offsetof(CPUState, halted)); > + tcg_temp_free_i32(tmp_1); > + > + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); > + > + gen_raise_exception(dc, EXCP_HLT); > + } > + return true; > +} > + > + > static void msr_read(DisasContext *dc, TCGv_i32 d) > { > TCGv_i32 t; > @@ -1447,50 +1489,13 @@ static void dec_bcc(DisasContext *dc) > > static void dec_br(DisasContext *dc) > { > - unsigned int dslot, link, abs, mbar; > + unsigned int dslot, link, abs; > uint32_t add_pc; > > dslot = dc->ir & (1 << 20); > abs = dc->ir & (1 << 19); > link = dc->ir & (1 << 18); > > - /* Memory barrier. */ > - mbar = (dc->ir >> 16) & 31; > - if (mbar == 2 && dc->imm == 4) { > - uint16_t mbar_imm = dc->rd; > - > - /* Data access memory barrier. */ > - if ((mbar_imm & 2) == 0) { > - tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); > - } > - > - /* mbar IMM & 16 decodes to sleep. */ > - if (mbar_imm & 16) { > - TCGv_i32 tmp_1; > - > - if (trap_userspace(dc, true)) { > - /* Sleep is a privileged instruction. */ > - return; > - } > - > - t_sync_flags(dc); > - > - tmp_1 = tcg_const_i32(1); > - tcg_gen_st_i32(tmp_1, cpu_env, > - -offsetof(MicroBlazeCPU, env) > - +offsetof(CPUState, halted)); > - tcg_temp_free_i32(tmp_1); > - > - tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); > - > - gen_raise_exception(dc, EXCP_HLT); > - return; > - } > - /* Break the TB. */ > - dc->cpustate_changed = 1; > - return; > - } > - > if (dslot && dec_setup_dslot(dc)) { > return; > } > -- > 2.25.1 >
On 8/27/20 2:24 AM, Edgar E. Iglesias wrote: >> + /* >> + * Instruction access memory barrier. >> + * End the TB so that we recognize self-modified code immediately. >> + */ >> + if (mbar_imm & 1) { >> + dc->cpustate_changed = 1; >> + } > > This should be (mbar_imm & 1) == 0 > But even with that fixed it fails some of our tests because interrupts > that should become visible within a couple of cycles after a > memory data barrier can now be delayed longer. > > I think we should always break the TB. Ok. I assume this follows a write to something like an interrupt controller register? > >> + /* Data access memory barrier. */ >> + if (mbar_imm & 2) { >> + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); >> + } > > This should be (mbar_imm & 2) == 0 Oops. ;-) Applying the following incremental patch. r~ diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index a391e80fb9..1e2bb529ac 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -1170,16 +1170,8 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg) { int mbar_imm = arg->imm; - /* - * Instruction access memory barrier. - * End the TB so that we recognize self-modified code immediately. - */ - if (mbar_imm & 1) { - dc->cpustate_changed = 1; - } - /* Data access memory barrier. */ - if (mbar_imm & 2) { + if ((mbar_imm & 2) == 0) { tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); } @@ -1204,6 +1196,19 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg) gen_raise_exception(dc, EXCP_HLT); } + + /* + * If !(mbar_imm & 1), this is an instruction access memory barrier + * and we need to end the TB so that we recognize self-modified + * code immediately. + * + * However, there are some data mbars that need the TB break + * (and return to main loop) to recognize interrupts right away. + * E.g. recognizing a change to an interrupt controller register. + * + * Therefore, choose to end the TB always. + */ + dc->cpustate_changed = 1; return true; }
On Thu, Aug 27, 2020 at 02:58:06AM -0700, Richard Henderson wrote: > On 8/27/20 2:24 AM, Edgar E. Iglesias wrote: > >> + /* > >> + * Instruction access memory barrier. > >> + * End the TB so that we recognize self-modified code immediately. > >> + */ > >> + if (mbar_imm & 1) { > >> + dc->cpustate_changed = 1; > >> + } > > > > This should be (mbar_imm & 1) == 0 > > But even with that fixed it fails some of our tests because interrupts > > that should become visible within a couple of cycles after a > > memory data barrier can now be delayed longer. > > > > I think we should always break the TB. > > Ok. I assume this follows a write to something like an interrupt controller > register? yes, kind of. It's a memory store to a device that raises an interrupt as a result of that store. The interrupt propagates to the CPU and on real HW if the pipeline depth of the core is small, it gets taken within a couple of cycles after the barrier completes. In QEMU, that delay can become very long if we don't break the TB. Architecturally, it would be wrong to make such assumptions about the pipeline but there's code out there already.. > > > > >> + /* Data access memory barrier. */ > >> + if (mbar_imm & 2) { > >> + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); > >> + } > > > > This should be (mbar_imm & 2) == 0 > > Oops. ;-) > > Applying the following incremental patch. Thanks! With your incremental patch: Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com> > > > r~ > > > diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c > index a391e80fb9..1e2bb529ac 100644 > --- a/target/microblaze/translate.c > +++ b/target/microblaze/translate.c > @@ -1170,16 +1170,8 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg) > { > int mbar_imm = arg->imm; > > - /* > - * Instruction access memory barrier. > - * End the TB so that we recognize self-modified code immediately. > - */ > - if (mbar_imm & 1) { > - dc->cpustate_changed = 1; > - } > - > /* Data access memory barrier. */ > - if (mbar_imm & 2) { > + if ((mbar_imm & 2) == 0) { > tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); > } > > @@ -1204,6 +1196,19 @@ static bool trans_mbar(DisasContext *dc, arg_mbar *arg) > > gen_raise_exception(dc, EXCP_HLT); > } > + > + /* > + * If !(mbar_imm & 1), this is an instruction access memory barrier > + * and we need to end the TB so that we recognize self-modified > + * code immediately. > + * > + * However, there are some data mbars that need the TB break > + * (and return to main loop) to recognize interrupts right away. > + * E.g. recognizing a change to an interrupt controller register. > + * > + * Therefore, choose to end the TB always. > + */ > + dc->cpustate_changed = 1; > return true; > } >
On 8/27/20 3:08 AM, Edgar E. Iglesias wrote: >> Ok. I assume this follows a write to something like an interrupt controller >> register? > > yes, kind of. It's a memory store to a device that raises an interrupt as a > result of that store. The interrupt propagates to the CPU and on real HW if > the pipeline depth of the core is small, it gets taken within a couple of > cycles after the barrier completes. In QEMU, that delay can become very long > if we don't break the TB. Ok, yeah, same idea. The data store alters the set of interrupts pending. > Architecturally, it would be wrong to make such assumptions about the pipeline > but there's code out there already.. Yes indeed. r~
diff --git a/target/microblaze/insns.decode b/target/microblaze/insns.decode index 53da2b75aa..77b073be9e 100644 --- a/target/microblaze/insns.decode +++ b/target/microblaze/insns.decode @@ -124,6 +124,8 @@ lwea 110010 ..... ..... ..... 0001 000 0000 @typea lwx 110010 ..... ..... ..... 1000 000 0000 @typea lwi 111010 ..... ..... ................ @typeb +mbar 101110 imm:5 00010 0000 0000 0000 0100 + mul 010000 ..... ..... ..... 000 0000 0000 @typea mulh 010000 ..... ..... ..... 000 0000 0001 @typea mulhu 010000 ..... ..... ..... 000 0000 0011 @typea diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index fc1c661368..a391e80fb9 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -1166,6 +1166,48 @@ static bool trans_brki(DisasContext *dc, arg_typeb_br *arg) return true; } +static bool trans_mbar(DisasContext *dc, arg_mbar *arg) +{ + int mbar_imm = arg->imm; + + /* + * Instruction access memory barrier. + * End the TB so that we recognize self-modified code immediately. + */ + if (mbar_imm & 1) { + dc->cpustate_changed = 1; + } + + /* Data access memory barrier. */ + if (mbar_imm & 2) { + tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); + } + + /* Sleep. */ + if (mbar_imm & 16) { + TCGv_i32 tmp_1; + + if (trap_userspace(dc, true)) { + /* Sleep is a privileged instruction. */ + return true; + } + + t_sync_flags(dc); + + tmp_1 = tcg_const_i32(1); + tcg_gen_st_i32(tmp_1, cpu_env, + -offsetof(MicroBlazeCPU, env) + +offsetof(CPUState, halted)); + tcg_temp_free_i32(tmp_1); + + tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); + + gen_raise_exception(dc, EXCP_HLT); + } + return true; +} + + static void msr_read(DisasContext *dc, TCGv_i32 d) { TCGv_i32 t; @@ -1447,50 +1489,13 @@ static void dec_bcc(DisasContext *dc) static void dec_br(DisasContext *dc) { - unsigned int dslot, link, abs, mbar; + unsigned int dslot, link, abs; uint32_t add_pc; dslot = dc->ir & (1 << 20); abs = dc->ir & (1 << 19); link = dc->ir & (1 << 18); - /* Memory barrier. */ - mbar = (dc->ir >> 16) & 31; - if (mbar == 2 && dc->imm == 4) { - uint16_t mbar_imm = dc->rd; - - /* Data access memory barrier. */ - if ((mbar_imm & 2) == 0) { - tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); - } - - /* mbar IMM & 16 decodes to sleep. */ - if (mbar_imm & 16) { - TCGv_i32 tmp_1; - - if (trap_userspace(dc, true)) { - /* Sleep is a privileged instruction. */ - return; - } - - t_sync_flags(dc); - - tmp_1 = tcg_const_i32(1); - tcg_gen_st_i32(tmp_1, cpu_env, - -offsetof(MicroBlazeCPU, env) - +offsetof(CPUState, halted)); - tcg_temp_free_i32(tmp_1); - - tcg_gen_movi_i32(cpu_pc, dc->base.pc_next + 4); - - gen_raise_exception(dc, EXCP_HLT); - return; - } - /* Break the TB. */ - dc->cpustate_changed = 1; - return; - } - if (dslot && dec_setup_dslot(dc)) { return; }
Split this out of the normal branch instructions, as it requires special handling. End the TB only for an instruction barrier. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/microblaze/insns.decode | 2 + target/microblaze/translate.c | 81 ++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 38 deletions(-) -- 2.25.1