Message ID | 20190508000641.19090-35-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg: Move the softmmu tlb to CPUNegativeOffsetState | expand |
On Tue, May 7, 2019 at 5:35 PM Richard Henderson <richard.henderson@linaro.org> wrote: > > Amusingly, we had already ignored the comment to keep this value at the > end of CPUState. This restores the minimum negative offset from TCG_AREG0 > for code generation. > > For the couple of uses within qom/cpu.c, add a pointer from the CPUState > object to the IcountDecr object within CPUNegativeOffsetState. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Alistair > --- > include/exec/cpu-all.h | 1 + > include/exec/cpu-defs.h | 3 ++- > include/exec/gen-icount.h | 16 ++++++++++------ > include/qom/cpu.h | 40 ++++++++++++++++++--------------------- > accel/tcg/cpu-exec.c | 23 +++++++++++----------- > accel/tcg/tcg-all.c | 6 ++---- > accel/tcg/translate-all.c | 8 ++++---- > cpus.c | 9 +++++---- > qom/cpu.c | 4 ++-- > 9 files changed, 56 insertions(+), 54 deletions(-) > > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index 5ae83405c8..253dd1d9a5 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -380,6 +380,7 @@ int cpu_exec(CPUState *cpu); > static inline void cpu_set_cpustate_pointers(ArchCPU *cpu) > { > cpu->parent_obj.env_ptr = &cpu->env; > + cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr; > } > > /** > diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h > index ad97991faf..3971910653 100644 > --- a/include/exec/cpu-defs.h > +++ b/include/exec/cpu-defs.h > @@ -33,6 +33,7 @@ > #include "exec/hwaddr.h" > #endif > #include "exec/memattrs.h" > +#include "qom/cpu.h" > > #include "cpu-param.h" > > @@ -232,7 +233,7 @@ typedef struct CPUTLB { > * before CPUArchState, as a field named "neg". > */ > typedef struct CPUNegativeOffsetState { > - /* Empty */ > + IcountDecr icount_decr; > } CPUNegativeOffsetState; > > #endif > diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h > index 9cfa6ccce5..f7669b6841 100644 > --- a/include/exec/gen-icount.h > +++ b/include/exec/gen-icount.h > @@ -5,8 +5,6 @@ > > /* Helpers for instruction counting code generation. */ > > -#define ENV_OFFSET offsetof(ArchCPU, env) > - > static TCGOp *icount_start_insn; > > static inline void gen_tb_start(TranslationBlock *tb) > @@ -21,7 +19,8 @@ static inline void gen_tb_start(TranslationBlock *tb) > } > > tcg_gen_ld_i32(count, cpu_env, > - -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); > + offsetof(ArchCPU, neg.icount_decr.u32) - > + offsetof(ArchCPU, env)); > > if (tb_cflags(tb) & CF_USE_ICOUNT) { > imm = tcg_temp_new_i32(); > @@ -39,7 +38,8 @@ static inline void gen_tb_start(TranslationBlock *tb) > > if (tb_cflags(tb) & CF_USE_ICOUNT) { > tcg_gen_st16_i32(count, cpu_env, > - -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); > + offsetof(ArchCPU, neg.icount_decr.u16.low) - > + offsetof(ArchCPU, env)); > } > > tcg_temp_free_i32(count); > @@ -60,14 +60,18 @@ static inline void gen_tb_end(TranslationBlock *tb, int num_insns) > static inline void gen_io_start(void) > { > TCGv_i32 tmp = tcg_const_i32(1); > - tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); > + tcg_gen_st_i32(tmp, cpu_env, > + offsetof(ArchCPU, parent_obj.can_do_io) - > + offsetof(ArchCPU, env)); > tcg_temp_free_i32(tmp); > } > > static inline void gen_io_end(void) > { > TCGv_i32 tmp = tcg_const_i32(0); > - tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); > + tcg_gen_st_i32(tmp, cpu_env, > + offsetof(ArchCPU, parent_obj.can_do_io) - > + offsetof(ArchCPU, env)); > tcg_temp_free_i32(tmp); > } > > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index 08abcbd3fe..0d8fc1caa6 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -226,17 +226,25 @@ typedef struct CPUClass { > bool gdb_stop_before_watchpoint; > } CPUClass; > > +/* > + * Low 16 bits: number of cycles left, used only in icount mode. > + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs > + * for this CPU and return to its top level loop (even in non-icount mode). > + * This allows a single read-compare-cbranch-write sequence to test > + * for both decrementer underflow and exceptions. > + */ > +typedef union IcountDecr { > + uint32_t u32; > + struct { > #ifdef HOST_WORDS_BIGENDIAN > -typedef struct icount_decr_u16 { > - uint16_t high; > - uint16_t low; > -} icount_decr_u16; > + uint16_t high; > + uint16_t low; > #else > -typedef struct icount_decr_u16 { > - uint16_t low; > - uint16_t high; > -} icount_decr_u16; > + uint16_t low; > + uint16_t high; > #endif > + } u16; > +} IcountDecr; > > typedef struct CPUBreakpoint { > vaddr pc; > @@ -308,11 +316,6 @@ struct qemu_work_item; > * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU > * @singlestep_enabled: Flags for single-stepping. > * @icount_extra: Instructions until next timer event. > - * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode. > - * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this > - * CPU and return to its top level loop (even in non-icount mode). > - * This allows a single read-compare-cbranch-write sequence to test > - * for both decrementer underflow and exceptions. > * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution > * requires that IO only be performed on the last instruction of a TB > * so that interrupts take effect immediately. > @@ -322,6 +325,7 @@ struct qemu_work_item; > * @as: Pointer to the first AddressSpace, for the convenience of targets which > * only have a single AddressSpace > * @env_ptr: Pointer to subclass-specific CPUArchState field. > + * @icount_decr_ptr: Pointer to IcountDecr field within subclass. > * @gdb_regs: Additional GDB registers. > * @gdb_num_regs: Number of total registers accessible to GDB. > * @gdb_num_g_regs: Number of registers in GDB 'g' packets. > @@ -380,6 +384,7 @@ struct CPUState { > MemoryRegion *memory; > > void *env_ptr; /* CPUArchState */ > + IcountDecr *icount_decr_ptr; > > /* Accessed in parallel; all accesses must be atomic */ > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > @@ -434,15 +439,6 @@ struct CPUState { > > bool ignore_memory_transaction_failures; > > - /* Note that this is accessed at the start of every TB via a negative > - offset from AREG0. Leave this field at the end so as to make the > - (absolute value) offset as small as possible. This reduces code > - size, especially for hosts without large memory offsets. */ > - union { > - uint32_t u32; > - icount_decr_u16 u16; > - } icount_decr; > - > struct hax_vcpu_state *hax_vcpu; > > int hvf_fd; > diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c > index 45ef41ebb2..032a62672e 100644 > --- a/accel/tcg/cpu-exec.c > +++ b/accel/tcg/cpu-exec.c > @@ -54,7 +54,7 @@ typedef struct SyncClocks { > #define MAX_DELAY_PRINT_RATE 2000000000LL > #define MAX_NB_PRINTS 100 > > -static void align_clocks(SyncClocks *sc, const CPUState *cpu) > +static void align_clocks(SyncClocks *sc, CPUState *cpu) > { > int64_t cpu_icount; > > @@ -62,7 +62,7 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) > return; > } > > - cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; > + cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low; > sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount); > sc->last_cpu_icount = cpu_icount; > > @@ -105,15 +105,15 @@ static void print_delay(const SyncClocks *sc) > } > } > > -static void init_delay_params(SyncClocks *sc, > - const CPUState *cpu) > +static void init_delay_params(SyncClocks *sc, CPUState *cpu) > { > if (!icount_align_option) { > return; > } > sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); > sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock; > - sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; > + sc->last_cpu_icount > + = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low; > if (sc->diff_clk < max_delay) { > max_delay = sc->diff_clk; > } > @@ -467,7 +467,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) > if (cpu->exception_index < 0) { > #ifndef CONFIG_USER_ONLY > if (replay_has_exception() > - && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { > + && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { > /* try to cause an exception pending in the log */ > cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true); > } > @@ -525,7 +525,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, > * Ensure zeroing happens before reading cpu->exit_request or > * cpu->interrupt_request (see also smp_wmb in cpu_exit()) > */ > - atomic_mb_set(&cpu->icount_decr.u16.high, 0); > + atomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0); > > if (unlikely(atomic_read(&cpu->interrupt_request))) { > int interrupt_request; > @@ -596,8 +596,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, > } > > /* Finally, check if we need to exit to the main loop. */ > - if (unlikely(atomic_read(&cpu->exit_request) > - || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) { > + if (unlikely(atomic_read(&cpu->exit_request)) > + || (use_icount > + && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) { > atomic_set(&cpu->exit_request, 0); > if (cpu->exception_index == -1) { > cpu->exception_index = EXCP_INTERRUPT; > @@ -624,7 +625,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, > } > > *last_tb = NULL; > - insns_left = atomic_read(&cpu->icount_decr.u32); > + insns_left = atomic_read(&cpu_neg(cpu)->icount_decr.u32); > if (insns_left < 0) { > /* Something asked us to stop executing chained TBs; just > * continue round the main loop. Whatever requested the exit > @@ -643,7 +644,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, > cpu_update_icount(cpu); > /* Refill decrementer and continue execution. */ > insns_left = MIN(0xffff, cpu->icount_budget); > - cpu->icount_decr.u16.low = insns_left; > + cpu_neg(cpu)->icount_decr.u16.low = insns_left; > cpu->icount_extra = cpu->icount_budget - insns_left; > if (!cpu->icount_extra) { > /* Execute any remaining instructions, then let the main loop > diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c > index 3d25bdcc17..9b215dcc5a 100644 > --- a/accel/tcg/tcg-all.c > +++ b/accel/tcg/tcg-all.c > @@ -28,13 +28,12 @@ > #include "sysemu/sysemu.h" > #include "qom/object.h" > #include "qemu-common.h" > -#include "qom/cpu.h" > +#include "cpu.h" > #include "sysemu/cpus.h" > #include "qemu/main-loop.h" > > unsigned long tcg_tb_size; > > -#ifndef CONFIG_USER_ONLY > /* mask must never be zero, except for A20 change call */ > static void tcg_handle_interrupt(CPUState *cpu, int mask) > { > @@ -51,7 +50,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) > if (!qemu_cpu_is_self(cpu)) { > qemu_cpu_kick(cpu); > } else { > - atomic_set(&cpu->icount_decr.u16.high, -1); > + atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); > if (use_icount && > !cpu->can_do_io > && (mask & ~old_mask) != 0) { > @@ -59,7 +58,6 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) > } > } > } > -#endif > > static int tcg_init(MachineState *ms) > { > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 52d94facf0..e24ee3a172 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -364,7 +364,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, > assert(use_icount); > /* Reset the cycle counter to the start of the block > and shift if to the number of actually executed instructions */ > - cpu->icount_decr.u16.low += num_insns - i; > + cpu_neg(cpu)->icount_decr.u16.low += num_insns - i; > } > restore_state_to_opc(env, tb, data); > > @@ -2200,7 +2200,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) > if ((env->hflags & MIPS_HFLAG_BMASK) != 0 > && env->active_tc.PC != tb->pc) { > env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4); > - cpu->icount_decr.u16.low++; > + cpu_neg(cpu)->icount_decr.u16.low++; > env->hflags &= ~MIPS_HFLAG_BMASK; > n = 2; > } > @@ -2208,7 +2208,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) > if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 > && env->pc != tb->pc) { > env->pc -= 2; > - cpu->icount_decr.u16.low++; > + cpu_neg(cpu)->icount_decr.u16.low++; > env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); > n = 2; > } > @@ -2382,7 +2382,7 @@ void cpu_interrupt(CPUState *cpu, int mask) > { > g_assert(qemu_mutex_iothread_locked()); > cpu->interrupt_request |= mask; > - atomic_set(&cpu->icount_decr.u16.high, -1); > + atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); > } > > /* > diff --git a/cpus.c b/cpus.c > index e58e7ab0f6..150f5dd047 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -238,7 +238,8 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp) > */ > static int64_t cpu_get_icount_executed(CPUState *cpu) > { > - return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra); > + return (cpu->icount_budget - > + (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); > } > > /* > @@ -1386,12 +1387,12 @@ static void prepare_icount_for_run(CPUState *cpu) > * each vCPU execution. However u16.high can be raised > * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt > */ > - g_assert(cpu->icount_decr.u16.low == 0); > + g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); > g_assert(cpu->icount_extra == 0); > > cpu->icount_budget = tcg_get_icount_limit(); > insns_left = MIN(0xffff, cpu->icount_budget); > - cpu->icount_decr.u16.low = insns_left; > + cpu_neg(cpu)->icount_decr.u16.low = insns_left; > cpu->icount_extra = cpu->icount_budget - insns_left; > > replay_mutex_lock(); > @@ -1405,7 +1406,7 @@ static void process_icount_data(CPUState *cpu) > cpu_update_icount(cpu); > > /* Reset the counters */ > - cpu->icount_decr.u16.low = 0; > + cpu_neg(cpu)->icount_decr.u16.low = 0; > cpu->icount_extra = 0; > cpu->icount_budget = 0; > > diff --git a/qom/cpu.c b/qom/cpu.c > index 3c5493c96c..6b4632abda 100644 > --- a/qom/cpu.c > +++ b/qom/cpu.c > @@ -115,7 +115,7 @@ void cpu_exit(CPUState *cpu) > atomic_set(&cpu->exit_request, 1); > /* Ensure cpu_exec will see the exit request after TCG has exited. */ > smp_wmb(); > - atomic_set(&cpu->icount_decr.u16.high, -1); > + atomic_set(&cpu->icount_decr_ptr->u16.high, -1); > } > > int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, > @@ -264,7 +264,7 @@ static void cpu_common_reset(CPUState *cpu) > cpu->mem_io_pc = 0; > cpu->mem_io_vaddr = 0; > cpu->icount_extra = 0; > - atomic_set(&cpu->icount_decr.u32, 0); > + atomic_set(&cpu->icount_decr_ptr->u32, 0); > cpu->can_do_io = 1; > cpu->exception_index = -1; > cpu->crash_occurred = false; > -- > 2.17.1 > >
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 5ae83405c8..253dd1d9a5 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -380,6 +380,7 @@ int cpu_exec(CPUState *cpu); static inline void cpu_set_cpustate_pointers(ArchCPU *cpu) { cpu->parent_obj.env_ptr = &cpu->env; + cpu->parent_obj.icount_decr_ptr = &cpu->neg.icount_decr; } /** diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index ad97991faf..3971910653 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -33,6 +33,7 @@ #include "exec/hwaddr.h" #endif #include "exec/memattrs.h" +#include "qom/cpu.h" #include "cpu-param.h" @@ -232,7 +233,7 @@ typedef struct CPUTLB { * before CPUArchState, as a field named "neg". */ typedef struct CPUNegativeOffsetState { - /* Empty */ + IcountDecr icount_decr; } CPUNegativeOffsetState; #endif diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h index 9cfa6ccce5..f7669b6841 100644 --- a/include/exec/gen-icount.h +++ b/include/exec/gen-icount.h @@ -5,8 +5,6 @@ /* Helpers for instruction counting code generation. */ -#define ENV_OFFSET offsetof(ArchCPU, env) - static TCGOp *icount_start_insn; static inline void gen_tb_start(TranslationBlock *tb) @@ -21,7 +19,8 @@ static inline void gen_tb_start(TranslationBlock *tb) } tcg_gen_ld_i32(count, cpu_env, - -ENV_OFFSET + offsetof(CPUState, icount_decr.u32)); + offsetof(ArchCPU, neg.icount_decr.u32) - + offsetof(ArchCPU, env)); if (tb_cflags(tb) & CF_USE_ICOUNT) { imm = tcg_temp_new_i32(); @@ -39,7 +38,8 @@ static inline void gen_tb_start(TranslationBlock *tb) if (tb_cflags(tb) & CF_USE_ICOUNT) { tcg_gen_st16_i32(count, cpu_env, - -ENV_OFFSET + offsetof(CPUState, icount_decr.u16.low)); + offsetof(ArchCPU, neg.icount_decr.u16.low) - + offsetof(ArchCPU, env)); } tcg_temp_free_i32(count); @@ -60,14 +60,18 @@ static inline void gen_tb_end(TranslationBlock *tb, int num_insns) static inline void gen_io_start(void) { TCGv_i32 tmp = tcg_const_i32(1); - tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); + tcg_gen_st_i32(tmp, cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); tcg_temp_free_i32(tmp); } static inline void gen_io_end(void) { TCGv_i32 tmp = tcg_const_i32(0); - tcg_gen_st_i32(tmp, cpu_env, -ENV_OFFSET + offsetof(CPUState, can_do_io)); + tcg_gen_st_i32(tmp, cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); tcg_temp_free_i32(tmp); } diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 08abcbd3fe..0d8fc1caa6 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -226,17 +226,25 @@ typedef struct CPUClass { bool gdb_stop_before_watchpoint; } CPUClass; +/* + * Low 16 bits: number of cycles left, used only in icount mode. + * High 16 bits: Set to -1 to force TCG to stop executing linked TBs + * for this CPU and return to its top level loop (even in non-icount mode). + * This allows a single read-compare-cbranch-write sequence to test + * for both decrementer underflow and exceptions. + */ +typedef union IcountDecr { + uint32_t u32; + struct { #ifdef HOST_WORDS_BIGENDIAN -typedef struct icount_decr_u16 { - uint16_t high; - uint16_t low; -} icount_decr_u16; + uint16_t high; + uint16_t low; #else -typedef struct icount_decr_u16 { - uint16_t low; - uint16_t high; -} icount_decr_u16; + uint16_t low; + uint16_t high; #endif + } u16; +} IcountDecr; typedef struct CPUBreakpoint { vaddr pc; @@ -308,11 +316,6 @@ struct qemu_work_item; * @crash_occurred: Indicates the OS reported a crash (panic) for this CPU * @singlestep_enabled: Flags for single-stepping. * @icount_extra: Instructions until next timer event. - * @icount_decr: Low 16 bits: number of cycles left, only used in icount mode. - * High 16 bits: Set to -1 to force TCG to stop executing linked TBs for this - * CPU and return to its top level loop (even in non-icount mode). - * This allows a single read-compare-cbranch-write sequence to test - * for both decrementer underflow and exceptions. * @can_do_io: Nonzero if memory-mapped IO is safe. Deterministic execution * requires that IO only be performed on the last instruction of a TB * so that interrupts take effect immediately. @@ -322,6 +325,7 @@ struct qemu_work_item; * @as: Pointer to the first AddressSpace, for the convenience of targets which * only have a single AddressSpace * @env_ptr: Pointer to subclass-specific CPUArchState field. + * @icount_decr_ptr: Pointer to IcountDecr field within subclass. * @gdb_regs: Additional GDB registers. * @gdb_num_regs: Number of total registers accessible to GDB. * @gdb_num_g_regs: Number of registers in GDB 'g' packets. @@ -380,6 +384,7 @@ struct CPUState { MemoryRegion *memory; void *env_ptr; /* CPUArchState */ + IcountDecr *icount_decr_ptr; /* Accessed in parallel; all accesses must be atomic */ struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; @@ -434,15 +439,6 @@ struct CPUState { bool ignore_memory_transaction_failures; - /* Note that this is accessed at the start of every TB via a negative - offset from AREG0. Leave this field at the end so as to make the - (absolute value) offset as small as possible. This reduces code - size, especially for hosts without large memory offsets. */ - union { - uint32_t u32; - icount_decr_u16 u16; - } icount_decr; - struct hax_vcpu_state *hax_vcpu; int hvf_fd; diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 45ef41ebb2..032a62672e 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -54,7 +54,7 @@ typedef struct SyncClocks { #define MAX_DELAY_PRINT_RATE 2000000000LL #define MAX_NB_PRINTS 100 -static void align_clocks(SyncClocks *sc, const CPUState *cpu) +static void align_clocks(SyncClocks *sc, CPUState *cpu) { int64_t cpu_icount; @@ -62,7 +62,7 @@ static void align_clocks(SyncClocks *sc, const CPUState *cpu) return; } - cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + cpu_icount = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low; sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - cpu_icount); sc->last_cpu_icount = cpu_icount; @@ -105,15 +105,15 @@ static void print_delay(const SyncClocks *sc) } } -static void init_delay_params(SyncClocks *sc, - const CPUState *cpu) +static void init_delay_params(SyncClocks *sc, CPUState *cpu) { if (!icount_align_option) { return; } sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock; - sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low; + sc->last_cpu_icount + = cpu->icount_extra + cpu_neg(cpu)->icount_decr.u16.low; if (sc->diff_clk < max_delay) { max_delay = sc->diff_clk; } @@ -467,7 +467,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) if (cpu->exception_index < 0) { #ifndef CONFIG_USER_ONLY if (replay_has_exception() - && cpu->icount_decr.u16.low + cpu->icount_extra == 0) { + && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { /* try to cause an exception pending in the log */ cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0, curr_cflags()), true); } @@ -525,7 +525,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, * Ensure zeroing happens before reading cpu->exit_request or * cpu->interrupt_request (see also smp_wmb in cpu_exit()) */ - atomic_mb_set(&cpu->icount_decr.u16.high, 0); + atomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0); if (unlikely(atomic_read(&cpu->interrupt_request))) { int interrupt_request; @@ -596,8 +596,9 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, } /* Finally, check if we need to exit to the main loop. */ - if (unlikely(atomic_read(&cpu->exit_request) - || (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) { + if (unlikely(atomic_read(&cpu->exit_request)) + || (use_icount + && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0)) { atomic_set(&cpu->exit_request, 0); if (cpu->exception_index == -1) { cpu->exception_index = EXCP_INTERRUPT; @@ -624,7 +625,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, } *last_tb = NULL; - insns_left = atomic_read(&cpu->icount_decr.u32); + insns_left = atomic_read(&cpu_neg(cpu)->icount_decr.u32); if (insns_left < 0) { /* Something asked us to stop executing chained TBs; just * continue round the main loop. Whatever requested the exit @@ -643,7 +644,7 @@ static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, cpu_update_icount(cpu); /* Refill decrementer and continue execution. */ insns_left = MIN(0xffff, cpu->icount_budget); - cpu->icount_decr.u16.low = insns_left; + cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; if (!cpu->icount_extra) { /* Execute any remaining instructions, then let the main loop diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c index 3d25bdcc17..9b215dcc5a 100644 --- a/accel/tcg/tcg-all.c +++ b/accel/tcg/tcg-all.c @@ -28,13 +28,12 @@ #include "sysemu/sysemu.h" #include "qom/object.h" #include "qemu-common.h" -#include "qom/cpu.h" +#include "cpu.h" #include "sysemu/cpus.h" #include "qemu/main-loop.h" unsigned long tcg_tb_size; -#ifndef CONFIG_USER_ONLY /* mask must never be zero, except for A20 change call */ static void tcg_handle_interrupt(CPUState *cpu, int mask) { @@ -51,7 +50,7 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) if (!qemu_cpu_is_self(cpu)) { qemu_cpu_kick(cpu); } else { - atomic_set(&cpu->icount_decr.u16.high, -1); + atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); if (use_icount && !cpu->can_do_io && (mask & ~old_mask) != 0) { @@ -59,7 +58,6 @@ static void tcg_handle_interrupt(CPUState *cpu, int mask) } } } -#endif static int tcg_init(MachineState *ms) { diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 52d94facf0..e24ee3a172 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -364,7 +364,7 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, assert(use_icount); /* Reset the cycle counter to the start of the block and shift if to the number of actually executed instructions */ - cpu->icount_decr.u16.low += num_insns - i; + cpu_neg(cpu)->icount_decr.u16.low += num_insns - i; } restore_state_to_opc(env, tb, data); @@ -2200,7 +2200,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && env->active_tc.PC != tb->pc) { env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4); - cpu->icount_decr.u16.low++; + cpu_neg(cpu)->icount_decr.u16.low++; env->hflags &= ~MIPS_HFLAG_BMASK; n = 2; } @@ -2208,7 +2208,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0 && env->pc != tb->pc) { env->pc -= 2; - cpu->icount_decr.u16.low++; + cpu_neg(cpu)->icount_decr.u16.low++; env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL); n = 2; } @@ -2382,7 +2382,7 @@ void cpu_interrupt(CPUState *cpu, int mask) { g_assert(qemu_mutex_iothread_locked()); cpu->interrupt_request |= mask; - atomic_set(&cpu->icount_decr.u16.high, -1); + atomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); } /* diff --git a/cpus.c b/cpus.c index e58e7ab0f6..150f5dd047 100644 --- a/cpus.c +++ b/cpus.c @@ -238,7 +238,8 @@ void qemu_tcg_configure(QemuOpts *opts, Error **errp) */ static int64_t cpu_get_icount_executed(CPUState *cpu) { - return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra); + return (cpu->icount_budget - + (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); } /* @@ -1386,12 +1387,12 @@ static void prepare_icount_for_run(CPUState *cpu) * each vCPU execution. However u16.high can be raised * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt */ - g_assert(cpu->icount_decr.u16.low == 0); + g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0); g_assert(cpu->icount_extra == 0); cpu->icount_budget = tcg_get_icount_limit(); insns_left = MIN(0xffff, cpu->icount_budget); - cpu->icount_decr.u16.low = insns_left; + cpu_neg(cpu)->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; replay_mutex_lock(); @@ -1405,7 +1406,7 @@ static void process_icount_data(CPUState *cpu) cpu_update_icount(cpu); /* Reset the counters */ - cpu->icount_decr.u16.low = 0; + cpu_neg(cpu)->icount_decr.u16.low = 0; cpu->icount_extra = 0; cpu->icount_budget = 0; diff --git a/qom/cpu.c b/qom/cpu.c index 3c5493c96c..6b4632abda 100644 --- a/qom/cpu.c +++ b/qom/cpu.c @@ -115,7 +115,7 @@ void cpu_exit(CPUState *cpu) atomic_set(&cpu->exit_request, 1); /* Ensure cpu_exec will see the exit request after TCG has exited. */ smp_wmb(); - atomic_set(&cpu->icount_decr.u16.high, -1); + atomic_set(&cpu->icount_decr_ptr->u16.high, -1); } int cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, @@ -264,7 +264,7 @@ static void cpu_common_reset(CPUState *cpu) cpu->mem_io_pc = 0; cpu->mem_io_vaddr = 0; cpu->icount_extra = 0; - atomic_set(&cpu->icount_decr.u32, 0); + atomic_set(&cpu->icount_decr_ptr->u32, 0); cpu->can_do_io = 1; cpu->exception_index = -1; cpu->crash_occurred = false;
Amusingly, we had already ignored the comment to keep this value at the end of CPUState. This restores the minimum negative offset from TCG_AREG0 for code generation. For the couple of uses within qom/cpu.c, add a pointer from the CPUState object to the IcountDecr object within CPUNegativeOffsetState. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- include/exec/cpu-all.h | 1 + include/exec/cpu-defs.h | 3 ++- include/exec/gen-icount.h | 16 ++++++++++------ include/qom/cpu.h | 40 ++++++++++++++++++--------------------- accel/tcg/cpu-exec.c | 23 +++++++++++----------- accel/tcg/tcg-all.c | 6 ++---- accel/tcg/translate-all.c | 8 ++++---- cpus.c | 9 +++++---- qom/cpu.c | 4 ++-- 9 files changed, 56 insertions(+), 54 deletions(-) -- 2.17.1