Message ID: 20220925105124.82033-15-richard.henderson@linaro.org
State:      New
Series:     tcg: CPUTLBEntryFull and TARGET_TB_PCREL
Richard Henderson <richard.henderson@linaro.org> writes:

> Wrap the bare TranslationBlock pointer into a structure.
>
> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/hw/core/cpu.h     | 8 ++++++--
>  accel/tcg/cpu-exec.c      | 9 ++++++---
>  accel/tcg/cputlb.c        | 2 +-
>  accel/tcg/translate-all.c | 4 ++--
>  4 files changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index 9e47184513..ee5b75dea0 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -232,6 +232,10 @@ struct hvf_vcpu_state;
>  #define TB_JMP_CACHE_BITS 12
>  #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
>
> +typedef struct {
> +    TranslationBlock *tb;
> +} CPUJumpCache;
> +

I don't quite follow what's going on here. I see we add vaddr pc in a
later patch, but I don't quite see why a cache for looking up TBs gets
a side-channel value added later.

Is this because the vaddr will no longer match the tb->pc? Maybe a
comment on the structure is needed?

[rest of quoted patch snipped; the full diff appears at the bottom of
this page]
On 9/29/22 06:46, Alex Bennée wrote:
> Richard Henderson <richard.henderson@linaro.org> writes:
>
>> Wrap the bare TranslationBlock pointer into a structure.
>>
>> [...]
>>
>> +typedef struct {
>> +    TranslationBlock *tb;
>> +} CPUJumpCache;
>> +
>
> I don't quite follow what's going on here. I see we add vaddr pc in a
> later patch but I don't quite see why a cache for looking up TBs gets
> a side-channel value added later.
>
> Is this because the vaddr will no longer match the tb->pc? Maybe a
> comment on the structure is needed?

Correct, there will be no tb->pc, so the cpu has to remember the
virtual address itself.

This patch only wraps the current pointer into a structure.

r~
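Going by this exchange, a minimal sketch of what the structure
presumably grows into once the later patch lands; the exact field name
and layout below are assumptions drawn from the thread, not code from
the series:

    /*
     * Illustrative only (assumed shape, per the discussion above):
     * once tb->pc is removed, the cache entry has to carry the guest
     * virtual PC it was installed for, so that a lookup can still
     * compare against the requested pc.
     */
    typedef struct {
        TranslationBlock *tb;
        vaddr pc;    /* virtual address this entry was filled for */
    } CPUJumpCache;

A lookup would then test the cached pc field instead of tb->pc, and a
mismatch simply reads as a cache miss.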
Richard Henderson <richard.henderson@linaro.org> writes:

> On 9/29/22 06:46, Alex Bennée wrote:
>> Richard Henderson <richard.henderson@linaro.org> writes:
>>
>>> Wrap the bare TranslationBlock pointer into a structure.
>>>
>>> [...]
>>>
>>> +typedef struct {
>>> +    TranslationBlock *tb;
>>> +} CPUJumpCache;
>>> +
>>
>> I don't quite follow what's going on here. I see we add vaddr pc in a
>> later patch but I don't quite see why a cache for looking up TBs gets
>> a side-channel value added later.
>>
>> Is this because the vaddr will no longer match the tb->pc? Maybe a
>> comment on the structure is needed?
>
> Correct, there will be no tb->pc, so the cpu has to remember the
> virtual address itself.
>
> This patch only wraps the current pointer into a structure.

Sure - however we don't expand the comment when vaddr is added later.

I'm also concerned that now that we have two fields there is scope for
skew. I guess the acquire/release semantics are to ensure we may fail
safe but never get a false positive?
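The "fail safe, never a false positive" reading can be illustrated in
isolation. Below is a self-contained sketch using plain C11 atomics in
place of QEMU's qatomic_* wrappers; every name in it is invented for
illustration, and the release/publish ordering spells out the guess
made above rather than anything this patch itself does. The model
matches the patch's usage: a slot is filled and looked up only by its
owning vCPU thread, while other threads may only invalidate it by
storing NULL to the tb field (as tb_jmp_cache_clear_page() does). A
racing invalidation can therefore turn a lookup into a miss, but never
manufacture a hit, because a hit requires both a non-NULL tb and a
matching pc, and only the owner ever stores those:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct TB { uint64_t pc; } TB;   /* stand-in for TranslationBlock */

    typedef struct {
        _Atomic(TB *) tb;
        _Atomic uint64_t pc;                 /* virtual pc this entry was filled for */
    } JumpCacheEntry;

    static JumpCacheEntry slot;

    /* Owner thread only: publish a (pc, tb) pair.  pc is stored first,
     * then tb is released after it, so anyone who acquire-loads this tb
     * is also guaranteed to see the pc that was stored with it. */
    static void slot_set(TB *tb, uint64_t pc)
    {
        atomic_store_explicit(&slot.pc, pc, memory_order_relaxed);
        atomic_store_explicit(&slot.tb, tb, memory_order_release);
    }

    /* Any thread: invalidate.  Only tb is touched, and only with NULL,
     * so an invalidator can never half-write a (tb, pc) pair. */
    static void slot_clear(void)
    {
        atomic_store_explicit(&slot.tb, NULL, memory_order_relaxed);
    }

    /* Owner thread only: look up.  A cleared tb or a stale pc both fall
     * through to the miss path, which is the fail-safe case. */
    static TB *slot_lookup(uint64_t pc)
    {
        TB *tb = atomic_load_explicit(&slot.tb, memory_order_acquire);
        if (tb && atomic_load_explicit(&slot.pc, memory_order_relaxed) == pc) {
            return tb;      /* hit: pointer and key agree */
        }
        return NULL;        /* miss: caller retranslates and refills */
    }

    int main(void)
    {
        TB tb1 = { .pc = 0x1000 };
        slot_set(&tb1, 0x1000);
        printf("hit:  %p\n", (void *)slot_lookup(0x1000));  /* &tb1 */
        slot_clear();                  /* e.g. a page was invalidated */
        printf("miss: %p\n", (void *)slot_lookup(0x1000));  /* (nil) */
        return 0;
    }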
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 9e47184513..ee5b75dea0 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -232,6 +232,10 @@ struct hvf_vcpu_state;
 #define TB_JMP_CACHE_BITS 12
 #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
 
+typedef struct {
+    TranslationBlock *tb;
+} CPUJumpCache;
+
 /* work queue */
 
 /* The union type allows passing of 64 bit target pointers on 32 bit
@@ -361,7 +365,7 @@ struct CPUState {
     IcountDecr *icount_decr_ptr;
 
     /* Accessed in parallel; all accesses must be atomic */
-    TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
+    CPUJumpCache tb_jmp_cache[TB_JMP_CACHE_SIZE];
 
     struct GDBRegisterState *gdb_regs;
     int gdb_num_regs;
@@ -452,7 +456,7 @@ static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
     unsigned int i;
 
     for (i = 0; i < TB_JMP_CACHE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i], NULL);
+        qatomic_set(&cpu->tb_jmp_cache[i].tb, NULL);
     }
 }
 
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index dd58a144a8..c6283d5798 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -252,7 +252,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     tcg_debug_assert(!(cflags & CF_INVALID));
 
     hash = tb_jmp_cache_hash_func(pc);
-    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash]);
+    tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash].tb);
 
     if (likely(tb &&
                tb->pc == pc &&
@@ -266,7 +266,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     if (tb == NULL) {
         return NULL;
     }
-    qatomic_set(&cpu->tb_jmp_cache[hash], tb);
+    qatomic_set(&cpu->tb_jmp_cache[hash].tb, tb);
     return tb;
 }
 
@@ -987,6 +987,8 @@ int cpu_exec(CPUState *cpu)
 
             tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
             if (tb == NULL) {
+                uint32_t h;
+
                 mmap_lock();
                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
                 mmap_unlock();
@@ -994,7 +996,8 @@ int cpu_exec(CPUState *cpu)
                  * We add the TB in the virtual pc hash table
                  * for the fast lookup
                  */
-                qatomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
+                h = tb_jmp_cache_hash_func(pc);
+                qatomic_set(&cpu->tb_jmp_cache[h].tb, tb);
             }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f5e6ca2da2..fb8f3087f1 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -103,7 +103,7 @@ static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
     unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);
 
     for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
-        qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
+        qatomic_set(&cpu->tb_jmp_cache[i0 + i].tb, NULL);
     }
 }
 
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f429d33981..efa479ccf3 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1187,8 +1187,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     /* remove the TB from the hash list */
     h = tb_jmp_cache_hash_func(tb->pc);
     CPU_FOREACH(cpu) {
-        if (qatomic_read(&cpu->tb_jmp_cache[h]) == tb) {
-            qatomic_set(&cpu->tb_jmp_cache[h], NULL);
+        if (qatomic_read(&cpu->tb_jmp_cache[h].tb) == tb) {
+            qatomic_set(&cpu->tb_jmp_cache[h].tb, NULL);
         }
     }