Message ID | 20220812180806.2128593-15-richard.henderson@linaro.org
---|---
State | New
Series | accel/tcg: minimize tlb lookups during translate + user-only PROT_EXEC fixes
On Fri, 2022-08-12 at 11:07 -0700, Richard Henderson wrote:
> We will want to re-use the result of get_page_addr_code
> beyond the scope of tb_lookup.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  accel/tcg/cpu-exec.c | 34 ++++++++++++++++++++++++----------
>  1 file changed, 24 insertions(+), 10 deletions(-)
>
> [...]
>
> @@ -379,7 +376,12 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
>          cpu_loop_exit(cpu);
>      }
>
> -    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
> +    phys_pc = get_page_addr_code(env, pc);
> +    if (phys_pc == -1) {
> +        return tcg_code_gen_epilogue;
> +    }
> +
> +    tb = tb_lookup(cpu, phys_pc, pc, cs_base, flags, cflags);
>      if (tb == NULL) {
>          return tcg_code_gen_epilogue;
>      }
>
> [...]

This patch did not make it into v2, but having get_page_addr_code()
before tb_lookup() in helper_lookup_tb_ptr() helped raise the exception
when trying to execute a no-longer-executable TB.

Was it dropped for performance reasons?
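The failing scenario can be reduced to a tiny user-mode guest program: run
code from a page once so it gets translated and cached, drop PROT_EXEC from
the page, then call into it again. The sketch below is only an illustration
of that scenario (it is not taken from the thread) and assumes an x86_64
guest:

    /* Illustrative only: execute, revoke PROT_EXEC, execute again. */
    #include <assert.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
        static const unsigned char ret_insn[] = { 0xc3 };   /* x86_64 "ret" */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        assert(p != MAP_FAILED);
        memcpy(p, ret_insn, sizeof(ret_insn));

        void (*fn)(void) = (void (*)(void))p;
        fn();                                   /* TB is translated and cached */

        assert(mprotect(p, 4096, PROT_READ | PROT_WRITE) == 0);
        fn();                                   /* should now fault with SIGSEGV */
        return 0;
    }

The second call should deliver SIGSEGV; with a stale jump cache entry,
qemu-user could instead re-enter the cached translation.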
On 8/16/22 18:43, Ilya Leoshkevich wrote:
> On Fri, 2022-08-12 at 11:07 -0700, Richard Henderson wrote:
>> We will want to re-use the result of get_page_addr_code
>> beyond the scope of tb_lookup.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>
>> [...]
>
> This patch did not make it into v2, but having get_page_addr_code()
> before tb_lookup() in helper_lookup_tb_ptr() helped raise the exception
> when trying to execute a no-longer-executable TB.
>
> Was it dropped for performance reasons?

Ah, yes.  I dropped it because I ran into some regression, and started
minimizing the tree.  Because of the extra lock that needed to be held
(next patch, also dropped), I couldn't prove this actually helped.

I think the bit that's causing your user-only failure at the moment is
the jump cache.  This patch hoisted the page table check before the
jmp_cache.  For system, cputlb.c takes care of flushing the jump cache
with page table changes; we still don't have anything in user-only that
takes care of that.


r~
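The "for system" remark refers to the softmmu TLB path: when cputlb.c flushes
a page, it also wipes that page's slice of the per-CPU jump cache. A rough
sketch of that helper as it looked in the 7.1-era cputlb.c (details may
differ in other versions):

    /* Called from the tlb_flush_page_*() path when a page mapping changes. */
    static void tb_jmp_cache_clear_page(CPUState *cpu, target_ulong page_addr)
    {
        /* In softmmu, all PCs on one page hash into one contiguous block
         * of TB_JMP_PAGE_SIZE slots, so a page can be cleared cheaply. */
        unsigned int i, i0 = tb_jmp_cache_hash_page(page_addr);

        for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
            qatomic_set(&cpu->tb_jmp_cache[i0 + i], NULL);
        }
    }

There is no equivalent hook on the user-only page_set_flags()/mprotect()
path, which is the gap discussed below.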
On Tue, 2022-08-16 at 20:42 -0500, Richard Henderson wrote:
> On 8/16/22 18:43, Ilya Leoshkevich wrote:
>> On Fri, 2022-08-12 at 11:07 -0700, Richard Henderson wrote:
>>> We will want to re-use the result of get_page_addr_code
>>> beyond the scope of tb_lookup.
>>>
>>> [...]
>>
>> This patch did not make it into v2, but having get_page_addr_code()
>> before tb_lookup() in helper_lookup_tb_ptr() helped raise the exception
>> when trying to execute a no-longer-executable TB.
>>
>> Was it dropped for performance reasons?
>
> Ah, yes.  I dropped it because I ran into some regression, and started
> minimizing the tree.  Because of the extra lock that needed to be held
> (next patch, also dropped), I couldn't prove this actually helped.
>
> I think the bit that's causing your user-only failure at the moment
> is the jump cache.  This patch hoisted the page table check before
> the jmp_cache.  For system, cputlb.c takes care of flushing the jump
> cache with page table changes; we still don't have anything in
> user-only that takes care of that.
>
>
> r~

Would something like this be okay?

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 27435b97dbd..9421c84d991 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -1152,6 +1152,27 @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
     qemu_spin_unlock(&dest->jmp_lock);
 }
 
+static void cpu_tb_jmp_cache_remove(TranslationBlock *tb)
+{
+    CPUState *cpu;
+    uint32_t h;
+
+    /* remove the TB from the hash list */
+    if (TARGET_TB_PCREL) {
+        /* Any TB may be at any virtual address */
+        CPU_FOREACH(cpu) {
+            cpu_tb_jmp_cache_clear(cpu);
+        }
+    } else {
+        h = tb_jmp_cache_hash_func(tb_pc(tb));
+        CPU_FOREACH(cpu) {
+            if (qatomic_read(&cpu->tb_jmp_cache[h].tb) == tb) {
+                qatomic_set(&cpu->tb_jmp_cache[h].tb, NULL);
+            }
+        }
+    }
+}
+
 /*
  * In user-mode, call with mmap_lock held.
  * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
@@ -1159,7 +1180,6 @@ static inline void tb_jmp_unlink(TranslationBlock *dest)
  */
 static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 {
-    CPUState *cpu;
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
@@ -1190,20 +1210,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
         }
     }
 
-    /* remove the TB from the hash list */
-    if (TARGET_TB_PCREL) {
-        /* Any TB may be at any virtual address */
-        CPU_FOREACH(cpu) {
-            cpu_tb_jmp_cache_clear(cpu);
-        }
-    } else {
-        h = tb_jmp_cache_hash_func(tb_pc(tb));
-        CPU_FOREACH(cpu) {
-            if (qatomic_read(&cpu->tb_jmp_cache[h].tb) == tb) {
-                qatomic_set(&cpu->tb_jmp_cache[h].tb, NULL);
-            }
-        }
-    }
+    cpu_tb_jmp_cache_remove(tb);
 
     /* suppress this TB from the two jump lists */
     tb_remove_from_jmp_list(tb, 0);
@@ -2243,6 +2250,13 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
             (flags & PAGE_WRITE) &&
             p->first_tb) {
             tb_invalidate_phys_page(addr, 0);
+        } else {
+            TranslationBlock *tb;
+            int n;
+
+            PAGE_FOR_EACH_TB(p, tb, n) {
+                cpu_tb_jmp_cache_remove(tb);
+            }
         }
         if (reset_target_data) {
             g_free(p->target_data);
On 8/17/22 06:08, Ilya Leoshkevich wrote:
> @@ -2243,6 +2250,13 @@ void page_set_flags(target_ulong start, target_ulong end, int flags)
>              (flags & PAGE_WRITE) &&
>              p->first_tb) {
>              tb_invalidate_phys_page(addr, 0);
> +        } else {
> +            TranslationBlock *tb;
> +            int n;
> +
> +            PAGE_FOR_EACH_TB(p, tb, n) {
> +                cpu_tb_jmp_cache_remove(tb);
> +            }
>          }

Here you would use tb_jmp_cache_clear_page(), which should be moved out
of cputlb.c.


r~
On Wed, 2022-08-17 at 08:15 -0500, Richard Henderson wrote:
> On 8/17/22 06:08, Ilya Leoshkevich wrote:
>> [...]
>
> Here you would use tb_jmp_cache_clear_page(), which should be moved
> out of cputlb.c.

That was actually the first thing I tried.

Unfortunately tb_jmp_cache_clear_page() relies on
tb_jmp_cache_hash_func() returning the same top bits for addresses on
the same page.  This is not the case for qemu-user: there this property
was traded for better hashing with quite impressive performance
improvements (6f1653180f570).
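To make the clustering point concrete: the softmmu index splits into
page-derived high bits and in-page low bits, so one page occupies one
contiguous run of slots that tb_jmp_cache_clear_page() can wipe, whereas
after 6f1653180f570 the user-only index is a hash of the whole PC and
same-page TBs are scattered across the cache. The following is simplified
from the era's exec/tb-hash.h (the two variants are renamed here so they can
be shown side by side; this is not the literal code):

    /* softmmu: index = [page bits | in-page bits] -> same-page PCs cluster */
    static inline unsigned int tb_jmp_cache_hash_softmmu(target_ulong pc)
    {
        target_ulong tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
        return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK)
                | (tmp & TB_JMP_ADDR_MASK));
    }

    /* user-only since 6f1653180f570: whole-PC hash -> same-page PCs scatter,
     * but collisions drop sharply, hence the measured speedup. */
    static inline unsigned int tb_jmp_cache_hash_user(target_ulong pc)
    {
        return qemu_xxhash2(pc) & (TB_JMP_CACHE_SIZE - 1);
    }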
On 8/17/22 08:27, Ilya Leoshkevich wrote:
> On Wed, 2022-08-17 at 08:15 -0500, Richard Henderson wrote:
>> Here you would use tb_jmp_cache_clear_page(), which should be moved
>> out of cputlb.c.
>
> That was actually the first thing I tried.
>
> Unfortunately tb_jmp_cache_clear_page() relies on
> tb_jmp_cache_hash_func() returning the same top bits for addresses on
> the same page. This is not the case for qemu-user: there this property
> was traded for better hashing with quite impressive performance
> improvements (6f1653180f570).

Oh my.  Well, we could

  (1) revert that patch because the premise is wrong,
  (2) go with your per-tb clearing,
  (3) clear the whole thing with cpu_tb_jmp_cache_clear

Ideally we'd have some benchmark numbers to inform the choice...


r~
On 8/17/22 06:08, Ilya Leoshkevich wrote:
> +static void cpu_tb_jmp_cache_remove(TranslationBlock *tb)
> +{
> +    CPUState *cpu;
> +    uint32_t h;
> +
> +    /* remove the TB from the hash list */
> +    if (TARGET_TB_PCREL) {
> +        /* Any TB may be at any virtual address */
> +        CPU_FOREACH(cpu) {
> +            cpu_tb_jmp_cache_clear(cpu);
> +        }

This comment is not currently true for user-only.  Although there's an
outstanding bug report about our failure to manage virtual aliasing in
user-only...

> +            PAGE_FOR_EACH_TB(p, tb, n) {
> +                cpu_tb_jmp_cache_remove(tb);
> +            }

You wouldn't want to call cpu_tb_jmp_cache_clear() 99 times for the 99
tb's on the page.  For user-only, I think mprotect is rare enough that
just clearing the whole cache once is sufficient.


r~
On Wed, 2022-08-17 at 08:38 -0500, Richard Henderson wrote:
> On 8/17/22 08:27, Ilya Leoshkevich wrote:
>> Unfortunately tb_jmp_cache_clear_page() relies on
>> tb_jmp_cache_hash_func() returning the same top bits for addresses
>> on the same page. This is not the case for qemu-user: there this
>> property was traded for better hashing with quite impressive
>> performance improvements (6f1653180f570).
>
> Oh my.  Well, we could
>
>   (1) revert that patch because the premise is wrong,
>   (2) go with your per-tb clearing,
>   (3) clear the whole thing with cpu_tb_jmp_cache_clear
>
> Ideally we'd have some benchmark numbers to inform the choice...

FWIW 6f1653180f570 still looks useful.

Reverting it caused 620.omnetpp_s to take ~4% more time.  I ran the
benchmark with reduced values in omnetpp.ini so as not to wait forever,
therefore the real figures might be closer to what the commit message
says.  In any case this still shows that the patch has measurable
impact.
On 8/17/22 09:07, Ilya Leoshkevich wrote:
>> Oh my.  Well, we could
>>
>>   (1) revert that patch because the premise is wrong,
>>   (2) go with your per-tb clearing,
>>   (3) clear the whole thing with cpu_tb_jmp_cache_clear
>>
>> Ideally we'd have some benchmark numbers to inform the choice...
>
> FWIW 6f1653180f570 still looks useful.
> Reverting it caused 620.omnetpp_s to take ~4% more time.
> I ran the benchmark with reduced values in omnetpp.ini so as not to
> wait forever, therefore the real figures might be closer to what the
> commit message says. In any case this still shows that the patch has
> measurable impact.

Thanks for the testing.  I think option (3) will be best for user-only,
because mprotect/munmap of existing code pages is rare -- usually only
at process startup.


r~
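For reference, a minimal sketch of what option (3) could look like on the
user-only page_set_flags() path, written as a variation of Ilya's hunk above.
This is hypothetical and not the change that was eventually merged:

    @@ page_set_flags()  (sketch, user-only)
                 (flags & PAGE_WRITE) &&
                 p->first_tb) {
                 tb_invalidate_phys_page(addr, 0);
    +        } else if (p->first_tb) {
    +            /* Same-page TBs are scattered across the jump cache in
    +             * user-only, so drop the whole cache instead of iterating
    +             * per TB; a real implementation would likely do this at
    +             * most once per page_set_flags() call. */
    +            CPUState *cpu;
    +
    +            CPU_FOREACH(cpu) {
    +                cpu_tb_jmp_cache_clear(cpu);
    +            }
             }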
We will want to re-use the result of get_page_addr_code
beyond the scope of tb_lookup.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 accel/tcg/cpu-exec.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index a9b7053274..889355b341 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -209,13 +209,12 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 }
 
 /* Might cause an exception, so have a longjmp destination ready */
-static TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
-                                   target_ulong cs_base,
+static TranslationBlock *tb_lookup(CPUState *cpu, tb_page_addr_t phys_pc,
+                                   target_ulong pc, target_ulong cs_base,
                                    uint32_t flags, uint32_t cflags)
 {
     CPUArchState *env = cpu->env_ptr;
     TranslationBlock *tb;
-    tb_page_addr_t phys_pc;
     struct tb_desc desc;
     uint32_t jmp_hash, tb_hash;
 
@@ -240,11 +239,8 @@ static TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
     desc.cflags = cflags;
     desc.trace_vcpu_dstate = *cpu->trace_dstate;
     desc.pc = pc;
-    phys_pc = get_page_addr_code(desc.env, pc);
-    if (phys_pc == -1) {
-        return NULL;
-    }
     desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+
     tb_hash = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
     tb = qht_lookup_custom(&tb_ctx.htable, &desc, tb_hash, tb_lookup_cmp);
     if (tb == NULL) {
@@ -371,6 +367,7 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags, cflags;
+    tb_page_addr_t phys_pc;
 
     cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
 
@@ -379,7 +376,12 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
         cpu_loop_exit(cpu);
     }
 
-    tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+    phys_pc = get_page_addr_code(env, pc);
+    if (phys_pc == -1) {
+        return tcg_code_gen_epilogue;
+    }
+
+    tb = tb_lookup(cpu, phys_pc, pc, cs_base, flags, cflags);
     if (tb == NULL) {
         return tcg_code_gen_epilogue;
     }
@@ -482,6 +484,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
     TranslationBlock *tb;
     target_ulong cs_base, pc;
     uint32_t flags, cflags;
+    tb_page_addr_t phys_pc;
     int tb_exit;
 
     if (sigsetjmp(cpu->jmp_env, 0) == 0) {
@@ -504,7 +507,12 @@ void cpu_exec_step_atomic(CPUState *cpu)
          * Any breakpoint for this insn will have been recognized earlier.
          */
 
-        tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+        phys_pc = get_page_addr_code(env, pc);
+        if (phys_pc == -1) {
+            tb = NULL;
+        } else {
+            tb = tb_lookup(cpu, phys_pc, pc, cs_base, flags, cflags);
+        }
         if (tb == NULL) {
             mmap_lock();
             tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
@@ -949,6 +957,7 @@ int cpu_exec(CPUState *cpu)
             TranslationBlock *tb;
             target_ulong cs_base, pc;
             uint32_t flags, cflags;
+            tb_page_addr_t phys_pc;
 
             cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);
 
@@ -970,7 +979,12 @@ int cpu_exec(CPUState *cpu)
                 break;
             }
 
-            tb = tb_lookup(cpu, pc, cs_base, flags, cflags);
+            phys_pc = get_page_addr_code(cpu->env_ptr, pc);
+            if (phys_pc == -1) {
+                tb = NULL;
+            } else {
+                tb = tb_lookup(cpu, phys_pc, pc, cs_base, flags, cflags);
+            }
             if (tb == NULL) {
                 mmap_lock();
                 tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);