Message ID | 20180614193147.29680-15-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg queued patches | expand |
This patch breaks record/replay. I run execution recording of the WindowsXP machine with the following script: ./bin/qemu-system-i386 -d in_asm,exec -D xp_save.log -global apic-common.vapic=off \ -icount shift=7,rr=record,rrfile=xp0.replay \ -drive file=./images/xp_sp2.qcow2,if=none,id=img-direct,snapshot \ -drive driver=blkreplay,if=none,image=img-direct,id=img-replay \ -device ide-hd,drive=img-replay -net none -m 512M QEMU fails at some moment. Here are the contents of the log: ---------------- IN: 0x806ee2d0: 33 c0 xorl %eax, %eax 0x806ee2d2: 8a c1 movb %cl, %al 0x806ee2d4: 33 c9 xorl %ecx, %ecx 0x806ee2d6: 8a 88 58 e2 6e 80 movb -0x7f911da8(%eax), %cl 0x806ee2dc: 89 0d 80 00 fe ff movl %ecx, 0xfffe0080 0x806ee2e2: a1 80 00 fe ff movl 0xfffe0080, %eax 0x806ee2e7: c3 retl Trace 0: 0x7fdc103b16a0 [00000000/806ee2d0/0x4000b0] qemu: fatal: cpu_io_recompile: could not find TB for pc=0x7fec24fde2de EAX=00000001 EBX=00006901 ECX=0000003d EDX=00000ffc ESI=040d78c0 EDI=0000031f EBP=f878fc60 ESP=f878fc54 EIP=806ee2d0 EFL=00000202 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 .... Pavel Dovgalyuk > -----Original Message----- > From: Richard Henderson [mailto:richard.henderson@linaro.org] > Sent: Thursday, June 14, 2018 10:32 PM > To: qemu-devel@nongnu.org > Cc: peter.maydell@linaro.org; Emilio G. Cota > Subject: [PULL, 14/18] translate-all: discard TB when tb_link_page returns an existing > matching TB > > From: "Emilio G. Cota" <cota@braap.org> > > Use the recently-gained QHT feature of returning the matching TB if it > already exists. This allows us to get rid of the lookup we perform > right after acquiring tb_lock. > > Suggested-by: Richard Henderson <richard.henderson@linaro.org> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > Signed-off-by: Emilio G. 
Cota <cota@braap.org> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > accel/tcg/cpu-exec.c | 14 ++------- > accel/tcg/translate-all.c | 50 +++++++++++++++++++++++++++------ > docs/devel/multi-thread-tcg.txt | 3 ++ > 3 files changed, 46 insertions(+), 21 deletions(-) > > diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c > index d75c35380a..45f6ebc65e 100644 > --- a/accel/tcg/cpu-exec.c > +++ b/accel/tcg/cpu-exec.c > @@ -245,10 +245,7 @@ void cpu_exec_step_atomic(CPUState *cpu) > if (tb == NULL) { > mmap_lock(); > tb_lock(); > - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); > - if (likely(tb == NULL)) { > - tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); > - } > + tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); > tb_unlock(); > mmap_unlock(); > } > @@ -398,14 +395,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, > tb_lock(); > acquired_tb_lock = true; > > - /* There's a chance that our desired tb has been translated while > - * taking the locks so we check again inside the lock. > - */ > - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); > - if (likely(tb == NULL)) { > - /* if no translated code available, then translate it now */ > - tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); > - } > + tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); > > mmap_unlock(); > /* We add the TB in the virtual pc hash table for the fast lookup */ > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index c75298d08a..2585e6fd3e 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1581,18 +1581,30 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, > * (-1) to indicate that only one page contains the TB. > * > * Called with mmap_lock held for user-mode emulation. > + * > + * Returns a pointer @tb, or a pointer to an existing TB that matches @tb. 
> + * Note that in !user-mode, another thread might have already added a TB > + * for the same block of guest code that @tb corresponds to. In that case, > + * the caller should discard the original @tb, and use instead the returned TB. > */ > -static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > - tb_page_addr_t phys_page2) > +static TranslationBlock * > +tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > + tb_page_addr_t phys_page2) > { > PageDesc *p; > PageDesc *p2 = NULL; > + void *existing_tb = NULL; > uint32_t h; > > assert_memory_lock(); > > /* > * Add the TB to the page list, acquiring first the pages's locks. > + * We keep the locks held until after inserting the TB in the hash table, > + * so that if the insertion fails we know for sure that the TBs are still > + * in the page descriptors. > + * Note that inserting into the hash table first isn't an option, since > + * we can only insert TBs that are fully initialized. > */ > page_lock_pair(&p, phys_pc, &p2, phys_page2, 1); > tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK); > @@ -1602,21 +1614,33 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > tb->page_addr[1] = -1; > } > > + /* add in the hash table */ > + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, > + tb->trace_vcpu_dstate); > + qht_insert(&tb_ctx.htable, tb, h, &existing_tb); > + > + /* remove TB from the page(s) if we couldn't insert it */ > + if (unlikely(existing_tb)) { > + tb_page_remove(p, tb); > + invalidate_page_bitmap(p); > + if (p2) { > + tb_page_remove(p2, tb); > + invalidate_page_bitmap(p2); > + } > + tb = existing_tb; > + } > + > if (p2) { > page_unlock(p2); > } > page_unlock(p); > > - /* add in the hash table */ > - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, > - tb->trace_vcpu_dstate); > - qht_insert(&tb_ctx.htable, tb, h, NULL); > - > #ifdef CONFIG_USER_ONLY > if (DEBUG_TB_CHECK_GATE) { > tb_page_check(); > } > #endif > + 
return tb; > } > > /* Called with mmap_lock held for user mode emulation. */ > @@ -1625,7 +1649,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > uint32_t flags, int cflags) > { > CPUArchState *env = cpu->env_ptr; > - TranslationBlock *tb; > + TranslationBlock *tb, *existing_tb; > tb_page_addr_t phys_pc, phys_page2; > target_ulong virt_page2; > tcg_insn_unit *gen_code_buf; > @@ -1773,7 +1797,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu, > * memory barrier is required before tb_link_page() makes the TB visible > * through the physical hash table and physical page list. > */ > - tb_link_page(tb, phys_pc, phys_page2); > + existing_tb = tb_link_page(tb, phys_pc, phys_page2); > + /* if the TB already exists, discard what we just translated */ > + if (unlikely(existing_tb != tb)) { > + uintptr_t orig_aligned = (uintptr_t)gen_code_buf; > + > + orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); > + atomic_set(&tcg_ctx->code_gen_ptr, orig_aligned); > + return existing_tb; > + } > tcg_tb_insert(tb); > return tb; > } > diff --git a/docs/devel/multi-thread-tcg.txt b/docs/devel/multi-thread-tcg.txt > index faf8918b23..faf09c6069 100644 > --- a/docs/devel/multi-thread-tcg.txt > +++ b/docs/devel/multi-thread-tcg.txt > @@ -140,6 +140,9 @@ to atomically insert new elements. > The lookup caches are updated atomically and the lookup hash uses QHT > which is designed for concurrent safe lookup. > > +Parallel code generation is supported. QHT is used at insertion time > +as the synchronization point across threads, thereby ensuring that we only > +keep track of a single TranslationBlock for each guest code block. > > Memory maps and TLBs > --------------------
On Fri, Jun 29, 2018 at 10:25:03 +0300, Pavel Dovgalyuk wrote: > This patch breaks record/replay. > > I run execution recording of the WindowsXP machine with the following script: > > ./bin/qemu-system-i386 -d in_asm,exec -D xp_save.log -global apic-common.vapic=off \ > -icount shift=7,rr=record,rrfile=xp0.replay \ > -drive file=./images/xp_sp2.qcow2,if=none,id=img-direct,snapshot \ > -drive driver=blkreplay,if=none,image=img-direct,id=img-replay \ > -device ide-hd,drive=img-replay -net none -m 512M > > QEMU fails at some moment. Here are the contents of the log: > > ---------------- > IN: > 0x806ee2d0: 33 c0 xorl %eax, %eax > 0x806ee2d2: 8a c1 movb %cl, %al > 0x806ee2d4: 33 c9 xorl %ecx, %ecx > 0x806ee2d6: 8a 88 58 e2 6e 80 movb -0x7f911da8(%eax), %cl > 0x806ee2dc: 89 0d 80 00 fe ff movl %ecx, 0xfffe0080 > 0x806ee2e2: a1 80 00 fe ff movl 0xfffe0080, %eax > 0x806ee2e7: c3 retl > > Trace 0: 0x7fdc103b16a0 [00000000/806ee2d0/0x4000b0] > qemu: fatal: cpu_io_recompile: could not find TB for pc=0x7fec24fde2de Thanks for reporting. From code inspection I can see how this could happen: we're calling tcg_tb_remove for a TB that we did not just generate--we got an existing one instead. Note that CF_NOCACHE is not part of the CF_HASH mask, so this might explain why the problem only occurs for r/r. Can you reproduce this with any other guest? If not, I'd be happy to use your windows qcow2 file if you could share it with me off-list. Thanks, Emilio
> From: Emilio G. Cota [mailto:cota@braap.org] > On Fri, Jun 29, 2018 at 10:25:03 +0300, Pavel Dovgalyuk wrote: > > This patch breaks record/replay. > > > > I run execution recording of the WindowsXP machine with the following script: > > > > ./bin/qemu-system-i386 -d in_asm,exec -D xp_save.log -global apic-common.vapic=off \ > > -icount shift=7,rr=record,rrfile=xp0.replay \ > > -drive file=./images/xp_sp2.qcow2,if=none,id=img-direct,snapshot \ > > -drive driver=blkreplay,if=none,image=img-direct,id=img-replay \ > > -device ide-hd,drive=img-replay -net none -m 512M > > > > QEMU fails at some moment. Here are the contents of the log: > > > > ---------------- > > IN: > > 0x806ee2d0: 33 c0 xorl %eax, %eax > > 0x806ee2d2: 8a c1 movb %cl, %al > > 0x806ee2d4: 33 c9 xorl %ecx, %ecx > > 0x806ee2d6: 8a 88 58 e2 6e 80 movb -0x7f911da8(%eax), %cl > > 0x806ee2dc: 89 0d 80 00 fe ff movl %ecx, 0xfffe0080 > > 0x806ee2e2: a1 80 00 fe ff movl 0xfffe0080, %eax > > 0x806ee2e7: c3 retl > > > > Trace 0: 0x7fdc103b16a0 [00000000/806ee2d0/0x4000b0] > > qemu: fatal: cpu_io_recompile: could not find TB for pc=0x7fec24fde2de > > Thanks for reporting. > > From code inspection I can see how this could happen: we're calling > tcg_tb_remove for a TB that we did not just generate--we got an > existing one instead. Note that CF_NOCACHE is not part of > the CF_HASH mask, so this might explain why the problem only > occurs for r/r. Thanks. > Can you reproduce this with any other guest? If not, I'd be > happy to use your windows qcow2 file if you could share it > with me off-list. The same failure can be reproduced with linux-0.2.img, which was downloaded from the QEMU site. I can't find it now, but I can upload this file if needed. Pavel Dovgalyuk
On Mon, Jul 02, 2018 at 08:52:14 +0300, Pavel Dovgalyuk wrote: > The same failure can be reproduced with linux-0.2.img, which was > downloaded from QEMU site. > I can't find it now, but I can upload this file if needed. Please upload it somewhere and share the full QEMU invocation needed to replicate. Thanks, Emilio
> From: Emilio G. Cota [mailto:cota@braap.org] > On Mon, Jul 02, 2018 at 08:52:14 +0300, Pavel Dovgalyuk wrote: > > The same failure can be reproduced with linux-0.2.img, which was > > downloaded from QEMU site. > > I can't find it now, but I can upload this file if needed. > > Please upload it somewhere and share the full QEMU invocation > needed to replicate. https://github.com/Dovgalyuk/qemu-images/blob/master/linux-0.2.img qemu-system-i386 -drive file=images/linux-0.2.img,if=none,snapshot,id=img -drive driver=blkreplay,if=none,id=rr,image=img -device ide-hd,drive=rr -net none -icount shift=5,rr=record,rrfile=linux02.rr Pavel Dovgalyuk
On Tue, Jul 03, 2018 at 08:38:52 +0300, Pavel Dovgalyuk wrote: > > From: Emilio G. Cota [mailto:cota@braap.org] > > On Mon, Jul 02, 2018 at 08:52:14 +0300, Pavel Dovgalyuk wrote: > > > The same failure can be reproduced with linux-0.2.img, which was > > > downloaded from QEMU site. > > > I can't find it now, but I can upload this file if needed. > > > > Please upload it somewhere and share the full QEMU invocation > > needed to replicate. > > https://github.com/Dovgalyuk/qemu-images/blob/master/linux-0.2.img > > qemu-system-i386 -drive file=images/linux-0.2.img,if=none,snapshot,id=img -drive > driver=blkreplay,if=none,id=rr,image=img -device ide-hd,drive=rr -net none -icount > shift=5,rr=record,rrfile=linux02.rr The appended patch fixes it for me. Can you please test on your windows image? The rationale is to honour CF_NOCACHE, so that we always return a new TB from tb_gen_code. Thanks, Emilio --- diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 170b957..49d77fa 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1446,7 +1446,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list) phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, tb->trace_vcpu_dstate); - if (!qht_remove(&tb_ctx.htable, tb, h)) { + if (!(tb->cflags & CF_NOCACHE) && + !qht_remove(&tb_ctx.htable, tb, h)) { return; } @@ -1604,8 +1605,6 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, { PageDesc *p; PageDesc *p2 = NULL; - void *existing_tb = NULL; - uint32_t h; assert_memory_lock(); @@ -1625,20 +1624,25 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb->page_addr[1] = -1; } - /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, - tb->trace_vcpu_dstate); - qht_insert(&tb_ctx.htable, tb, h, &existing_tb); + if (!(tb->cflags & CF_NOCACHE)) { + void *existing_tb = NULL; + 
uint32_t h; - /* remove TB from the page(s) if we couldn't insert it */ - if (unlikely(existing_tb)) { - tb_page_remove(p, tb); - invalidate_page_bitmap(p); - if (p2) { - tb_page_remove(p2, tb); - invalidate_page_bitmap(p2); + /* add in the hash table */ + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + tb->trace_vcpu_dstate); + qht_insert(&tb_ctx.htable, tb, h, &existing_tb); + + /* remove TB from the page(s) if we couldn't insert it */ + if (unlikely(existing_tb)) { + tb_page_remove(p, tb); + invalidate_page_bitmap(p); + if (p2) { + tb_page_remove(p2, tb); + invalidate_page_bitmap(p2); + } + tb = existing_tb; } - tb = existing_tb; } if (p2 && p2 != p) {
> From: Emilio G. Cota [mailto:cota@braap.org] > On Tue, Jul 03, 2018 at 08:38:52 +0300, Pavel Dovgalyuk wrote: > > > From: Emilio G. Cota [mailto:cota@braap.org] > > > On Mon, Jul 02, 2018 at 08:52:14 +0300, Pavel Dovgalyuk wrote: > > > > The same failure can be reproduced with linux-0.2.img, which was > > > > downloaded from QEMU site. > > > > I can't find it now, but I can upload this file if needed. > > > > > > Please upload it somewhere and share the full QEMU invocation > > > needed to replicate. > > > > https://github.com/Dovgalyuk/qemu-images/blob/master/linux-0.2.img > > > > qemu-system-i386 -drive file=images/linux-0.2.img,if=none,snapshot,id=img -drive > > driver=blkreplay,if=none,id=rr,image=img -device ide-hd,drive=rr -net none -icount > > shift=5,rr=record,rrfile=linux02.rr > > The appended patch fixes it for me. Can you please test on your > windows image? > > The rationale is to honour CF_NOCACHE, so that we always return > a new TB from tb_gen_code. Works for me, thank you. 
Tested-by: Pavel Dovgalyuk <Pavel.Dovgaluk@ispras.ru> Pavel Dovgalyuk > > --- > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c > index 170b957..49d77fa 100644 > --- a/accel/tcg/translate-all.c > +++ b/accel/tcg/translate-all.c > @@ -1446,7 +1446,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool > rm_from_page_list) > phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); > h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb_cflags(tb) & CF_HASH_MASK, > tb->trace_vcpu_dstate); > - if (!qht_remove(&tb_ctx.htable, tb, h)) { > + if (!(tb->cflags & CF_NOCACHE) && > + !qht_remove(&tb_ctx.htable, tb, h)) { > return; > } > > @@ -1604,8 +1605,6 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > { > PageDesc *p; > PageDesc *p2 = NULL; > - void *existing_tb = NULL; > - uint32_t h; > > assert_memory_lock(); > > @@ -1625,20 +1624,25 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, > tb->page_addr[1] = -1; > } > > - /* add in the hash table */ > - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, > - tb->trace_vcpu_dstate); > - qht_insert(&tb_ctx.htable, tb, h, &existing_tb); > + if (!(tb->cflags & CF_NOCACHE)) { > + void *existing_tb = NULL; > + uint32_t h; > > - /* remove TB from the page(s) if we couldn't insert it */ > - if (unlikely(existing_tb)) { > - tb_page_remove(p, tb); > - invalidate_page_bitmap(p); > - if (p2) { > - tb_page_remove(p2, tb); > - invalidate_page_bitmap(p2); > + /* add in the hash table */ > + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, > + tb->trace_vcpu_dstate); > + qht_insert(&tb_ctx.htable, tb, h, &existing_tb); > + > + /* remove TB from the page(s) if we couldn't insert it */ > + if (unlikely(existing_tb)) { > + tb_page_remove(p, tb); > + invalidate_page_bitmap(p); > + if (p2) { > + tb_page_remove(p2, tb); > + invalidate_page_bitmap(p2); > + } > + tb = existing_tb; > } > - tb = existing_tb; > } > > if (p2 && p2 != p) {
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index d75c35380a..45f6ebc65e 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -245,10 +245,7 @@ void cpu_exec_step_atomic(CPUState *cpu) if (tb == NULL) { mmap_lock(); tb_lock(); - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); - if (likely(tb == NULL)) { - tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); - } + tb = tb_gen_code(cpu, pc, cs_base, flags, cflags); tb_unlock(); mmap_unlock(); } @@ -398,14 +395,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu, tb_lock(); acquired_tb_lock = true; - /* There's a chance that our desired tb has been translated while - * taking the locks so we check again inside the lock. - */ - tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask); - if (likely(tb == NULL)) { - /* if no translated code available, then translate it now */ - tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); - } + tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask); mmap_unlock(); /* We add the TB in the virtual pc hash table for the fast lookup */ diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index c75298d08a..2585e6fd3e 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1581,18 +1581,30 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb, * (-1) to indicate that only one page contains the TB. * * Called with mmap_lock held for user-mode emulation. + * + * Returns a pointer @tb, or a pointer to an existing TB that matches @tb. + * Note that in !user-mode, another thread might have already added a TB + * for the same block of guest code that @tb corresponds to. In that case, + * the caller should discard the original @tb, and use instead the returned TB. 
*/ -static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, - tb_page_addr_t phys_page2) +static TranslationBlock * +tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, + tb_page_addr_t phys_page2) { PageDesc *p; PageDesc *p2 = NULL; + void *existing_tb = NULL; uint32_t h; assert_memory_lock(); /* * Add the TB to the page list, acquiring first the pages's locks. + * We keep the locks held until after inserting the TB in the hash table, + * so that if the insertion fails we know for sure that the TBs are still + * in the page descriptors. + * Note that inserting into the hash table first isn't an option, since + * we can only insert TBs that are fully initialized. */ page_lock_pair(&p, phys_pc, &p2, phys_page2, 1); tb_page_add(p, tb, 0, phys_pc & TARGET_PAGE_MASK); @@ -1602,21 +1614,33 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb->page_addr[1] = -1; } + /* add in the hash table */ + h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, + tb->trace_vcpu_dstate); + qht_insert(&tb_ctx.htable, tb, h, &existing_tb); + + /* remove TB from the page(s) if we couldn't insert it */ + if (unlikely(existing_tb)) { + tb_page_remove(p, tb); + invalidate_page_bitmap(p); + if (p2) { + tb_page_remove(p2, tb); + invalidate_page_bitmap(p2); + } + tb = existing_tb; + } + if (p2) { page_unlock(p2); } page_unlock(p); - /* add in the hash table */ - h = tb_hash_func(phys_pc, tb->pc, tb->flags, tb->cflags & CF_HASH_MASK, - tb->trace_vcpu_dstate); - qht_insert(&tb_ctx.htable, tb, h, NULL); - #ifdef CONFIG_USER_ONLY if (DEBUG_TB_CHECK_GATE) { tb_page_check(); } #endif + return tb; } /* Called with mmap_lock held for user mode emulation. 
*/ @@ -1625,7 +1649,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, uint32_t flags, int cflags) { CPUArchState *env = cpu->env_ptr; - TranslationBlock *tb; + TranslationBlock *tb, *existing_tb; tb_page_addr_t phys_pc, phys_page2; target_ulong virt_page2; tcg_insn_unit *gen_code_buf; @@ -1773,7 +1797,15 @@ TranslationBlock *tb_gen_code(CPUState *cpu, * memory barrier is required before tb_link_page() makes the TB visible * through the physical hash table and physical page list. */ - tb_link_page(tb, phys_pc, phys_page2); + existing_tb = tb_link_page(tb, phys_pc, phys_page2); + /* if the TB already exists, discard what we just translated */ + if (unlikely(existing_tb != tb)) { + uintptr_t orig_aligned = (uintptr_t)gen_code_buf; + + orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); + atomic_set(&tcg_ctx->code_gen_ptr, orig_aligned); + return existing_tb; + } tcg_tb_insert(tb); return tb; } diff --git a/docs/devel/multi-thread-tcg.txt b/docs/devel/multi-thread-tcg.txt index faf8918b23..faf09c6069 100644 --- a/docs/devel/multi-thread-tcg.txt +++ b/docs/devel/multi-thread-tcg.txt @@ -140,6 +140,9 @@ to atomically insert new elements. The lookup caches are updated atomically and the lookup hash uses QHT which is designed for concurrent safe lookup. +Parallel code generation is supported. QHT is used at insertion time +as the synchronization point across threads, thereby ensuring that we only +keep track of a single TranslationBlock for each guest code block. Memory maps and TLBs --------------------