diff mbox series

[066/147] include/exec: Move TLB_MMIO, TLB_DISCARD_WRITE to slow flags

Message ID 20250422192819.302784-67-richard.henderson@linaro.org
State Superseded
Headers show
Series single-binary patch queue | expand

Commit Message

Richard Henderson April 22, 2025, 7:26 p.m. UTC
Recover two bits from the inline flags.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/tlb-flags.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

Comments

Pierrick Bouvier April 22, 2025, 8:54 p.m. UTC | #1
On 4/22/25 12:26, Richard Henderson wrote:
> Recover two bits from the inline flags.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   include/exec/tlb-flags.h | 17 +++++++++--------
>   1 file changed, 9 insertions(+), 8 deletions(-)
>

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Jonathan Cameron April 25, 2025, 5:35 p.m. UTC | #2
On Tue, 22 Apr 2025 12:26:55 -0700
Richard Henderson <richard.henderson@linaro.org> wrote:

> Recover two bits from the inline flags.


Hi Richard,

Early days but something (I'm fairly sure in this patch) is tripping up my favourite
TCG corner case of running code out of MMIO memory (interleaved CXL memory).

Only seeing it on arm64 tests so far which isn't upstream yet..
(guess what I was getting ready to post today)

Back trace is:

#0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
#1  0x00007fffb6894125 in code_gen_buffer ()
#2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
#3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
#4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
#5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
#6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061 
#7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
#8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
#9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
#10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78  

I haven't pushed out the rebased tree yet making this a truly awful bug report.

The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
game. I see the seg fault only after this patch.

> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  include/exec/tlb-flags.h | 17 +++++++++--------
>  1 file changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
> index a0e51a4b37..54a6bae768 100644
> --- a/include/exec/tlb-flags.h
> +++ b/include/exec/tlb-flags.h
> @@ -53,20 +53,15 @@
>   * contain the page physical address.
>   */
>  #define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
> -/* Set if TLB entry is an IO callback.  */
> -#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
> -/* Set if TLB entry writes ignored.  */
> -#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
>  /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
> -#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 5))
> +#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 3))
>  
>  /*
>   * Use this mask to check interception with an alignment mask
>   * in a TCG backend.
>   */
>  #define TLB_FLAGS_MASK \
> -    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
> -    | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
> +    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_FORCE_SLOW)
>  
>  /*
>   * Flags stored in CPUTLBEntryFull.slow_flags[x].
> @@ -78,8 +73,14 @@
>  #define TLB_WATCHPOINT       (1 << 1)
>  /* Set if TLB entry requires aligned accesses.  */
>  #define TLB_CHECK_ALIGNED    (1 << 2)
> +/* Set if TLB entry writes ignored.  */
> +#define TLB_DISCARD_WRITE    (1 << 3)
> +/* Set if TLB entry is an IO callback.  */
> +#define TLB_MMIO             (1 << 4)
>  
> -#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED)
> +#define TLB_SLOW_FLAGS_MASK \
> +    (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED | \
> +     TLB_DISCARD_WRITE | TLB_MMIO)
>  
>  /* The two sets of flags must not overlap. */
>  QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);
Alistair Francis April 29, 2025, 9:35 p.m. UTC | #3
On Sat, Apr 26, 2025 at 3:36 AM Jonathan Cameron via
<qemu-devel@nongnu.org> wrote:
>
> On Tue, 22 Apr 2025 12:26:55 -0700
> Richard Henderson <richard.henderson@linaro.org> wrote:
>
> > Recover two bits from the inline flags.
>
>
> Hi Richard,
>
> Early days but something (I'm fairly sure in this patch) is tripping up my favourite
> TCG corner case of running code out of MMIO memory (interleaved CXL memory).
>
> Only seeing it on arm64 tests so far which isn't upstream yet..
> (guess what I was getting ready to post today)
>
> Back trace is:
>
> #0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
> #1  0x00007fffb6894125 in code_gen_buffer ()
> #2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
> #3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
> #4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
> #5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
> #6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061
> #7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
> #8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
> #9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
> #10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
> #11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
>
> I haven't pushed out the rebased tree yet making this a truly awful bug report.
>
> The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
> game. I see the seg fault only after this patch.

I see the same thing with some RISC-V tests. I can provide the test
images if you want as well

build/qemu-system-riscv64 -machine virt -cpu rv64,h=false -m 1G \
    -serial mon:stdio -serial null -nographic \
    -append "root=/dev/vda ro" \
    -netdev user,id=net0 -device virtio-net-device,netdev=net0 \
    -smp 4 -d guest_errors \
    -bios none \
    -device loader,file=./images/qemuriscv64/buildroot/Image,addr=0x80200000 \
    -kernel ./images/qemuriscv64/buildroot/fw_jump.elf \
    -drive id=disk0,file=./images/qemuriscv64/buildroot/rootfs.ext2,if=none,format=raw \
    -device virtio-blk-device,drive=disk0


#0  0x000055555598b0f1 in cpu_atomic_xchgl_le_mmu (env=0x5555567ff290,
addr=33554444, val=0, oi=3619, retaddr=<optimized out>)
   at ../accel/tcg/atomic_template.h:111
#1  0x00007fffb2c5e537 in code_gen_buffer ()
#2  0x000055555597c661 in cpu_tb_exec
   (cpu=cpu@entry=0x5555567fc6d0, itb=itb@entry=0x7fffb2c5e400
<code_gen_buffer+113632211>, tb_exit=tb_exit@entry=0x7fff47ffe764)
   at ../accel/tcg/cpu-exec.c:453
#3  0x000055555597cb4a in cpu_loop_exec_tb
   (cpu=0x5555567fc6d0, tb=0x7fffb2c5e400 <code_gen_buffer+113632211>,
pc=<optimized out>, last_tb=<synthetic pointer>,
tb_exit=0x7fff47ffe764)
   at ../accel/tcg/cpu-exec.c:903
#4  cpu_exec_loop (cpu=cpu@entry=0x5555567fc6d0,
sc=sc@entry=0x7fff47ffe810) at ../accel/tcg/cpu-exec.c:1017
#5  0x000055555597d23d in cpu_exec_setjmp
(cpu=cpu@entry=0x5555567fc6d0, sc=sc@entry=0x7fff47ffe810) at
../accel/tcg/cpu-exec.c:1034
#6  0x000055555597d909 in cpu_exec (cpu=cpu@entry=0x5555567fc6d0) at
../accel/tcg/cpu-exec.c:1060
#7  0x0000555555af1c62 in tcg_cpu_exec (cpu=cpu@entry=0x5555567fc6d0)
at ../accel/tcg/tcg-accel-ops.c:81
#8  0x0000555555af2012 in mttcg_cpu_thread_fn (arg=0x5555567fc6d0) at
../accel/tcg/tcg-accel-ops-mttcg.c:94
#9  0x0000555555b956c7 in qemu_thread_start (args=0x5555569e8da0) at
../util/qemu-thread-posix.c:541
#10 0x00007ffff77f2f14 in start_thread () at /lib64/libc.so.6
#11 0x00007ffff7875aac in __clone3 () at /lib64/libc.so.6

Alistair
Richard Henderson April 30, 2025, 2:43 a.m. UTC | #4
On 4/29/25 14:35, Alistair Francis wrote:
> On Sat, Apr 26, 2025 at 3:36 AM Jonathan Cameron via
> <qemu-devel@nongnu.org> wrote:
>>
>> On Tue, 22 Apr 2025 12:26:55 -0700
>> Richard Henderson <richard.henderson@linaro.org> wrote:
>>
>>> Recover two bits from the inline flags.
>>
>>
>> Hi Richard,
>>
>> Early days but something (I'm fairly sure in this patch) is tripping up my favourite
>> TCG corner case of running code out of MMIO memory (interleaved CXL memory).
>>
>> Only seeing it on arm64 tests so far which isn't upstream yet..
>> (guess what I was getting ready to post today)
>>
>> Back trace is:
>>
>> #0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
>> #1  0x00007fffb6894125 in code_gen_buffer ()
>> #2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
>> #3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
>> #4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
>> #5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
>> #6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061
>> #7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
>> #8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
>> #9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
>> #10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
>> #11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
>>
>> I haven't pushed out the rebased tree yet making this a truly awful bug report.
>>
>> The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
>> game. I see the seg fault only after this patch.
> 
> I see the same thing with some RISC-V tests. I can provide the test
> images if you want as well


Yes please.


r~
Jonathan Cameron May 8, 2025, 1:29 p.m. UTC | #5
On Tue, 29 Apr 2025 19:43:05 -0700
Richard Henderson <richard.henderson@linaro.org> wrote:

> On 4/29/25 14:35, Alistair Francis wrote:
> > On Sat, Apr 26, 2025 at 3:36 AM Jonathan Cameron via
> > <qemu-devel@nongnu.org> wrote:  
> >>
> >> On Tue, 22 Apr 2025 12:26:55 -0700
> >> Richard Henderson <richard.henderson@linaro.org> wrote:
> >>  
> >>> Recover two bits from the inline flags.  
> >>
> >>
> >> Hi Richard,
> >>
> >> Early days but something (I'm fairly sure in this patch) is tripping up my favourite
> >> TCG corner case of running code out of MMIO memory (interleaved CXL memory).
> >>
> >> Only seeing it on arm64 tests so far which isn't upstream yet..
> >> (guess what I was getting ready to post today)
> >>
> >> Back trace is:
> >>
> >> #0  0x0000555555fd4296 in cpu_atomic_fetch_andq_le_mmu (env=0x555557ee19b0, addr=18442241572520067072, val=18446744073701163007, oi=8244, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:140
> >> #1  0x00007fffb6894125 in code_gen_buffer ()
> >> #2  0x0000555555fc4c46 in cpu_tb_exec (cpu=cpu@entry=0x555557ededf0, itb=itb@entry=0x7fffb6894000 <code_gen_buffer+200511443>, tb_exit=tb_exit@entry=0x7ffff4bfb744) at ../../accel/tcg/cpu-exec.c:455
> >> #3  0x0000555555fc51c2 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfb744, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffb6894000 <code_gen_buffer+200511443>, cpu=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:904
> >> #4  cpu_exec_loop (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1018
> >> #5  0x0000555555fc58f1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557ededf0, sc=sc@entry=0x7ffff4bfb7f0) at ../../accel/tcg/cpu-exec.c:1035
> >> #6  0x0000555555fc5f6c in cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/cpu-exec.c:1061
> >> #7  0x0000555556146ac3 in tcg_cpu_exec (cpu=cpu@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops.c:81
> >> #8  0x0000555556146ee3 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557ededf0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
> >> #9  0x00005555561f6450 in qemu_thread_start (args=0x555557f8f430) at ../../util/qemu-thread-posix.c:541
> >> #10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
> >> #11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
> >>
> >> I haven't pushed out the rebased tree yet making this a truly awful bug report.
> >>
> >> The pull request you sent with this in wasn't bisectable so this was a bit of a guessing
> >> game. I see the seg fault only after this patch.  
> > 
> > I see the same thing with some RISC-V tests. I can provide the test
> > images if you want as well  
> 
> 
> Yes please.
> 
> 
> r~

I'm guessing Alistair is busy.

I got around to testing this on x86 and indeed the blow-up is the same.

0x0000555555e3dd77 in cpu_atomic_add_fetchl_le_mmu (env=0x55555736bef0, addr=140271756837240, val=1, oi=34, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:143
143     GEN_ATOMIC_HELPER(add_fetch)
(gdb) bt
#0  0x0000555555e3dd77 in cpu_atomic_add_fetchl_le_mmu (env=0x55555736bef0, addr=140271756837240, val=1, oi=34, retaddr=<optimized out>) at ../../accel/tcg/atomic_template.h:143
#1  0x00007fffbc31c6f0 in code_gen_buffer ()
#2  0x0000555555e23aa6 in cpu_tb_exec (cpu=cpu@entry=0x555557369330, itb=itb@entry=0x7fffbc31c600 <code_gen_buffer+295441875>, tb_exit=tb_exit@entry=0x7ffff4bfd6ec) at ../../accel/tcg/cpu-exec.c:438
#3  0x0000555555e24025 in cpu_loop_exec_tb (tb_exit=0x7ffff4bfd6ec, last_tb=<synthetic pointer>, pc=<optimized out>, tb=0x7fffbc31c600 <code_gen_buffer+295441875>, cpu=0x555557369330) at ../../accel/tcg/cpu-exec.c:872
#4  cpu_exec_loop (cpu=cpu@entry=0x555557369330, sc=sc@entry=0x7ffff4bfd7b0) at ../../accel/tcg/cpu-exec.c:982
#5  0x0000555555e247a1 in cpu_exec_setjmp (cpu=cpu@entry=0x555557369330, sc=sc@entry=0x7ffff4bfd7b0) at ../../accel/tcg/cpu-exec.c:999
#6  0x0000555555e24e2c in cpu_exec (cpu=cpu@entry=0x555557369330) at ../../accel/tcg/cpu-exec.c:1025
#7  0x0000555555e42c73 in tcg_cpu_exec (cpu=cpu@entry=0x555557369330) at ../../accel/tcg/tcg-accel-ops.c:81
#8  0x0000555555e43093 in mttcg_cpu_thread_fn (arg=arg@entry=0x555557369330) at ../../accel/tcg/tcg-accel-ops-mttcg.c:94
#9  0x0000555555ef2250 in qemu_thread_start (args=0x5555573e6e20) at ../../util/qemu-thread-posix.c:541
#10 0x00007ffff7750aa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#11 0x00007ffff77ddc3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78

My particular setup needs one patch to work around some DMA buffer issues in virtio (similar to
a patch for PCI space last year).  I've been meaning to post an RFC to get feedback on how
to handle this but have not gotten to it yet!

From 801e47897c5959a22ed050d7e7feebbbd3a12588 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Mon, 22 Apr 2024 13:54:37 +0100
Subject: [PATCH] physmem: Increase bounce buffers for "memory" address space.

Doesn't need to be this big and should be configurable.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 system/physmem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/system/physmem.c b/system/physmem.c
index 3f4fd69d9a..651b875827 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2798,6 +2798,7 @@ static void memory_map_init(void)
     memory_region_init(system_memory, NULL, "system", UINT64_MAX);
     address_space_init(&address_space_memory, system_memory, "memory");
 
+    address_space_memory.max_bounce_buffer_size = 1024 * 1024 * 1024;
     system_io = g_malloc(sizeof(*system_io));
     memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
                           65536);
diff mbox series

Patch

diff --git a/include/exec/tlb-flags.h b/include/exec/tlb-flags.h
index a0e51a4b37..54a6bae768 100644
--- a/include/exec/tlb-flags.h
+++ b/include/exec/tlb-flags.h
@@ -53,20 +53,15 @@ 
  * contain the page physical address.
  */
 #define TLB_NOTDIRTY        (1 << (TARGET_PAGE_BITS_MIN - 2))
-/* Set if TLB entry is an IO callback.  */
-#define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
-/* Set if TLB entry writes ignored.  */
-#define TLB_DISCARD_WRITE   (1 << (TARGET_PAGE_BITS_MIN - 4))
 /* Set if the slow path must be used; more flags in CPUTLBEntryFull. */
-#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 5))
+#define TLB_FORCE_SLOW      (1 << (TARGET_PAGE_BITS_MIN - 3))
 
 /*
  * Use this mask to check interception with an alignment mask
  * in a TCG backend.
  */
 #define TLB_FLAGS_MASK \
-    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
-    | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
+    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_FORCE_SLOW)
 
 /*
  * Flags stored in CPUTLBEntryFull.slow_flags[x].
@@ -78,8 +73,14 @@ 
 #define TLB_WATCHPOINT       (1 << 1)
 /* Set if TLB entry requires aligned accesses.  */
 #define TLB_CHECK_ALIGNED    (1 << 2)
+/* Set if TLB entry writes ignored.  */
+#define TLB_DISCARD_WRITE    (1 << 3)
+/* Set if TLB entry is an IO callback.  */
+#define TLB_MMIO             (1 << 4)
 
-#define TLB_SLOW_FLAGS_MASK  (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED)
+#define TLB_SLOW_FLAGS_MASK \
+    (TLB_BSWAP | TLB_WATCHPOINT | TLB_CHECK_ALIGNED | \
+     TLB_DISCARD_WRITE | TLB_MMIO)
 
 /* The two sets of flags must not overlap. */
 QEMU_BUILD_BUG_ON(TLB_FLAGS_MASK & TLB_SLOW_FLAGS_MASK);