@@ -8,16 +8,27 @@
#ifndef QEMU_CACHEFLUSH_H
#define QEMU_CACHEFLUSH_H
+/**
+ * flush_idcache_range:
+ * @rx: instruction address
+ * @rw: data address
+ * @len: length to flush
+ *
+ * Flush @len bytes of the data cache at @rw and the icache at @rx
+ * to bring them in sync. The two addresses may be different virtual
+ * mappings of the same physical page(s).
+ */
+
#if defined(__i386__) || defined(__x86_64__) || defined(__s390__)
-static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
+static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
/* icache is coherent and does not require flushing. */
}
#else
-void flush_icache_range(uintptr_t start, uintptr_t stop);
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
#endif
@@ -2947,7 +2947,7 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
invalidate_and_set_dirty(mr, addr1, l);
break;
case FLUSH_CACHE:
- flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
+ flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
break;
}
}
@@ -1079,7 +1079,8 @@ void tcg_prologue_init(TCGContext *s)
buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
- flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
+ flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0,
+ tcg_ptr_byte_diff(buf1, buf0));
#endif
/* Deduct the prologue from the buffer. */
@@ -4324,7 +4325,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
#ifndef CONFIG_TCG_INTERPRETER
/* flush instruction cache */
- flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
+ flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
+ tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif
return tcg_current_code_size(s);
@@ -21,29 +21,32 @@
#include <sys/cachectl.h>
#endif
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- cacheflush((void *)start, stop - start, ICACHE);
+ if (rx != rw) {
+ cacheflush((void *)rw, len, DCACHE);
+ }
+ cacheflush((void *)rx, len, ICACHE);
}
#elif defined(__powerpc__)
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- uintptr_t p, start1, stop1;
+ uintptr_t p, b, e;
size_t dsize = qemu_dcache_linesize;
size_t isize = qemu_icache_linesize;
- start1 = start & ~(dsize - 1);
- stop1 = (stop + dsize - 1) & ~(dsize - 1);
- for (p = start1; p < stop1; p += dsize) {
+ b = rw & ~(dsize - 1);
+ e = (rw + len + dsize - 1) & ~(dsize - 1);
+ for (p = b; p < e; p += dsize) {
asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
}
asm volatile ("sync" : : : "memory");
- start &= start & ~(isize - 1);
- stop1 = (stop + isize - 1) & ~(isize - 1);
- for (p = start1; p < stop1; p += isize) {
+ b = rx & ~(isize - 1);
+ e = (rx + len + isize - 1) & ~(isize - 1);
+ for (p = b; p < e; p += isize) {
asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
}
asm volatile ("sync" : : : "memory");
@@ -52,20 +55,23 @@ void flush_icache_range(uintptr_t start, uintptr_t stop)
#elif defined(__sparc__)
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- uintptr_t p;
-
- for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
+ /* No additional data flush to the RW virtual address required. */
+ uintptr_t p, end = (rx + len + 7) & -8;
+ for (p = rx & -8; p < end; p += 8) {
__asm__ __volatile__("flush\t%0" : : "r" (p));
}
}
#else
-void flush_icache_range(uintptr_t start, uintptr_t stop)
+void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
{
- __builtin___clear_cache((char *)start, (char *)stop);
+ if (rw != rx) {
+ __builtin___clear_cache((char *)rw, (char *)rw + len);
+ }
+ __builtin___clear_cache((char *)rx, (char *)rx + len);
}
#endif
@@ -166,9 +166,11 @@ static void fallback_cache_info(int *isize, int *dsize)
*isize = *dsize;
} else {
#if defined(_ARCH_PPC)
- /* For PPC, we're going to use the icache size computed for
- flush_icache_range. Which means that we must use the
- architecture minimum. */
+ /*
+ * For PPC, we're going to use the cache sizes computed for
+ * flush_idcache_range. Which means that we must use the
+ * architecture minimum.
+ */
*isize = *dsize = 16;
#else
/* Otherwise, 64 bytes is not uncommon. */
@@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
}
pair = (uint64_t)i2 << 32 | i1;
qatomic_set((uint64_t *)jmp_addr, pair);
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ flush_idcache_range(jmp_addr, jmp_addr, 8);
}
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
@@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
uintptr_t addr)
{
qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ flush_idcache_range(jmp_addr, jmp_addr, 4);
}
typedef struct {
@@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
/* As per the enclosing if, this is ppc64. Avoid the _Static_assert
within qatomic_set that would fail to build a ppc32 host. */
qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ flush_idcache_range(jmp_addr, jmp_addr, 8);
} else {
intptr_t diff = addr - jmp_addr;
tcg_debug_assert(in_range_b(diff));
qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ flush_idcache_range(jmp_addr, jmp_addr, 4);
}
}
@@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
if (!USE_REG_TB) {
qatomic_set((uint32_t *)jmp_addr,
deposit32(CALL, 0, 30, br_disp >> 2));
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ flush_idcache_range(jmp_addr, jmp_addr, 4);
return;
}
@@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
}
qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
- flush_icache_range(jmp_addr, jmp_addr + 8);
+ flush_idcache_range(jmp_addr, jmp_addr, 8);
}