@@ -2003,140 +2003,6 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_BEUQ] = helper_be_stq_mmu,
};
-/* We expect to use a 16-bit negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
-
-/* Perform the TLB load and compare. Places the result of the comparison
- in CR7, loads the addend of the TLB into R3, and returns the register
- containing the guest address (zero-extended into R4). Clobbers R0 and R2. */
-
-static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
- TCGReg addrlo, TCGReg addrhi,
- int mem_index, bool is_read)
-{
- int cmp_off
- = (is_read
- ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
- int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
- int table_off = fast_off + offsetof(CPUTLBDescFast, table);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_bits = get_alignment_bits(opc);
-
- /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
-
- /* Extract the page index, shifted into place for tlb index. */
- if (TCG_TARGET_REG_BITS == 32) {
- tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- } else {
- tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- }
- tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
-
- /* Load the TLB comparator. */
- if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
- uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
- ? LWZUX : LDUX);
- tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
- } else {
- tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
- tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
- } else {
- tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
- }
- }
-
- /* Load the TLB addend for use on the fast path. Do this asap
- to minimize any load use delay. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
- offsetof(CPUTLBEntry, addend));
-
- /* Clear the non-page, non-alignment bits from the address */
- if (TCG_TARGET_REG_BITS == 32) {
- /* We don't support unaligned accesses on 32-bits.
- * Preserve the bottom bits and thus trigger a comparison
- * failure on unaligned accesses.
- */
- if (a_bits < s_bits) {
- a_bits = s_bits;
- }
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
- } else {
- TCGReg t = addrlo;
-
- /* If the access is unaligned, we need to make sure we fail if we
- * cross a page boundary. The trick is to add the access size-1
- * to the address before masking the low bits. That will make the
- * address overflow to the next page if we cross a page boundary,
- * which will then force a mismatch of the TLB compare.
- */
- if (a_bits < s_bits) {
- unsigned a_mask = (1 << a_bits) - 1;
- unsigned s_mask = (1 << s_bits) - 1;
- tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
- t = TCG_REG_R0;
- }
-
- /* Mask the address for the requested alignment. */
- if (TARGET_LONG_BITS == 32) {
- tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
- (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
- /* Zero-extend the address for use in the final address. */
- tcg_out_ext32u(s, TCG_REG_R4, addrlo);
- addrlo = TCG_REG_R4;
- } else if (a_bits == 0) {
- tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
- } else {
- tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
- 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
- tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
- }
- }
-
- if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
- 0, 7, TCG_TYPE_I32);
- tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
- tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
- } else {
- tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
- 0, 7, TCG_TYPE_TL);
- }
-
- return addrlo;
-}
-
-/* Record the context of a call to the out of line helper code for the slow
- path for a load or store, so that we can later generate the correct
- helper code. */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- TCGType type, MemOpIdx oi,
- TCGReg datalo_reg, TCGReg datahi_reg,
- TCGReg addrlo_reg, TCGReg addrhi_reg,
- tcg_insn_unit *raddr, tcg_insn_unit *lptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->type = type;
- label->oi = oi;
- label->datalo_reg = datalo_reg;
- label->datahi_reg = datahi_reg;
- label->addrlo_reg = addrlo_reg;
- label->addrhi_reg = addrhi_reg;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = lptr;
-}
-
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
MemOpIdx oi = lb->oi;
@@ -2225,27 +2091,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
-
- /* We are expecting a_bits to max out at 7, much lower than ANDI. */
- tcg_debug_assert(a_bits < 16);
- tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, a_mask));
-
- label->label_ptr[0] = s->code_ptr;
- tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
-
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc14(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -2294,37 +2139,171 @@ typedef struct {
TCGReg index;
} HostAddress;
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For user-only, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ * The caller completes the label (type, data registers, raddr) after
+ * emitting the fast-path access itself.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+
+#ifdef CONFIG_SOFTMMU
+ int mem_index = get_mmuidx(oi);
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
+ int table_off = fast_off + offsetof(CPUTLBDescFast, table);
+ unsigned s_bits = opc & MO_SIZE;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx]. */
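+    /* We expect to use a 16-bit negative offset from ENV. */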
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
+
+ /* Extract the page index, shifted into place for tlb index. */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ } else {
+ tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+ }
+ tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
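+    /*
+     * R3 is now the byte offset of the TLB entry within the table:
+     * mask & (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)).
+     */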
+
+ /* Load the TLB comparator. */
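+    /*
+     * When the comparator sits at offset 0, the indexed-with-update forms
+     * LWZUX/LDUX load it and advance R3 to the entry address in a single
+     * instruction, saving the explicit ADD in the else branch.
+     */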
+ if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+ uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
+ ? LWZUX : LDUX);
+ tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
+ } else {
+ tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
+ } else {
+ tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+ }
+ }
+
+    /*
+     * Load the TLB addend for use on the fast path.
+     * Do this as early as possible to minimize the load-use delay.
+     */
+ h->base = TCG_REG_R3;
+ tcg_out_ld(s, TCG_TYPE_PTR, h->base, TCG_REG_R3,
+ offsetof(CPUTLBEntry, addend));
+
+ /* Clear the non-page, non-alignment bits from the address */
+ if (TCG_TARGET_REG_BITS == 32) {
+ /*
+ * We don't support unaligned accesses on 32-bits.
+ * Preserve the bottom bits and thus trigger a comparison
+ * failure on unaligned accesses.
+ */
+ if (a_bits < s_bits) {
+ a_bits = s_bits;
+ }
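+        /*
+         * The wrapped RLWINM mask keeps the page number plus the low
+         * a_bits alignment bits, zeroing the bits in between, so any
+         * preserved low bit forces the compare below to fail.
+         */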
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ } else {
+ TCGReg t = addrlo;
+
+ /*
+ * If the access is unaligned, we need to make sure we fail if we
+ * cross a page boundary. The trick is to add the access size-1
+ * to the address before masking the low bits. That will make the
+ * address overflow to the next page if we cross a page boundary,
+ * which will then force a mismatch of the TLB compare.
+ */
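+        /*
+         * For example, an 8-byte access with a_bits == 0 adds 7, so an
+         * address in the last 7 bytes of a page carries into the next
+         * page and the compare fails as required.
+         */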
+ if (a_bits < s_bits) {
+ unsigned a_mask = (1 << a_bits) - 1;
+ unsigned s_mask = (1 << s_bits) - 1;
+ tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
+ t = TCG_REG_R0;
+ }
+
+ /* Mask the address for the requested alignment. */
+ if (TARGET_LONG_BITS == 32) {
+ tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
+ (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+ /* Zero-extend the address for use in the final address. */
+ tcg_out_ext32u(s, TCG_REG_R4, addrlo);
+ addrlo = TCG_REG_R4;
+ } else if (a_bits == 0) {
+ tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
+ } else {
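+            /*
+             * Rotate right by TARGET_PAGE_BITS, clear what were bits
+             * [a_bits, TARGET_PAGE_BITS), then rotate back: both the
+             * page number and the low alignment bits survive.
+             */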
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
+ 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
+ tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+ }
+ }
+ h->index = addrlo;
+
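+    /*
+     * With a 64-bit guest on a 32-bit host, compare both halves of the
+     * address in CR7 and CR6 and merge the EQ bits with CRAND, so the
+     * branch below tests a single condition bit.
+     */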
+ if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_I32);
+ tcg_out_cmp(s, TCG_COND_EQ, addrhi, TCG_REG_R4, 0, 6, TCG_TYPE_I32);
+ tcg_out32(s, CRAND | BT(7, CR_EQ) | BA(6, CR_EQ) | BB(7, CR_EQ));
+ } else {
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_R0, TCG_REG_TMP1,
+ 0, 7, TCG_TYPE_TL);
+ }
+
+    /* Record the BC to patch; branch-and-link to the slow path if the
+       TLB compare failed. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+#else
+ if (a_bits) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+        /* We expect a_bits to max out at 7, well within the reach of
+           ANDI's 16-bit unsigned immediate. */
+ tcg_debug_assert(a_bits < 16);
+ tcg_out32(s, ANDI | SAI(addrlo, TCG_REG_R0, (1 << a_bits) - 1));
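+        /* ANDI. is a record form: CR0.EQ is set iff the low bits are 0. */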
+
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out32(s, BC | BI(0, CR_EQ) | BO_COND_FALSE | LK);
+ }
+
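+    /*
+     * In the indexed load/store forms RA = 0 reads as the constant zero,
+     * so a zero guest_base needs no base register at all.
+     */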
+ h->base = guest_base ? TCG_GUEST_BASE_REG : 0;
+ h->index = addrlo;
+ if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+ tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
+ h->index = TCG_REG_TMP1;
+ }
+#endif
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
- h.base = TCG_REG_R3;
-
- /* Load a pointer into the current opcode w/conditional branch-link. */
- label_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
- }
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
- h.index = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
- h.index = TCG_REG_TMP1;
- }
-#endif
-
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, LWBRX | TAB(datalo, h.base, h.index));
@@ -2357,10 +2336,12 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#endif
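+    /* Fast path emitted; complete the slow-path label with the data
+       registers and return address. */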
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
}
static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -2368,32 +2349,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
- MemOp s_bits = opc & MO_SIZE;
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- tcg_insn_unit *label_ptr;
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
- h.index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), false);
- h.base = TCG_REG_R3;
-
- /* Load a pointer into the current opcode w/conditional branch-link. */
- label_ptr = s->code_ptr;
- tcg_out32(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
-#else /* !CONFIG_SOFTMMU */
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
- }
- h.base = guest_base ? TCG_GUEST_BASE_REG : 0;
- h.index = addrlo;
- if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
- tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
- h.index = TCG_REG_TMP1;
- }
-#endif
-
- if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
+ if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
if (opc & MO_BSWAP) {
tcg_out32(s, ADDI | TAI(TCG_REG_R0, h.index, 4));
tcg_out32(s, STWBRX | SAB(datalo, h.base, h.index));
@@ -2418,10 +2379,12 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
}
}
-#ifdef CONFIG_SOFTMMU
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#endif
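+    /* As with the load, complete the slow-path label now that the
+       fast path is emitted. */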
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ }
}
static void tcg_out_nop_fill(tcg_insn_unit *p, int count)