@@ -4221,9 +4221,9 @@ typedef struct {
  * final element on each page. Identify any single element that spans
  * the page boundary. Return true if there are any active elements.
  */
-static bool __attribute__((unused))
-sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
-                       intptr_t reg_max, int esz, int msize)
+static bool sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr,
+                                   uint64_t *vg, intptr_t reg_max,
+                                   int esz, int msize)
 {
     const int esize = 1 << esz;
     const uint64_t pg_mask = pred_esz_masks[esz];
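The comment above summarizes what sve_cont_ldst_elements computes: the first and last active element on each page, plus any element that straddles the page boundary. As a stand-alone sketch of the bit manipulation behind "first/last active element", here is how one predicate word can be scanned; the mask table mirrors pred_esz_masks from this file, the helper names are hypothetical, and the GCC/Clang count-zero builtins are assumed.

#include <stdint.h>

/* One predicate bit per byte; an element is active when the low bit of its
   field is set, so masking keeps only each element's controlling bit. */
static const uint64_t esz_masks_sketch[4] = {
    0xffffffffffffffffull,   /* 1-byte elements */
    0x5555555555555555ull,   /* 2-byte elements */
    0x1111111111111111ull,   /* 4-byte elements */
    0x0101010101010101ull,   /* 8-byte elements */
};

/* Byte offset of the first active element within one 64-bit predicate
   word, or -1 if the word has no active element. */
static int first_active_sketch(uint64_t pg, int esz)
{
    pg &= esz_masks_sketch[esz];
    return pg ? __builtin_ctzll(pg) : -1;
}

/* Byte offset of the last active element, or -1 if none. */
static int last_active_sketch(uint64_t pg, int esz)
{
    pg &= esz_masks_sketch[esz];
    return pg ? 63 - __builtin_clzll(pg) : -1;
}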
@@ -4313,10 +4313,9 @@ sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
  * Control the generation of page faults with @fault. Return false if
  * there is no work to do, which can only happen with @fault == FAULT_NO.
  */
-static bool __attribute__((unused))
-sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, CPUARMState *env,
-                    target_ulong addr, MMUAccessType access_type,
-                    uintptr_t retaddr)
+static bool sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault,
+                                CPUARMState *env, target_ulong addr,
+                                MMUAccessType access_type, uintptr_t retaddr)
 {
     int mmu_idx = cpu_mmu_index(env, false);
     int mem_off = info->mem_off_first[0];
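For context on the @fault parameter described above: sve_ld1_r below passes FAULT_ALL, so any inaccessible page raises immediately, and only a FAULT_NO probe can return false. A hypothetical non-faulting caller, sketched against the two helpers in this patch rather than taken from it (the function name and its trimmed parameter list are assumptions), would look roughly like:

static void sve_ldnf1_r_sketch(CPUARMState *env, uint64_t *vg,
                               target_ulong addr, intptr_t reg_max,
                               int esz, int msz, uintptr_t retaddr)
{
    SVEContLdSt info;

    /* No active elements: nothing to load. */
    if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) {
        return;
    }
    /* With FAULT_NO the probe never raises; false means no usable page. */
    if (!sve_cont_ldst_pages(&info, FAULT_NO, env, addr,
                             MMU_DATA_LOAD, retaddr)) {
        return;
    }
    /* ... load only from the page(s) that info.page[] reports as valid ... */
}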
@@ -4388,109 +4387,116 @@ static inline bool test_host_page(void *host)
 /*
  * Common helper for all contiguous one-register predicated loads.
  */
-static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
-                      uint32_t desc, const uintptr_t retaddr,
-                      const int esz, const int msz,
-                      sve_ldst1_host_fn *host_fn,
-                      sve_ldst1_tlb_fn *tlb_fn)
+static inline QEMU_ALWAYS_INLINE
+void sve_ld1_r(CPUARMState *env, uint64_t *vg, const target_ulong addr,
+               uint32_t desc, const uintptr_t retaddr,
+               const int esz, const int msz,
+               sve_ldst1_host_fn *host_fn,
+               sve_ldst1_tlb_fn *tlb_fn)
 {
-    const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
-    const int mmu_idx = get_mmuidx(oi);
     const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
     void *vd = &env->vfp.zregs[rd];
-    const int diffsz = esz - msz;
     const intptr_t reg_max = simd_oprsz(desc);
-    const intptr_t mem_max = reg_max >> diffsz;
-    ARMVectorReg scratch;
+    intptr_t reg_off, reg_last, mem_off;
+    SVEContLdSt info;
     void *host;
-    intptr_t split, reg_off, mem_off;
+    int flags;
 
-    /* Find the first active element. */
-    reg_off = find_next_active(vg, 0, reg_max, esz);
-    if (unlikely(reg_off == reg_max)) {
+    /* Find the active elements. */
+    if (!sve_cont_ldst_elements(&info, addr, vg, reg_max, esz, 1 << msz)) {
         /* The entire predicate was false; no load occurs. */
         memset(vd, 0, reg_max);
         return;
     }
-    mem_off = reg_off >> diffsz;
 
-    /*
-     * If the (remaining) load is entirely within a single page, then:
-     * For softmmu, and the tlb hits, then no faults will occur;
-     * For user-only, either the first load will fault or none will.
-     * We can thus perform the load directly to the destination and
-     * Vd will be unmodified on any exception path.
-     */
-    split = max_for_page(addr, mem_off, mem_max);
-    if (likely(split == mem_max)) {
-        host = tlb_vaddr_to_host(env, addr + mem_off, MMU_DATA_LOAD, mmu_idx);
-        if (test_host_page(host)) {
-            intptr_t i = reg_off;
-            host -= mem_off;
-            do {
-                host_fn(vd, i, host + (i >> diffsz));
-                i = find_next_active(vg, i + (1 << esz), reg_max, esz);
-            } while (i < reg_max);
-            /* After having taken any fault, zero leading inactive elements. */
-            swap_memzero(vd, reg_off);
-            return;
-        }
-    }
+    /* Probe the page(s). Exit with exception for any invalid page. */
+    sve_cont_ldst_pages(&info, FAULT_ALL, env, addr, MMU_DATA_LOAD, retaddr);
 
-    /*
-     * Perform the predicated read into a temporary, thus ensuring
-     * if the load of the last element faults, Vd is not modified.
-     */
+    flags = info.page[0].flags | info.page[1].flags;
+    if (unlikely(flags != 0)) {
 #ifdef CONFIG_USER_ONLY
-    swap_memzero(&scratch, reg_off);
-    host = g2h(addr);
-    do {
-        host_fn(&scratch, reg_off, host + (reg_off >> diffsz));
-        reg_off += 1 << esz;
-        reg_off = find_next_active(vg, reg_off, reg_max, esz);
-    } while (reg_off < reg_max);
+        g_assert_not_reached();
 #else
-    memset(&scratch, 0, reg_max);
-    goto start;
-    while (1) {
-        reg_off = find_next_active(vg, reg_off, reg_max, esz);
-        if (reg_off >= reg_max) {
-            break;
-        }
-        mem_off = reg_off >> diffsz;
-        split = max_for_page(addr, mem_off, mem_max);
+        /*
+         * At least one page includes MMIO (or watchpoints).
+         * Any bus operation can fail with cpu_transaction_failed,
+         * which for ARM will raise SyncExternal. Perform the load
+         * into scratch memory to preserve register state until the end.
+         */
+        ARMVectorReg scratch;
 
-    start:
-        if (split - mem_off >= (1 << msz)) {
-            /* At least one whole element on this page. */
-            host = tlb_vaddr_to_host(env, addr + mem_off,
-                                     MMU_DATA_LOAD, mmu_idx);
-            if (host) {
-                host -= mem_off;
-                do {
-                    host_fn(&scratch, reg_off, host + mem_off);
-                    reg_off += 1 << esz;
-                    reg_off = find_next_active(vg, reg_off, reg_max, esz);
-                    mem_off = reg_off >> diffsz;
-                } while (split - mem_off >= (1 << msz));
-                continue;
+        memset(&scratch, 0, reg_max);
+        mem_off = info.mem_off_first[0];
+        reg_off = info.reg_off_first[0];
+        reg_last = info.reg_off_last[1];
+        if (reg_last < 0) {
+            reg_last = info.reg_off_split;
+            if (reg_last < 0) {
+                reg_last = info.reg_off_last[0];
             }
         }
 
-        /*
-         * Perform one normal read. This may fault, longjmping out to the
-         * main loop in order to raise an exception. It may succeed, and
-         * as a side-effect load the TLB entry for the next round. Finally,
-         * in the extremely unlikely case we're performing this operation
-         * on I/O memory, it may succeed but not bring in the TLB entry.
-         * But even then we have still made forward progress.
-         */
-        tlb_fn(env, &scratch, reg_off, addr + mem_off, retaddr);
-        reg_off += 1 << esz;
-    }
-#endif
+        do {
+            uint64_t pg = vg[reg_off >> 6];
+            do {
+                if ((pg >> (reg_off & 63)) & 1) {
+                    tlb_fn(env, &scratch, reg_off, addr + mem_off, retaddr);
+                }
+                reg_off += 1 << esz;
+                mem_off += 1 << msz;
+            } while (reg_off & 63);
+        } while (reg_off <= reg_last);
 
-    memcpy(vd, &scratch, reg_max);
+        memcpy(vd, &scratch, reg_max);
+        return;
+#endif
+    }
+
+    /* The entire operation is in RAM, on valid pages. */
+
+    memset(vd, 0, reg_max);
+    mem_off = info.mem_off_first[0];
+    reg_off = info.reg_off_first[0];
+    reg_last = info.reg_off_last[0];
+    host = info.page[0].host;
+
+    while (reg_off <= reg_last) {
+        uint64_t pg = vg[reg_off >> 6];
+        do {
+            if ((pg >> (reg_off & 63)) & 1) {
+                host_fn(vd, reg_off, host + mem_off);
+            }
+            reg_off += 1 << esz;
+            mem_off += 1 << msz;
+        } while (reg_off <= reg_last && (reg_off & 63));
+    }
+
+    /*
+     * Use the slow path to manage the cross-page misalignment.
+     * But we know this is RAM and cannot trap.
+     */
+    mem_off = info.mem_off_split;
+    if (unlikely(mem_off >= 0)) {
+        tlb_fn(env, vd, info.reg_off_split, addr + mem_off, retaddr);
+    }
+
+    mem_off = info.mem_off_first[1];
+    if (unlikely(mem_off >= 0)) {
+        reg_off = info.reg_off_first[1];
+        reg_last = info.reg_off_last[1];
+        host = info.page[1].host;
+
+        do {
+            uint64_t pg = vg[reg_off >> 6];
+            do {
+                if ((pg >> (reg_off & 63)) & 1) {
+                    host_fn(vd, reg_off, host + mem_off);
+                }
+                reg_off += 1 << esz;
+                mem_off += 1 << msz;
+            } while (reg_off & 63);
+        } while (reg_off <= reg_last);
+    }
 }
 
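The same two-level loop appears three times in the new body above (the scratch/MMIO path, page 0, and page 1): the outer loop fetches 64 predicate bits at a time, the inner loop steps element by element within that word. A stand-alone sketch of just that shape, with hypothetical names and a callback standing in for host_fn/tlb_fn, assuming the SVE one-predicate-bit-per-byte layout:

#include <stdint.h>

typedef void visit_fn_sketch(void *vd, intptr_t reg_off, intptr_t mem_off);

/* Visit every active element from reg_off through reg_last inclusive.
   esz/msz are log2 of the register and memory element sizes. */
static void walk_active_sketch(void *vd, uint64_t *vg,
                               intptr_t reg_off, intptr_t reg_last,
                               intptr_t mem_off, int esz, int msz,
                               visit_fn_sketch *visit)
{
    do {
        uint64_t pg = vg[reg_off >> 6];        /* 64 predicate bits */
        do {
            if ((pg >> (reg_off & 63)) & 1) {  /* element active? */
                visit(vd, reg_off, mem_off);
            }
            reg_off += 1 << esz;               /* next element in the register */
            mem_off += 1 << msz;               /* next element in memory */
        } while (reg_off & 63);                /* end of this predicate word */
    } while (reg_off <= reg_last);
}

Note that the page-0 loop in the patch also re-checks reg_off <= reg_last in its inner condition, because later elements in the same predicate word may belong to page 1 or to the split element and must not be read through page 0's host pointer; the MMIO and page-1 loops run to the overall last active element, so any overshoot within the final word only touches inactive elements.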
#define DO_LD1_1(NAME, ESZ) \