diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -93,25 +93,6 @@ void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
arm_rebuild_hflags(env);
}

-/* 64bit/double versions of the neon float compare functions */
-uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_eq_quiet(a, b, fpst);
-}
-
-uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_le(b, a, fpst);
-}
-
-uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
-{
- float_status *fpst = fpstp;
- return -float64_lt(b, a, fpst);
-}
-
/* Reciprocal step and sqrt step. Note that unlike the A32/T32
* versions, these do a fully fused multiply-add or
* multiply-add-and-halve.
@@ -207,67 +188,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

-/* Pairwise long add: add pairs of adjacent elements into
- * double-width elements in the result (eg _s8 is an 8x8->16 op)
- */
-uint64_t HELPER(neon_addlp_s8)(uint64_t a)
-{
- uint64_t nsignmask = 0x0080008000800080ULL;
- uint64_t wsignmask = 0x8000800080008000ULL;
- uint64_t elementmask = 0x00ff00ff00ff00ffULL;
- uint64_t tmp1, tmp2;
- uint64_t res, signres;
-
- /* Extract odd elements, sign extend each to a 16 bit field */
- tmp1 = a & elementmask;
- tmp1 ^= nsignmask;
- tmp1 |= wsignmask;
- tmp1 = (tmp1 - nsignmask) ^ wsignmask;
- /* Ditto for the even elements */
- tmp2 = (a >> 8) & elementmask;
- tmp2 ^= nsignmask;
- tmp2 |= wsignmask;
- tmp2 = (tmp2 - nsignmask) ^ wsignmask;
-
- /* calculate the result by summing bits 0..14, 16..22, etc,
- * and then adjusting the sign bits 15, 23, etc manually.
- * This ensures the addition can't overflow the 16 bit field.
- */
- signres = (tmp1 ^ tmp2) & wsignmask;
- res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
- res ^= signres;
-
- return res;
-}
-
-uint64_t HELPER(neon_addlp_u8)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x00ff00ff00ff00ffULL;
- tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
- return tmp;
-}
-
-uint64_t HELPER(neon_addlp_s16)(uint64_t a)
-{
- int32_t reslo, reshi;
-
- reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
- reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
-
- return (uint32_t)reslo | (((uint64_t)reshi) << 32);
-}
-
-uint64_t HELPER(neon_addlp_u16)(uint64_t a)
-{
- uint64_t tmp;
-
- tmp = a & 0x0000ffff0000ffffULL;
- tmp += (a >> 16) & 0x0000ffff0000ffffULL;
- return tmp;
-}
-
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c
--- a/target/arm/tcg/neon_helper.c
+++ b/target/arm/tcg/neon_helper.c
@@ -1738,3 +1738,109 @@ void HELPER(neon_zip16)(void *vd, void *vm)
rm[0] = m0;
rd[0] = d0;
}
+
+uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
+ uint64_t ireg, uint64_t def)
+{
+ uint64_t tmp, val = 0;
+ uint32_t maxindex = ((desc & 3) + 1) * 8;
+ uint32_t base_reg = desc >> 2;
+ uint32_t shift, index, reg;
+
+ for (shift = 0; shift < 64; shift += 8) {
+ index = (ireg >> shift) & 0xff;
+ if (index < maxindex) {
+ reg = base_reg + (index >> 3);
+ tmp = *aa32_vfp_dreg(env, reg);
+ tmp = ((tmp >> ((index & 7) << 3)) & 0xff) << shift;
+ } else {
+ tmp = def & (0xffull << shift);
+ }
+ val |= tmp;
+ }
+ return val;
+}
+
+#ifdef TARGET_AARCH64
+
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract odd elements, sign extend each to a 16 bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the even elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* calculate the result by summing bits 0..14, 16..22, etc,
+ * and then adjusting the sign bits 15, 23, etc manually.
+ * This ensures the addition can't overflow the 16 bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
+
+ return res;
+}
+
+uint64_t HELPER(neon_addlp_u8)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x00ff00ff00ff00ffULL;
+ tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
+ return tmp;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+ int32_t reslo, reshi;
+
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
+uint64_t HELPER(neon_addlp_u16)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x0000ffff0000ffffULL;
+ tmp += (a >> 16) & 0x0000ffff0000ffffULL;
+ return tmp;
+}
+
+/* 64bit/double versions of the neon float compare functions */
+uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_eq_quiet(a, b, fpst);
+}
+
+uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_le(b, a, fpst);
+}
+
+uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
+{
+ float_status *fpst = fpstp;
+ return -float64_lt(b, a, fpst);
+}
+
+#endif /* TARGET_AARCH64 */
diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
--- a/target/arm/tcg/op_helper.c
+++ b/target/arm/tcg/op_helper.c
@@ -82,28 +82,6 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome,
raise_exception(env, excp, syndrome, target_el);
}

-uint64_t HELPER(neon_tbl)(CPUARMState *env, uint32_t desc,
- uint64_t ireg, uint64_t def)
-{
- uint64_t tmp, val = 0;
- uint32_t maxindex = ((desc & 3) + 1) * 8;
- uint32_t base_reg = desc >> 2;
- uint32_t shift, index, reg;
-
- for (shift = 0; shift < 64; shift += 8) {
- index = (ireg >> shift) & 0xff;
- if (index < maxindex) {
- reg = base_reg + (index >> 3);
- tmp = *aa32_vfp_dreg(env, reg);
- tmp = ((tmp >> ((index & 7) << 3)) & 0xff) << shift;
- } else {
- tmp = def & (0xffull << shift);
- }
- val |= tmp;
- }
- return val;
-}
-
void HELPER(v8m_stackcheck)(CPUARMState *env, uint32_t newvalue)
{
/*

Move various NEON helpers to the well-named neon_helper.c.

Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 target/arm/tcg/helper-a64.c  |  80 --------------------------
 target/arm/tcg/neon_helper.c | 106 +++++++++++++++++++++++++++++++++++
 target/arm/tcg/op_helper.c   |  22 --------
 3 files changed, 106 insertions(+), 102 deletions(-)
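
Note on the "fully fused multiply-add" comment above HELPER(rsqrtsf_f64):
a fused multiply-add rounds once, while the two-step form rounds the
product first and can lose the low-order bit. A minimal standalone sketch
of the difference using the C99 fma() host-float analogue (illustrative
only, not QEMU's softfloat and not part of this patch):

#include <math.h>
#include <stdio.h>

int main(void)
{
    /*
     * Classic single- vs double-rounding demo: a * a is not exactly
     * representable as a double, so the split form discards the
     * low-order bit that the fused form keeps.
     */
    double a = 134217729.0;      /* 2^27 + 1 */
    double c = -0x1.0000004p54;  /* -(2^54 + 2^28), i.e. -round(a * a) */

    printf("split: %g\n", a * a + c);     /* 0: product rounded first */
    printf("fused: %g\n", fma(a, a, c));  /* 1: one rounding at the end */
    return 0;
}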
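
Note on HELPER(neon_tbl): as the helper decodes it, desc packs the table
size minus one (in registers) into bits [1:0] and the first table register
into the remaining bits; out-of-range indexes take the corresponding byte
of 'def', which is how one helper serves both VTBL (zero) and VTBX (old
destination value). A standalone model with the CPU state replaced by a
plain register array (tbl_model and the test values are mine, purely
illustrative):

#include <stdint.h>
#include <stdio.h>

static uint64_t tbl_model(const uint64_t *dregs, uint32_t desc,
                          uint64_t ireg, uint64_t def)
{
    uint32_t maxindex = ((desc & 3) + 1) * 8;  /* table size in bytes */
    uint32_t base_reg = desc >> 2;             /* first table D register */
    uint64_t val = 0;
    uint32_t shift;

    for (shift = 0; shift < 64; shift += 8) {
        uint32_t index = (ireg >> shift) & 0xff;
        uint64_t byte;

        if (index < maxindex) {
            /* In range: pick the byte out of the table registers. */
            uint64_t tmp = dregs[base_reg + (index >> 3)];
            byte = (tmp >> ((index & 7) * 8)) & 0xff;
        } else {
            /* Out of range: fall back to the matching byte of def. */
            byte = (def >> shift) & 0xff;
        }
        val |= byte << shift;
    }
    return val;
}

int main(void)
{
    uint64_t dregs[32] = { [4] = 0x0706050403020100ULL };
    /* One-register table at d4: desc = (first_reg << 2) | (nregs - 1). */
    uint64_t r = tbl_model(dregs, (4u << 2) | 0, 0x0000000000ff0302ULL, 0);

    printf("%016llx\n", (unsigned long long)r);  /* 0000000000000302 */
    return 0;
}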
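
Note on HELPER(neon_addlp_s8): the mask dance performs per-lane sign
extension without letting carries cross 16-bit lane boundaries (flip the
narrow sign bit, OR in the wide sign bit so the subtraction cannot borrow
out of a lane, subtract the narrow sign bit back, then XOR the wide sign
bit to restore it). A standalone check of the trick against a naive
per-lane reference (function names are mine, not part of the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The bit-trick version, copied from HELPER(neon_addlp_s8) above. */
static uint64_t addlp_s8_bittrick(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2, res, signres;

    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    return res ^ signres;
}

/* Obvious reference: sign extend both bytes, add, truncate to 16 bits. */
static uint64_t addlp_s8_naive(uint64_t a)
{
    uint64_t res = 0;
    int i;

    for (i = 0; i < 4; i++) {
        int lo = (int8_t)(a >> (16 * i));
        int hi = (int8_t)(a >> (16 * i + 8));
        res |= (uint64_t)(uint16_t)(lo + hi) << (16 * i);
    }
    return res;
}

int main(void)
{
    uint64_t vals[] = {
        0, ~0ULL, 0x7f80017fff00807fULL, 0x0123456789abcdefULL,
    };
    size_t i;

    for (i = 0; i < sizeof(vals) / sizeof(vals[0]); i++) {
        assert(addlp_s8_bittrick(vals[i]) == addlp_s8_naive(vals[i]));
    }
    printf("all match\n");
    return 0;
}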
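
Note on the f64 compare helpers: the softfloat predicates return 0 or 1,
and negating that in uint64_t arithmetic yields the all-zeroes/all-ones
mask the NEON compare instructions define; cge/cgt swap the operands
because softfloat exposes le/lt (a >= b is computed as b <= a). A
two-line standalone illustration (plain C, not softfloat):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Negating a 0/1 predicate in 64-bit arithmetic yields the mask. */
    assert(-(uint64_t)1 == 0xffffffffffffffffULL);  /* compare true  */
    assert(-(uint64_t)0 == 0x0000000000000000ULL);  /* compare false */
    return 0;
}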