sim/
* MAINTAINERS (aarch64): Add myself.
sim/aarch64/
* simulator.c (do_vec_MLA): Rewrite switch body.
sim/testsuite/sim/aarch64/
* mla.s: New.
@@ -1,3 +1,7 @@
+2017-02-14 Jim Wilson <jim.wilson@linaro.org>
+
+ * MAINTAINERS (aarch64): Add myself.
+
2016-12-14 Maciej W. Rozycki <macro@imgtec.com>

* MAINTAINERS (Maintainers for particular sims): Add myself as
@@ -14,6 +14,7 @@ Mike Frysinger <vapier@gentoo.org>
Maintainers for particular sims:
aarch64 Nick Clifton <nickc@redhat.com>
+aarch64 Jim Wilson <jim.wilson@linaro.org>
arm Nick Clifton <nickc@redhat.com>
bfin Mike Frysinger <vapier@gentoo.org>
cr16 M R Swami Reddy <MR.Swami.Reddy@nsc.com>
@@ -1,5 +1,7 @@
2017-02-14 Jim Wilson <jim.wilson@linaro.org>

+ * simulator.c (do_vec_MLA): Rewrite switch body.
+
* simulator.c (do_vec_bit): Change loop limits from 16 and 8 to 4 and
2. Move test_false if inside loop. Fix logic for computing result
stored to vd.
@@ -3799,63 +3799,30 @@ do_vec_MLA (sim_cpu *cpu)
switch (INSTR (23, 22))
{
case 0:
- {
- uint16_t a[16], b[16];
-
- for (i = 0; i < (full ? 16 : 8); i++)
- {
- a[i] = aarch64_get_vec_u8 (cpu, vn, i);
- b[i] = aarch64_get_vec_u8 (cpu, vm, i);
- }
-
- for (i = 0; i < (full ? 16 : 8); i++)
- {
- uint16_t v = aarch64_get_vec_u8 (cpu, vd, i);
-
- aarch64_set_vec_u16 (cpu, vd, i, v + (a[i] * b[i]));
- }
- }
+ for (i = 0; i < (full ? 16 : 8); i++)
+ aarch64_set_vec_u8 (cpu, vd, i,
+ aarch64_get_vec_u8 (cpu, vd, i)
+ + (aarch64_get_vec_u8 (cpu, vn, i)
+ * aarch64_get_vec_u8 (cpu, vm, i)));
return;
case 1:
- {
- uint32_t a[8], b[8];
-
- for (i = 0; i < (full ? 8 : 4); i++)
- {
- a[i] = aarch64_get_vec_u16 (cpu, vn, i);
- b[i] = aarch64_get_vec_u16 (cpu, vm, i);
- }
-
- for (i = 0; i < (full ? 8 : 4); i++)
- {
- uint32_t v = aarch64_get_vec_u16 (cpu, vd, i);
-
- aarch64_set_vec_u32 (cpu, vd, i, v + (a[i] * b[i]));
- }
- }
+ for (i = 0; i < (full ? 8 : 4); i++)
+ aarch64_set_vec_u16 (cpu, vd, i,
+ aarch64_get_vec_u16 (cpu, vd, i)
+ + (aarch64_get_vec_u16 (cpu, vn, i)
+ * aarch64_get_vec_u16 (cpu, vm, i)));
return;
case 2:
- {
- uint64_t a[4], b[4];
-
- for (i = 0; i < (full ? 4 : 2); i++)
- {
- a[i] = aarch64_get_vec_u32 (cpu, vn, i);
- b[i] = aarch64_get_vec_u32 (cpu, vm, i);
- }
-
- for (i = 0; i < (full ? 4 : 2); i++)
- {
- uint64_t v = aarch64_get_vec_u32 (cpu, vd, i);
-
- aarch64_set_vec_u64 (cpu, vd, i, v + (a[i] * b[i]));
- }
- }
+ for (i = 0; i < (full ? 4 : 2); i++)
+ aarch64_set_vec_u32 (cpu, vd, i,
+ aarch64_get_vec_u32 (cpu, vd, i)
+ + (aarch64_get_vec_u32 (cpu, vn, i)
+ * aarch64_get_vec_u32 (cpu, vm, i)));
return;
- case 3:
+ default:
HALT_UNALLOC;
}
}
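
The old case bodies read each source lane at its element width but stored the accumulated result one width wider (aarch64_set_vec_u16/u32/u64), so the non-widening MLA both widened the result and overwrote neighbouring lanes; the rewrite keeps loads and stores at the lane width selected by bits 23:22. A minimal standalone sketch of the byte-lane semantics the new code implements, using plain arrays rather than the simulator's vector-register accessors:

/* Standalone illustration of the per-lane MLA semantics:
   vd[i] += vn[i] * vm[i], truncated to the element width.  Plain
   uint8_t arrays stand in for the simulator's vector registers; only
   the 8-bit (case 0, 8B) form is shown.  */
#include <stdint.h>
#include <stdio.h>

static void
mla_u8 (uint8_t *vd, const uint8_t *vn, const uint8_t *vm, int lanes)
{
  int i;

  for (i = 0; i < lanes; i++)
    vd[i] = (uint8_t) (vd[i] + vn[i] * vm[i]);	/* wraps modulo 256 */
}

int
main (void)
{
  uint8_t vd[8], vn[8], vm[8];
  int i;

  for (i = 0; i < 8; i++)
    {
      vd[i] = 1;			/* like movi v1.8b, #1 in the test */
      vn[i] = vm[i] = i + 1;		/* like the test's input bytes */
    }
  mla_u8 (vd, vn, vm, 8);
  for (i = 0; i < 8; i++)
    printf ("%02x ", vd[i]);		/* 02 05 0a 11 1a 25 32 41 */
  printf ("\n");
  return 0;
}
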
@@ -1,5 +1,7 @@
2017-02-14 Jim Wilson <jim.wilson@linaro.org>

+ * mla.s: New.
+
* bit.s: New.
* ldn_single.s: New.
new file mode 100644
@@ -0,0 +1,103 @@
+# mach: aarch64
+
+# Check the vector multiply add instruction: mla.
+
+.include "testutils.inc"
+
+input:
+ .word 0x04030201
+ .word 0x08070605
+ .word 0x0c0b0a09
+ .word 0x100f0e0d
+m8b:
+ .word 0x110a0502
+ .word 0x4132251a
+m16b:
+ .word 0x110a0502
+ .word 0x4132251a
+ .word 0x917a6552
+ .word 0x01e2c5aa
+m4h:
+ .word 0x180a0402
+ .word 0x70323c1a
+m8h:
+ .word 0x180a0402
+ .word 0x70323c1a
+ .word 0x087ab452
+ .word 0xe0e26caa
+m2s:
+ .word 0x140a0402
+ .word 0xa46a3c1a
+m4s:
+ .word 0x140a0402
+ .word 0xa46a3c1a
+ .word 0xb52ab452
+ .word 0x464b6caa
+
+ start
+ adrp x0, input
+ ldr q0, [x0, #:lo12:input]
+
+ movi v1.8b, #1
+ mla v1.8b, v0.8b, v0.8b
+ mov x1, v1.d[0]
+ adrp x3, m8b
+ ldr x4, [x3, #:lo12:m8b]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.16b, #1
+ mla v1.16b, v0.16b, v0.16b
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m16b
+ ldr x4, [x3, #:lo12:m16b]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m16b+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ movi v1.4h, #1
+ mla v1.4h, v0.4h, v0.4h
+ mov x1, v1.d[0]
+ adrp x3, m4h
+ ldr x4, [x3, #:lo12:m4h]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.8h, #1
+ mla v1.8h, v0.8h, v0.8h
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m8h
+ ldr x4, [x3, #:lo12:m8h]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m8h+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ movi v1.2s, #1
+ mla v1.2s, v0.2s, v0.2s
+ mov x1, v1.d[0]
+ adrp x3, m2s
+ ldr x4, [x3, #:lo12:m2s]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.4s, #1
+ mla v1.4s, v0.4s, v0.4s
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m4s
+ ldr x4, [x3, #:lo12:m4s]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m4s+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ pass
+.Lfailure:
+ fail
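
For reference, the expected data words in the test can be re-derived from the same per-lane formula: every lane of v1 starts at 1, so lane i of the 8B/16B results is (1 + input_byte_i^2) mod 256, packed little-endian into the .word constants. A small standalone C check for the m8b/m16b tables (the m4h/m8h and m2s/m4s tables follow the same pattern at 16- and 32-bit lane width):

/* Re-derive the m8b/m16b expected words: input byte i is i+1, v1 starts
   at 1 in every lane, so result lane i is (1 + (i+1)*(i+1)) mod 256,
   packed little-endian into 32-bit .word constants.  */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t lane[16];
  int i, w, b;

  for (i = 0; i < 16; i++)
    lane[i] = (uint8_t) (1 + (i + 1) * (i + 1));

  for (w = 0; w < 4; w++)
    {
      uint32_t word = 0;

      for (b = 0; b < 4; b++)
	word |= (uint32_t) lane[4 * w + b] << (8 * b);
      /* Prints 0x110a0502, 0x4132251a, 0x917a6552, 0x01e2c5aa:
	 the two m8b words followed by the remaining two m16b words.  */
      printf ("0x%08" PRIx32 "\n", word);
    }
  return 0;
}
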