@@ -1174,6 +1174,27 @@
;; Widening operations
+(define_insn_and_split "widen_ssum<mode>3" ; full-width signed widening sum; split below into two half-width adds
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=&w") ; early-clobber: written before operand 1 is fully read
+        (plus:<V_double_width> (unspec:<V_double_width>
+          [(match_operand:VQI 1 "s_register_operand" "w")] UNSPEC_VSIGN_EXTEND)
+         (match_operand:<V_double_width> 2 "s_register_operand" "0")))] ; accumulator, tied to operand 0
+  "TARGET_NEON"
+  "#" ; no direct assembler output: the insn must be split
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    rtx loreg = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);
+    rtx hireg = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+                                     GET_MODE_SIZE (<V_HALF>mode));
+    /* Operand 2 is tied to operand 0: chain the second add on the partial sum.  */
+    emit_insn (gen_widen_ssum<V_half>3 (operands[0], loreg, operands[2]));
+    emit_insn (gen_widen_ssum<V_half>3 (operands[0], hireg, operands[0]));
+    DONE;
+  }
+  [(set_attr "type" "neon_add_widen")
+   (set_attr "length" "8")]) ; the split emits two instructions
+
(define_insn "widen_ssum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (sign_extend:<V_widen>
@@ -1184,6 +1205,27 @@
[(set_attr "type" "neon_add_widen")]
)
+(define_insn_and_split "widen_usum<mode>3" ; full-width unsigned widening sum; split below into two half-width adds
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "=&w") ; early-clobber: written before operand 1 is fully read
+        (plus:<V_double_width> (unspec:<V_double_width>
+          [(match_operand:VQI 1 "s_register_operand" "w")] UNSPEC_VZERO_EXTEND)
+         (match_operand:<V_double_width> 2 "s_register_operand" "0")))] ; accumulator, tied to operand 0
+  "TARGET_NEON"
+  "#" ; no direct assembler output: the insn must be split
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    rtx loreg = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0);
+    rtx hireg = simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
+                                     GET_MODE_SIZE (<V_HALF>mode));
+    /* Operand 2 is tied to operand 0: chain the second add on the partial sum.  */
+    emit_insn (gen_widen_usum<V_half>3 (operands[0], loreg, operands[2]));
+    emit_insn (gen_widen_usum<V_half>3 (operands[0], hireg, operands[0]));
+    DONE;
+  }
+  [(set_attr "type" "neon_add_widen")
+   (set_attr "length" "8")]) ; the split emits two instructions
+
(define_insn "widen_usum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (zero_extend:<V_widen>
@@ -358,5 +358,7 @@
UNSPEC_NVRINTX
UNSPEC_NVRINTA
UNSPEC_NVRINTN
+ UNSPEC_VZERO_EXTEND
+ UNSPEC_VSIGN_EXTEND
])
b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Sum LEN (rounded down to a multiple of 32) signed 16-bit elements of X into an int.  */
+int
+t6(int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;  /* Clear the low five bits.  */
+  int result = 0;
+  __asm volatile ("");  /* NOTE(review): presumably an optimization barrier; confirm.  */
+  for (int i = 0; i < len; i++)
+    result += x[i];  /* Widening accumulate the scan below expects as vaddw.s16.  */
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\\.s16" } } */
+
+
+
b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+/* Sum 32-bit ints into a 64-bit accumulator.  NOTE(review): the int return truncates it.  */
+int
+t6(int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;  /* Clear the low five bits.  */
+  long long result = 0;
+  __asm volatile ("");  /* NOTE(review): presumably an optimization barrier; confirm.  */
+  for (int i = 0; i < len; i++)
+    result += x[i];  /* Widening accumulate the scan below expects as vaddw.s32.  */
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\\.s32" } } */
+
+
b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Sum LEN (rounded down to a multiple of 32) unsigned 16-bit elements of X into an unsigned int.  */
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;  /* Clear the low five bits.  */
+  unsigned int result = 0;
+  __asm volatile ("");  /* NOTE(review): presumably an optimization barrier; confirm.  */
+  for (int i = 0; i < len; i++)
+    result += x[i];  /* Widening accumulate the scan below expects as vaddw.u16.  */
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\\.u16" } } */
b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+/* Sum unsigned 32-bit ints into a 64-bit accumulator.  NOTE(review): the int return truncates it.  */
+int
+t6(int len, void * dummy, unsigned int * __restrict x)
+{
+  len = len & ~31;  /* Clear the low five bits.  */
+  unsigned long long result = 0;
+  __asm volatile ("");  /* NOTE(review): presumably an optimization barrier; confirm.  */
+  for (int i = 0; i < len; i++)
+    result += x[i];  /* Widening accumulate the scan below expects as vaddw.u32.  */
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\\.u32" } } */
+
b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+/* Sum unsigned bytes of X into a 16-bit accumulator; X is explicitly unsigned so plain-char signedness cannot matter.  */
+int
+t6(int len, void * dummy, unsigned char * __restrict x)
+{
+  len = len & ~31;  /* Clear the low five bits.  */
+  unsigned short result = 0;
+  __asm volatile ("");  /* NOTE(review): presumably an optimization barrier; confirm.  */
+  for (int i = 0; i < len; i++)
+    result += x[i];  /* Widening accumulate the scan below expects as vaddw.u8.  */
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\\.u8" } } */
+
+
+