@@ -1174,6 +1174,57 @@
;; Widening operations
+(define_expand "widen_ssum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width> (sign_extend:<V_double_width>
(match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2
"s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ int i;
+ int half_elem = <V_mode_nunits>/2;
+ rtvec v1 = rtvec_alloc (half_elem);
+ rtvec v2 = rtvec_alloc (half_elem);
+ rtx p1, p2;
+
+ for (i = 0; i < half_elem; i++)
+ RTVEC_ELT (v1, i) = GEN_INT (i);
+ p1 = gen_rtx_PARALLEL (GET_MODE (operands[1]), v1);
+
+ for (i = half_elem; i < <V_mode_nunits>; i++)
+ RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+ p2 = gen_rtx_PARALLEL (GET_MODE (operands[1]), v2);
+
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
operands[1], p1, operands[0]));
+ emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
operands[1], p2, operands[0]));
+ DONE;
+ }
+)
+
+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW
(match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low"
"")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q3, %e1"
+ [(set_attr "type" "neon_add_widen")
+ (set_attr "length" "8")]
+)
+
+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (sign_extend:<VW:V_widen> (vec_select:VW
(match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2
"vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+ "vaddw.<V_s_elem>\t%q0, %q3, %f1"
+ [(set_attr "type" "neon_add_widen")
+ (set_attr "length" "8")]
+)
+
(define_insn "widen_ssum<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (sign_extend:<V_widen>
@@ -1184,6 +1235,57 @@
[(set_attr "type" "neon_add_widen")]
)
+(define_expand "widen_usum<mode>3"
+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+ (plus:<V_double_width> (zero_extend:<V_double_width>
(match_operand:VQI 1 "s_register_operand" ""))
+ (match_operand:<V_double_width> 2
"s_register_operand" "")))]
+ "TARGET_NEON"
+ {
+ int i;
+ int half_elem = <V_mode_nunits>/2;
+ rtvec v1 = rtvec_alloc (half_elem);
+ rtvec v2 = rtvec_alloc (half_elem);
+ rtx p1, p2;
+
+ for (i = 0; i < half_elem; i++)
+ RTVEC_ELT (v1, i) = GEN_INT (i);
+ p1 = gen_rtx_PARALLEL (GET_MODE (operands[1]), v1);
+
+ for (i = half_elem; i < <V_mode_nunits>; i++)
+ RTVEC_ELT (v2, i - half_elem) = GEN_INT (i);
+ p2 = gen_rtx_PARALLEL (GET_MODE (operands[1]), v2);
+
+ if (operands[0] != operands[2])
+ emit_move_insn (operands[0], operands[2]);
+
+ emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
operands[1], p1, operands[0]));
+ emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
operands[1], p2, operands[0]));
+ DONE;
+ }
+)
+
+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW
(match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2 "vect_par_constant_low"
"")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+ "vaddw.<V_u_elem>\t%q0, %q3, %e1"
+ [(set_attr "type" "neon_add_widen")
+ (set_attr "length" "8")]
+)
+
+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+ (plus:<VW:V_widen> (zero_extend:<VW:V_widen> (vec_select:VW
(match_operand:VQI 1 "s_register_operand" "%w")
+ (match_operand:VQI 2
"vect_par_constant_high" "")))
+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+ "TARGET_NEON"
+ "vaddw.<V_u_elem>\t%q0, %q3, %f1"
+ [(set_attr "type" "neon_add_widen")
+ (set_attr "length" "8")]
+)
+
@@ -4939,10 +4939,10 @@ is of a wider mode, is computed and added to
operand 3. Operand 3 is of a mode
equal or wider than the mode of the absolute difference. The result is
placed
in operand 0, which is of the same mode as operand 3.
-@cindex @code{ssum_widen@var{m3}} instruction pattern
-@item @samp{ssum_widen@var{m3}}
-@cindex @code{usum_widen@var{m3}} instruction pattern
-@itemx @samp{usum_widen@var{m3}}
+@cindex @code{widen_ssum@var{m3}} instruction pattern
+@item @samp{widen_ssum@var{m3}}
+@cindex @code{widen_usum@var{m3}} instruction pattern
+@itemx @samp{widen_usum@var{m3}}
Operands 0 and 2 are of the same mode, which is wider than the mode of
operand 1. Add operand 1 to operand 2 and place the widened result in
operand 0. (This is used express accumulation of elements into an
accumulator
@@ -399,16 +399,24 @@ read_name (struct md_name *name)
{
int c;
size_t i;
+ int in_angle_bracket;
c = read_skip_spaces ();
i = 0;
+ in_angle_bracket = 0;
while (1)
{
+ if (c == '<')
+ in_angle_bracket = 1;
+
+ if (c == '>')
+ in_angle_bracket = 0;
+
if (c == ' ' || c == '\n' || c == '\t' || c == '\f' || c == '\r'
|| c == EOF)
break;
- if (c == ':' || c == ')' || c == ']' || c == '"' || c == '/'
+ if (((c == ':') and (in_angle_bracket == 0)) || c == ')' || c ==
']' || c == '"' || c == '/'
|| c == '(' || c == '[')
{
unread_char (c);
b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, short * __restrict x)
+{
+ len = len & ~31;
+ int result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s16" } } */
+
+
+
b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, int * __restrict x)
+{
+ len = len & ~31;
+ long long result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s32" } } */
+
+
b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, unsigned short * __restrict x)
+{
+ len = len & ~31;
+ unsigned int result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw.u16" } } */
b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
new file mode 100644
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+int
+t6(int len, void * dummy, unsigned int * __restrict x)
+{
+ len = len & ~31;
+ unsigned long long result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u32" } } */
+
b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
new file mode 100644
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-add-options arm_neon_ok } */
+/* { dg-options "-O3" } */
+
+
+int
+t6(int len, void * dummy, char * __restrict x)
+{
+ len = len & ~31;
+ unsigned short result = 0;
+ __asm volatile ("");
+ for (int i = 0; i < len; i++)
+ result += x[i];
+ return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u8" } } */
+
+
+