diff mbox

[Richard,Sandiford] Add an array_mode_supported_p target hook

Message ID g4y63vl8i9.fsf@linaro.org
State Accepted
Headers show

Commit Message

Richard Sandiford March 31, 2011, 1:33 p.m. UTC
This patch adds an array_mode_supported_p hook, which says whether
MAX_FIXED_MODE_SIZE should be ignored for a given type of array.
It follows on from the discussion here:

    http://gcc.gnu.org/ml/gcc/2011-03/msg00342.html

The intended use of the hook is to allow small arrays of vectors
to have a non-BLK mode, and hence to be stored in rtl registers.
These arrays are used both in the ARM arm_neon.h API and in the
optabs proposed in:

    http://gcc.gnu.org/ml/gcc/2011-03/msg00322.html

The tail end of the thread was about the definition of TYPE_MODE:

#define TYPE_MODE(NODE) \
  (TREE_CODE (TYPE_CHECK (NODE)) == VECTOR_TYPE \
   ? vector_type_mode (NODE) : (NODE)->type.mode)

with this outcome:

    http://gcc.gnu.org/ml/gcc/2011-03/msg00470.html

To summarise my take on it:

- The current definition of TYPE_MODE isn't sufficient even for vector
  modes and vector_mode_supported_p, because non-vector types can have
  vector modes.

- We should no longer treat types as having one mode everywhere.
  We should instead replace TYPE_MODE with a function that takes
  a context.  Tests of things like vector_mode_supported_p would
  move from layout_type to this new function.

I think this patch fits within that scheme.  array_mode_supported_p
would be treated in the same way as vector_mode_supported_p.

I realise the ideal would be to get rid of TYPE_MODE first.
But that's going to be a longer-term thing.  Now that there's
at least a plan, I'd like to press ahead with the array stuff
on the basis that

(a) although the new hook won't work with the "target" attribute,
    our current mode handling doesn't work in just the same way.

(b) the new hook doesn't interfere with the plan.

(c) getting good code from the intrinsics (and support for these
    instructions in the vectoriser) is going to be much more important
    to most ARM users than the ability to turn Neon on and off for
    individual functions in a TU.

To give an example of the difference, the Neon code posted here:

    http://hilbert-space.de/?p=22

produces this inner loop before the patch (but with
http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01996.html applied):

.L3:
	vld3.8	{d16-d18}, [r1]!
	vstmia	ip, {d16-d18}
	fldd	d19, [sp, #24]
	adr	r5, .L6
	ldmia	r5, {r4-r5}
	fldd	d16, [sp, #32]
	vmov	d18, r4, r5  @ v8qi
	vmull.u8	q9, d19, d18
	adr	r5, .L6+8
	ldmia	r5, {r4-r5}
	vmov	d17, r4, r5  @ v8qi
	vstmia	sp, {d18-d19}
	vmlal.u8	q9, d16, d17
	fldd	d16, [sp, #40]
	adr	r5, .L6+16
	ldmia	r5, {r4-r5}
	vmov	d17, r4, r5  @ v8qi
	vmlal.u8	q9, d16, d17
	add	r3, r3, #1
	vshrn.i16	d16, q9, #8
	cmp	r3, r2
	vst1.8	{d16}, [r0]!
	bne	.L3

With both patches applied, the inner loop is:

.L3:
	vld3.8	{d18-d20}, [r1]!
	vmull.u8	q8, d18, d21
	vmlal.u8	q8, d19, d22
	vmlal.u8	q8, d20, d23
	add	r3, r3, #1
	vshrn.i16	d16, q8, #8
	cmp	r3, r2
	vst1.8	{d16}, [r0]!
	bne	.L3

Tested on arm-linux-gnueabi.  OK to install?

Richard


gcc/
	* hooks.h (hook_bool_mode_uhwi_false): Declare.
	* hooks.c (hook_bool_mode_uhwi_false): New function.
	* target.def (array_mode_supported_p): New hook.
	* doc/tm.texi.in (TARGET_ARRAY_MODE_SUPPORTED_P): Add @hook.
	* doc/tm.texi: Regenerate.
	* stor-layout.c (mode_for_array): New function.
	(layout_type): Use it.
	* config/arm/arm.c (arm_array_mode_supported_p): New function.
	(TARGET_ARRAY_MODE_SUPPORTED_P): Define.
diff mbox

Patch

Index: gcc/hooks.h
===================================================================
--- gcc/hooks.h	2011-03-31 10:57:26.000000000 +0100
+++ gcc/hooks.h	2011-03-31 14:18:21.000000000 +0100
@@ -34,6 +34,8 @@  extern bool hook_bool_mode_false (enum m
 extern bool hook_bool_mode_true (enum machine_mode);
 extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx);
 extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx);
+extern bool hook_bool_mode_uhwi_false (enum machine_mode,
+				       unsigned HOST_WIDE_INT);
 extern bool hook_bool_tree_false (tree);
 extern bool hook_bool_const_tree_false (const_tree);
 extern bool hook_bool_tree_true (tree);
Index: gcc/hooks.c
===================================================================
--- gcc/hooks.c	2011-03-31 10:57:26.000000000 +0100
+++ gcc/hooks.c	2011-03-31 14:18:21.000000000 +0100
@@ -101,6 +101,15 @@  hook_bool_mode_const_rtx_true (enum mach
   return true;
 }
 
+/* Generic hook that takes (enum machine_mode, unsigned HOST_WIDE_INT)
+   and returns false.  */
+bool
+hook_bool_mode_uhwi_false (enum machine_mode mode ATTRIBUTE_UNUSED,
+			   unsigned HOST_WIDE_INT value ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
 /* Generic hook that takes (FILE *, const char *) and does nothing.  */
 void
 hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED)
Index: gcc/target.def
===================================================================
--- gcc/target.def	2011-03-31 10:57:26.000000000 +0100
+++ gcc/target.def	2011-03-31 14:18:41.000000000 +0100
@@ -1611,6 +1611,38 @@  DEFHOOK
  bool, (enum machine_mode mode),
  hook_bool_mode_false)
 
+/* True if we should try to use a scalar mode to represent an array,
+   overriding the usual MAX_FIXED_MODE limit.  */
+DEFHOOK
+(array_mode_supported_p,
+ "Return true if GCC should try to use a scalar mode to store an array\n\
+of @var{nelems} elements, given that each element has mode @var{mode}.\n\
+Returning true here overrides the usual @code{MAX_FIXED_MODE} limit\n\
+and allows GCC to use any defined integer mode.\n\
+\n\
+One use of this hook is to support vector load and store operations\n\
+that operate on several homogeneous vectors.  For example, ARM Neon\n\
+has operations like:\n\
+\n\
+@smallexample\n\
+int8x8x3_t vld3_s8 (const int8_t *)\n\
+@end smallexample\n\
+\n\
+where the return type is defined as:\n\
+\n\
+@smallexample\n\
+typedef struct int8x8x3_t\n\
+@{\n\
+  int8x8_t val[3];\n\
+@} int8x8x3_t;\n\
+@end smallexample\n\
+\n\
+If this hook allows @code{val} to have a scalar mode, then\n\
+@code{int8x8x3_t} can have the same mode.  GCC can then store\n\
+@code{int8x8x3_t}s in registers rather than forcing them onto the stack.",
+ bool, (enum machine_mode mode, unsigned HOST_WIDE_INT nelems),
+ hook_bool_mode_uhwi_false)
+
 /* Compute cost of moving data from a register of class FROM to one of
    TO, using MODE.  */
 DEFHOOK
Index: gcc/doc/tm.texi.in
===================================================================
--- gcc/doc/tm.texi.in	2011-03-29 10:32:08.000000000 +0100
+++ gcc/doc/tm.texi.in	2011-03-31 14:27:42.000000000 +0100
@@ -4271,6 +4271,8 @@  insns involving vector mode @var{mode}. 
 must have move patterns for this mode.
 @end deftypefn
 
+@hook TARGET_ARRAY_MODE_SUPPORTED_P
+
 @hook TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
 Define this to return nonzero for machine modes for which the port has
 small register classes.  If this target hook returns nonzero for a given
Index: gcc/stor-layout.c
===================================================================
--- gcc/stor-layout.c	2011-03-31 10:57:26.000000000 +0100
+++ gcc/stor-layout.c	2011-03-31 14:22:23.000000000 +0100
@@ -546,6 +546,34 @@  get_mode_alignment (enum machine_mode mo
   return MIN (BIGGEST_ALIGNMENT, MAX (1, mode_base_align[mode]*BITS_PER_UNIT));
 }
 
+/* Return the natural mode of an array, given that it is SIZE bytes in
+   total and has elements of type ELEM_TYPE.  */
+
+static enum machine_mode
+mode_for_array (tree elem_type, tree size)
+{
+  tree elem_size;
+  unsigned HOST_WIDE_INT int_size, int_elem_size;
+  bool limit_p;
+
+  /* One-element arrays get the component type's mode.  */
+  elem_size = TYPE_SIZE (elem_type);
+  if (simple_cst_equal (size, elem_size))
+    return TYPE_MODE (elem_type);
+
+  limit_p = true;
+  if (host_integerp (size, 1) && host_integerp (elem_size, 1))
+    {
+      int_size = tree_low_cst (size, 1);
+      int_elem_size = tree_low_cst (elem_size, 1);
+      if (int_elem_size > 0
+	  && int_size % int_elem_size == 0
+	  && targetm.array_mode_supported_p (TYPE_MODE (elem_type),
+					     int_size / int_elem_size))
+	limit_p = false;
+    }
+  return mode_for_size_tree (size, MODE_INT, limit_p);
+}
 
 /* Subroutine of layout_decl: Force alignment required for the data type.
    But if the decl itself wants greater alignment, don't override that.  */
@@ -2039,14 +2067,8 @@  layout_type (tree type)
 	    && (TYPE_MODE (TREE_TYPE (type)) != BLKmode
 		|| TYPE_NO_FORCE_BLK (TREE_TYPE (type))))
 	  {
-	    /* One-element arrays get the component type's mode.  */
-	    if (simple_cst_equal (TYPE_SIZE (type),
-				  TYPE_SIZE (TREE_TYPE (type))))
-	      SET_TYPE_MODE (type, TYPE_MODE (TREE_TYPE (type)));
-	    else
-	      SET_TYPE_MODE (type, mode_for_size_tree (TYPE_SIZE (type),
-						       MODE_INT, 1));
-
+	    SET_TYPE_MODE (type, mode_for_array (TREE_TYPE (type),
+						 TYPE_SIZE (type)));
 	    if (TYPE_MODE (type) != BLKmode
 		&& STRICT_ALIGNMENT && TYPE_ALIGN (type) < BIGGEST_ALIGNMENT
 		&& TYPE_ALIGN (type) < GET_MODE_ALIGNMENT (TYPE_MODE (type)))
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	2011-03-31 14:10:12.000000000 +0100
+++ gcc/config/arm/arm.c	2011-03-31 14:18:21.000000000 +0100
@@ -243,6 +243,8 @@  static rtx arm_pic_static_addr (rtx orig
 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
+static bool arm_array_mode_supported_p (enum machine_mode,
+					unsigned HOST_WIDE_INT);
 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
 static bool arm_class_likely_spilled_p (reg_class_t);
 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
@@ -403,6 +405,8 @@  #define TARGET_ADDRESS_COST arm_address_
 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
+#undef TARGET_ARRAY_MODE_SUPPORTED_P
+#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
@@ -22377,6 +22381,20 @@  arm_vector_mode_supported_p (enum machin
   return false;
 }
 
+/* Implements target hook array_mode_supported_p.  */
+
+static bool
+arm_array_mode_supported_p (enum machine_mode mode,
+			    unsigned HOST_WIDE_INT nelems)
+{
+  if (TARGET_NEON
+      && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
+      && (nelems >= 2 && nelems <= 4))
+    return true;
+
+  return false;
+}
+
 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
    registers when autovectorizing for Neon, at least until multiple vector
    widths are supported properly by the middle-end.  */