From fa14ca29817f3247417a8bf9e70cc8312f4c5edf Mon Sep 17 00:00:00 2001
From: Charles Baylis <charles.baylis@linaro.org>
Date: Thu, 4 Sep 2014 14:59:23 +0100
Subject: [PATCH 1/2] [AARCH64,NEON] Add patterns + builtins for
vld[234](q?)_lane_* intrinsics
This patch adds new patterns and builtins to represent single-lane structure
load instructions, which will be used to implement the vld[234](q?)_lane_*
intrinsics.
Tested (with the rest of the patch series) with make check on aarch64-oe-linux
with qemu, and also causes no regressions in clyon's NEON intrinsics tests.
<DATE> Charles Baylis <charles.baylis@linaro.org>
* config/aarch64/aarch64-builtins.c
(aarch64_types_loadstruct_lane_qualifiers): Define.
* config/aarch64/aarch64-simd-builtins.def (ld2_lane, ld3_lane,
ld4_lane): New builtins.
* config/aarch64/aarch64-simd.md (aarch64_vec_load_lanesoi_lane<mode>):
New pattern.
(aarch64_vec_load_lanesci_lane<mode>): Likewise.
(aarch64_vec_load_lanesxi_lane<mode>): Likewise.
(aarch64_ld2_lane<mode>): New expand.
(aarch64_ld3_lane<mode>): Likewise.
(aarch64_ld4_lane<mode>): Likewise.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add
UNSPEC_LD2_LANE, UNSPEC_LD3_LANE, UNSPEC_LD4_LANE.
Change-Id: I4c36d18072215133573e07483cfe12165201c339
---
gcc/config/aarch64/aarch64-builtins.c | 5 ++
gcc/config/aarch64/aarch64-simd-builtins.def | 4 ++
gcc/config/aarch64/aarch64-simd.md | 95 ++++++++++++++++++++++++++++
gcc/config/aarch64/aarch64.md | 3 +
4 files changed, 107 insertions(+)
@@ -201,6 +201,11 @@ aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
+/* Qualifiers for the single-lane structure load builtins (ld2_lane,
+   ld3_lane, ld4_lane): the load1 qualifiers followed by two further
+   qualifier_none entries.  Presumably the four slots line up with the
+   expander operands (result, pointer, input tuple, lane index) --
+   confirm against the builtin expansion code.  */
+static enum aarch64_type_qualifiers
+aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_const_pointer_map_mode,
+ qualifier_none, qualifier_none };
+#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -83,6 +83,10 @@
BUILTIN_VQ (LOADSTRUCT, ld2, 0)
BUILTIN_VQ (LOADSTRUCT, ld3, 0)
BUILTIN_VQ (LOADSTRUCT, ld4, 0)
+ /* Implemented by aarch64_ld<VSTRUCT:nregs>_lane<VQ:mode>, i.e. the
+ aarch64_ld2_lane, aarch64_ld3_lane and aarch64_ld4_lane expanders. */
+ BUILTIN_VQ (LOADSTRUCT_LANE, ld2_lane, 0)
+ BUILTIN_VQ (LOADSTRUCT_LANE, ld3_lane, 0)
+ BUILTIN_VQ (LOADSTRUCT_LANE, ld4_lane, 0)
/* Implemented by aarch64_st<VSTRUCT:nregs><VDC:mode>. */
BUILTIN_VDC (STORESTRUCT, st2, 0)
BUILTIN_VDC (STORESTRUCT, st3, 0)
@@ -3991,6 +3991,18 @@
[(set_attr "type" "neon_load2_2reg<q>")]
)
+;; Single-lane LD2.  Operand 1 is the <V_TWO_ELEM> memory operand and
+;; operand 3 the immediate lane index printed inside "[...]".  Operand 2 is
+;; the incoming OI value and is tied to the output by the "0" matching
+;; constraint (presumably so that lanes not loaded keep their old contents).
+;; The UNSPEC_VSTRUCTDUMMY operand is never printed; it appears to exist
+;; only to carry the VQ mode iterator into the pattern.
+(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
+ [(set (match_operand:OI 0 "register_operand" "=w")
+ (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+ (match_operand:OI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
+ UNSPEC_LD2_LANE))]
+ "TARGET_SIMD"
+ "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
+ [(set_attr "type" "neon_load2_one_lane")]
+)
+
(define_insn "vec_store_lanesoi<mode>"
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:OI [(match_operand:OI 1 "register_operand" "w")
@@ -4022,6 +4034,18 @@
[(set_attr "type" "neon_load3_3reg<q>")]
)
+;; Single-lane LD3.  Operand 1 is the <V_THREE_ELEM> memory operand and
+;; operand 3 the immediate lane index printed inside "[...]".  Operand 2 is
+;; the incoming CI value and is tied to the output by the "0" matching
+;; constraint (presumably so that lanes not loaded keep their old contents).
+;; The UNSPEC_VSTRUCTDUMMY operand is never printed; it appears to exist
+;; only to carry the VQ mode iterator into the pattern.
+(define_insn "aarch64_vec_load_lanesci_lane<mode>"
+ [(set (match_operand:CI 0 "register_operand" "=w")
+ (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+ (match_operand:CI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_LD3_LANE))]
+ "TARGET_SIMD"
+ "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
+ [(set_attr "type" "neon_load3_one_lane")]
+)
+
(define_insn "vec_store_lanesci<mode>"
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:CI [(match_operand:CI 1 "register_operand" "w")
@@ -4053,6 +4077,18 @@
[(set_attr "type" "neon_load4_4reg<q>")]
)
+;; Single-lane LD4.  Operand 1 is the <V_FOUR_ELEM> memory operand and
+;; operand 3 the immediate lane index printed inside "[...]".  Operand 2 is
+;; the incoming XI value and is tied to the output by the "0" matching
+;; constraint (presumably so that lanes not loaded keep their old contents).
+;; The UNSPEC_VSTRUCTDUMMY operand is never printed; it appears to exist
+;; only to carry the VQ mode iterator into the pattern.
+(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
+ [(set (match_operand:XI 0 "register_operand" "=w")
+ (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
+ (match_operand:XI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ UNSPEC_LD4_LANE))]
+ "TARGET_SIMD"
+ "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
+ [(set_attr "type" "neon_load4_one_lane")]
+)
+
(define_insn "vec_store_lanesxi<mode>"
[(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:XI [(match_operand:XI 1 "register_operand" "w")
@@ -4366,6 +4402,65 @@
DONE;
})
+;; Expander behind the ld2_lane builtin.  Operand 0 is the OI result,
+;; operand 1 a DImode register holding the address, operand 2 the input OI
+;; value and operand 3 the immediate lane index.
+(define_expand "aarch64_ld2_lane<mode>"
+ [(match_operand:OI 0 "register_operand" "=w")
+ (match_operand:DI 1 "register_operand" "w")
+ (match_operand:OI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ /* Wrap the address register in a MEM of the two-element structure mode,
+ which is the memory operand the insn pattern expects. */
+ enum machine_mode mode = <V_TWO_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+ /* Hand the lane index to the bounds check, with limits 0 and the number
+ of units in <VCONQ>mode. */
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
+ mem,
+ operands[2],
+ operands[3]));
+ DONE;
+})
+
+;; Expander behind the ld3_lane builtin.  Operand 0 is the CI result,
+;; operand 1 a DImode register holding the address, operand 2 the input CI
+;; value and operand 3 the immediate lane index.
+(define_expand "aarch64_ld3_lane<mode>"
+ [(match_operand:CI 0 "register_operand" "=w")
+ (match_operand:DI 1 "register_operand" "w")
+ (match_operand:CI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ /* Wrap the address register in a MEM of the three-element structure mode,
+ which is the memory operand the insn pattern expects. */
+ enum machine_mode mode = <V_THREE_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+ /* Hand the lane index to the bounds check, with limits 0 and the number
+ of units in <VCONQ>mode. */
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
+ mem,
+ operands[2],
+ operands[3]));
+ DONE;
+})
+
+;; Expander behind the ld4_lane builtin.  Operand 0 is the XI result,
+;; operand 1 a DImode register holding the address, operand 2 the input XI
+;; value and operand 3 the immediate lane index.
+(define_expand "aarch64_ld4_lane<mode>"
+ [(match_operand:XI 0 "register_operand" "=w")
+ (match_operand:DI 1 "register_operand" "w")
+ (match_operand:XI 2 "register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")
+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+ "TARGET_SIMD"
+{
+ /* Wrap the address register in a MEM of the four-element structure mode,
+ which is the memory operand the insn pattern expects. */
+ enum machine_mode mode = <V_FOUR_ELEM>mode;
+ rtx mem = gen_rtx_MEM (mode, operands[1]);
+
+ /* Hand the lane index to the bounds check, with limits 0 and the number
+ of units in <VCONQ>mode. */
+ aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
+ emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
+ mem,
+ operands[2],
+ operands[3]));
+ DONE;
+})
+
+
+
;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.
@@ -92,6 +92,9 @@
UNSPEC_LD2
UNSPEC_LD3
UNSPEC_LD4
+ UNSPEC_LD2_LANE
+ UNSPEC_LD3_LANE
+ UNSPEC_LD4_LANE
UNSPEC_MB
UNSPEC_NOP
UNSPEC_PRLG_STK
--
1.9.1