From patchwork Tue Mar 22 14:08:05 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Andrew Stubbs X-Patchwork-Id: 730 Return-Path: Delivered-To: unknown Received: from imap.gmail.com (74.125.159.109) by localhost6.localdomain6 with IMAP4-SSL; 08 Jun 2011 14:45:14 -0000 Delivered-To: patches@linaro.org Received: by 10.204.113.5 with SMTP id y5cs108871bkp; Tue, 22 Mar 2011 07:08:14 -0700 (PDT) Received: by 10.42.138.67 with SMTP id b3mr3475704icu.490.1300802891208; Tue, 22 Mar 2011 07:08:11 -0700 (PDT) Received: from mail.codesourcery.com (mail.codesourcery.com [38.113.113.100]) by mx.google.com with ESMTPS id 15si17490690ibb.85.2011.03.22.07.08.09 (version=TLSv1/SSLv3 cipher=OTHER); Tue, 22 Mar 2011 07:08:10 -0700 (PDT) Received-SPF: pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) client-ip=38.113.113.100; Authentication-Results: mx.google.com; spf=pass (google.com: domain of ams@codesourcery.com designates 38.113.113.100 as permitted sender) smtp.mail=ams@codesourcery.com Received: (qmail 24689 invoked from network); 22 Mar 2011 14:08:08 -0000 Received: from unknown (HELO ?192.168.0.104?) (ams@127.0.0.2) by mail.codesourcery.com with ESMTPA; 22 Mar 2011 14:08:08 -0000 Message-ID: <4D88AD45.2010803@codesourcery.com> Date: Tue, 22 Mar 2011 14:08:05 +0000 From: Andrew Stubbs Organization: CodeSourcery User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.14) Gecko/20110223 Lightning/1.0b2 Thunderbird/3.1.8 MIME-Version: 1.0 To: Richard Earnshaw CC: gcc-patches@gcc.gnu.org, patches@linaro.org, Bernd Schmidt Subject: Re: [PATCH][ARM] Discourage use of NEON on Cortex-A8 References: <4D7CF174.1090608@codesourcery.com> <1300122644.6014.46.camel@e102346-lin.cambridge.arm.com> <4D7E5696.9010405@codesourcery.com> <1300126674.6014.49.camel@e102346-lin.cambridge.arm.com> <4D88ACB0.8030109@codesourcery.com> In-Reply-To: <4D88ACB0.8030109@codesourcery.com> And again, with the patch ... On 22/03/11 14:05, Andrew Stubbs wrote: > On 14/03/11 18:17, Richard Earnshaw wrote: > > I think the order should be: not-a8, core-regs, core-regs, only-a8. > > Ok, how about this? > > I've tested that it still builds spec2k crafty. > > Andrew 2011-03-22 Bernd Schmidt Andrew Stubbs gcc/ * config/arm/vfp.md (arm_movdi_vfp): Enable only when not tuning for Cortex-A8. (arm_movdi_vfp_cortexa8): New pattern. * config/arm/neon.md (adddi3_neon, subdi3_neon, anddi3_neon, iordi3_neon, xordi3_neon): Add alternatives to discourage Neon instructions when tuning for Cortex-A8. Set attribute "arch". * config/arm/arm.md: Move include arm-tune.md up a bit. (define_attr "arch"): Add "onlya8" and "nota8" values. (define_attr "arch_enabled"): Handle "onlya8" and "nota8". --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -149,6 +149,9 @@ ;;--------------------------------------------------------------------------- ;; Attributes +;; Processor type. This is created automatically from arm-cores.def. +(include "arm-tune.md") + ; IS_THUMB is set to 'yes' when we are generating Thumb code, and 'no' when ; generating ARM code. This is used to control the length of some insn ; patterns that share the same RTL in both ARM and Thumb code. @@ -192,7 +195,7 @@ ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without ; arm_arch6. This attribute is used to compute attribute "enabled", ; use type "any" to enable an alternative in all cases. -(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6" +(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,nota8" (const_string "any")) (define_attr "arch_enabled" "no,yes" @@ -225,6 +228,14 @@ (and (eq_attr "arch" "nov6") (ne (symbol_ref "(TARGET_32BIT && !arm_arch6)") (const_int 0))) + (const_string "yes") + + (and (eq_attr "arch" "onlya8") + (eq_attr "tune" "cortexa8")) + (const_string "yes") + + (and (eq_attr "arch" "nota8") + (not (eq_attr "tune" "cortexa8"))) (const_string "yes")] (const_string "no"))) @@ -485,9 +496,6 @@ ;;--------------------------------------------------------------------------- ;; Pipeline descriptions -;; Processor type. This is created automatically from arm-cores.def. -(include "arm-tune.md") - (define_attr "tune_cortexr4" "yes,no" (const (if_then_else (eq_attr "tune" "cortexr4,cortexr4f") --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -583,23 +583,25 @@ ) (define_insn "adddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") - (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0") - (match_operand:DI 2 "s_register_operand" "w,r,0"))) + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w") + (match_operand:DI 2 "s_register_operand" "w,r,0,w"))) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vadd.i64\t%P0, %P1, %P2"; + case 0: /* fall through */ + case 3: return "vadd.i64\t%P0, %P1, %P2"; case 1: return "#"; case 2: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,*,*") - (set_attr "conds" "*,clob,clob") - (set_attr "length" "*,8,8")] + [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") + (set_attr "conds" "*,clob,clob,*") + (set_attr "length" "*,8,8,*") + (set_attr "arch" "nota8,*,*,onlya8")] ) (define_insn "*sub3_neon" @@ -617,24 +619,26 @@ ) (define_insn "subdi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") - (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0") - (match_operand:DI 2 "s_register_operand" "w,r,0,0"))) + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") + (match_operand:DI 2 "s_register_operand" "w,r,0,0,w"))) (clobber (reg:CC CC_REGNUM))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vsub.i64\t%P0, %P1, %P2"; + case 0: /* fall through */ + case 4: return "vsub.i64\t%P0, %P1, %P2"; case 1: /* fall through */ case 2: /* fall through */ case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_2,*,*,*") - (set_attr "conds" "*,clob,clob,clob") - (set_attr "length" "*,8,8,8")] + [(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2") + (set_attr "conds" "*,clob,clob,clob,*") + (set_attr "length" "*,8,8,8,*") + (set_attr "arch" "nota8,*,*,*,onlya8")] ) (define_insn "*mul3_neon" @@ -720,23 +724,26 @@ ) (define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") - (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") - (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") + (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vorr\t%P0, %P1, %P2"; - case 1: return neon_output_logic_immediate ("vorr", &operands[2], + case 0: /* fall through */ + case 4: return "vorr\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 5: return neon_output_logic_immediate ("vorr", &operands[2], DImode, 0, VALID_NEON_QREG_MODE (DImode)); case 2: return "#"; case 3: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") - (set_attr "length" "*,*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,*,*") + (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -762,23 +769,26 @@ ) (define_insn "anddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") - (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") - (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0") + (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r,w,DL")))] "TARGET_NEON" { switch (which_alternative) { - case 0: return "vand\t%P0, %P1, %P2"; - case 1: return neon_output_logic_immediate ("vand", &operands[2], + case 0: /* fall through */ + case 4: return "vand\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 5: return neon_output_logic_immediate ("vand", &operands[2], DImode, 1, VALID_NEON_QREG_MODE (DImode)); case 2: return "#"; case 3: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") - (set_attr "length" "*,*,8,8")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1") + (set_attr "length" "*,*,8,8,*,*") + (set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")] ) (define_insn "orn3_neon" @@ -836,16 +846,18 @@ ) (define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") - (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r") - (match_operand:DI 2 "s_register_operand" "w,r,r")))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w") + (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w") + (match_operand:DI 2 "s_register_operand" "w,r,r,w")))] "TARGET_NEON" "@ veor\t%P0, %P1, %P2 # - #" - [(set_attr "neon_type" "neon_int_1,*,*") - (set_attr "length" "*,8,8")] + # + veor\t%P0, %P1, %P2" + [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1") + (set_attr "length" "*,8,8,*") + (set_attr "arch" "nota8,*,*,onlya8")] ) (define_insn "one_cmpl2" --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -134,9 +134,51 @@ ;; DImode moves (define_insn "*arm_movdi_vfp" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,r,w,w, Uv") + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, m,w,r,w,w, Uv") (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] - "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 + && ( register_operand (operands[0], DImode) + || register_operand (operands[1], DImode))" + "* + switch (which_alternative) + { + case 0: + return \"#\"; + case 1: + case 2: + return output_move_double (operands); + case 3: + return \"fmdrr%?\\t%P0, %Q1, %R1\\t%@ int\"; + case 4: + return \"fmrrd%?\\t%Q0, %R0, %P1\\t%@ int\"; + case 5: + if (TARGET_VFP_SINGLE) + return \"fcpys%?\\t%0, %1\\t%@ int\;fcpys%?\\t%p0, %p1\\t%@ int\"; + else + return \"fcpyd%?\\t%P0, %P1\\t%@ int\"; + case 6: case 7: + return output_move_vfp (operands); + default: + gcc_unreachable (); + } + " + [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored") + (set_attr "neon_type" "*,*,*,neon_mcr_2_mcrr,neon_mrrc,neon_vmov,*,*") + (set (attr "length") (cond [(eq_attr "alternative" "0,1,2") (const_int 8) + (eq_attr "alternative" "5") + (if_then_else + (eq (symbol_ref "TARGET_VFP_SINGLE") (const_int 1)) + (const_int 8) + (const_int 4))] + (const_int 4))) + (set_attr "pool_range" "*,1020,*,*,*,*,1020,*") + (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")] +) + +(define_insn "*arm_movdi_vfp_cortexa8" + [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r,m,w,!r,w,w, Uv") + (match_operand:DI 1 "di_operand" "rIK,mi,r,r,w,w,Uvi,w"))] + "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune == cortexa8 && ( register_operand (operands[0], DImode) || register_operand (operands[1], DImode))" "*