Message ID | 537405E1.8020000@linaro.org |
---|---|
State | New |
Headers | show |
On 15 May 2014 01:10, Kugan <kugan.vivekanandarajah@linaro.org> wrote: > > Hi All, > > AAarch64 back-end defines GENERAL_REGS and CORE_REGS with the same set > of register. Is there any reason why we need this? Nope an artifact of the early evolution of AArch64. Long ago CORE_REGS did not include SP. Your patch is fine, commit it. /Marcus
On 22/05/14 01:08, Marcus Shawcroft wrote: > On 15 May 2014 01:10, Kugan <kugan.vivekanandarajah@linaro.org> wrote: >> >> Hi All, >> >> AAarch64 back-end defines GENERAL_REGS and CORE_REGS with the same set >> of register. Is there any reason why we need this? > > Nope an artifact of the early evolution of AArch64. Long ago CORE_REGS > did not include SP. Your patch is fine, commit it. With these changes, Christophe has noted a regression in aarch64_be-none-elf (http://cbuild.validation.linaro.org/build/cross-validation/gcc/210735/report-build-info.html). One of the reduced test case is attached. Here part of the asm changes to: fmov d1, x20 fmov v1.d[1], x19 + str q0, [x29, 64] + str x22, [x29, 64] fmov d0, x21 - fmov v0.d[1], x22 bl __multf3 orr v1.16b, v0.16b, v0.16b Due to the cost changes in IRA, now part of the arguments(v0.d[1]) for multf3 ends up in stack. Reason for this us, in IRA, assign_hard_reg, while iterating for the cost for assigning register to reg:TI 99, allocates register 32 (FP register). Which I think is wrong. After which LRA makes it worse. There could be a latent bug here in LRA side but I think still we need to look at cost model as well. Increasing the cost model like below helps here. - NAMED_PARAM (GP2FP, 2), - NAMED_PARAM (FP2GP, 2), + NAMED_PARAM (GP2FP, 3), + NAMED_PARAM (FP2GP, 3), IRA Log for r99 [...] r99: preferred GENERAL_REGS, alternative ALL_REGS, allocno ALL_REGS a4 (r99,l0) best GENERAL_REGS, allocno ALL_REGS [...] a4(r99,l0) costs: GENERAL_REGS:2000,2000 POINTER_REGS:2000,2000 FP_LO_REGS:4000,4000 FP_REGS:4000,4000 ALL_REGS:8000,8000 MEM:12000,12000 [...] ;; a4(r99,l0) conflicts: ;; subobject 0: a3(r90,l0) a2(r93,l0) a5(r89,l0) a6(r88,l0) ;; total conflict hard regs: 33 ;; conflict hard regs: 33 [...] Allocno a4r99 of ALL_REGS(62) has 60 avail. regs 0-28 32 34-63, node: 0-28 32 34-63 obj 0 (confl regs = 29 31 33 64 65), obj 1 (confl regs = 29 31 33 64 65) [...] Pushing a4(r99,l0)(cost 0) [...] Popping a4(r99,l0) -- assign reg 32 [...] RTL snip from IRA Log for r99 [...] (insn 25 23 74 5 (set (reg:TF 33 v1) (reg:TF 32 v0)) t.c:12 40 {*movtf_aarch64} (expr_list:REG_DEAD (reg:TF 32 v0) (nil))) (insn 74 25 75 5 (clobber (reg:TI 99 [ d+-8 ])) t.c:12 -1 (nil)) (insn 75 74 76 5 (set (subreg:DI (reg:TI 99 [ d+-8 ]) 0) (reg:DI 88 [ d ])) t.c:12 34 {*movdi_aarch64} (expr_list:REG_DEAD (reg:DI 88 [ d ]) (nil))) (insn 76 75 77 5 (set (subreg:DI (reg:TI 99 [ d+-8 ]) 8) (reg:DI 89 [ d+8 ])) t.c:12 34 {*movdi_aarch64} (expr_list:REG_DEAD (reg:DI 89 [ d+8 ]) (nil))) (insn 77 76 27 5 (set (reg:TF 32 v0) (subreg:TF (reg:TI 99 [ d+-8 ]) 0)) t.c:12 40 {*movtf_aarch64} (expr_list:REG_DEAD (reg:TI 99 [ d+-8 ]) (nil))) [...] RTL snip from IRA Log for instructions that had r99 [...] (insn 25 23 74 5 (set (reg:TF 33 v1) (reg:TF 32 v0)) t.c:12 40 {*movtf_aarch64} (nil)) (insn 74 25 95 5 (clobber (reg:TI 32 v0 [orig:99 d+-8 ] [99])) t.c:12 -1 (nil)) (insn 95 74 75 5 (set (mem/c:TI (plus:DI (reg/f:DI 29 x29) (const_int 64 [0x40])) [0 %sfp+-16 S16 A128]) (reg:TI 32 v0 [orig:99 d+-8 ] [99])) t.c:12 37 {*movti_aarch64} (nil)) (insn 75 95 96 5 (set (mem/c:DI (plus:DI (reg/f:DI 29 x29) (const_int 64 [0x40])) [0 %sfp+-16 S8 A128]) (reg:DI 22 x22 [orig:88 d ] [88])) t.c:12 34 {*movdi_aarch64} (nil)) (insn 96 75 76 5 (set (reg:TI 32 v0 [orig:99 d+-8 ] [99]) (mem/c:TI (plus:DI (reg/f:DI 29 x29) (const_int 64 [0x40])) [0 %sfp+-16 S16 A128])) t.c:12 37 {*movti_aarch64} (nil)) (insn 76 96 77 5 (set (reg:DI 32 v0 [orig:99 d ] [99]) (reg:DI 21 x21 [orig:89 d+8 ] [89])) t.c:12 34 {*movdi_aarch64} (nil)) (insn 77 76 27 5 (set (reg:TF 32 v0) (reg:TF 32 v0 [orig:99 d+-8 ] [99])) t.c:12 40 {*movtf_aarch64} (nil)) [...] Thanks, Kugan
On 27 May 2014 23:27, Kugan <kugan.vivekanandarajah@linaro.org> wrote: > Due to the cost changes in IRA, now part of the arguments(v0.d[1]) for > multf3 ends up in stack. Reason for this us, in IRA, assign_hard_reg, > while iterating for the cost for assigning register to reg:TI 99, > allocates register 32 (FP register). Which I think is wrong. After which > LRA makes it worse. There could be a latent bug here in LRA side but I > think still we need to look at cost model as well. Increasing the cost > model like below helps here. If I understand correctly this is real regression, in which case adjusting the costs like this is simply papering over the cracks. Can you figure out what the real issue is? Cheers /Marcus
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a3147ee..eee36ba 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3951,7 +3951,7 @@ enum reg_class aarch64_regno_regclass (unsigned regno) { if (GP_REGNUM_P (regno)) - return CORE_REGS; + return GENERAL_REGS; if (regno == SP_REGNUM) return STACK_REG; @@ -4102,12 +4102,12 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, /* A TFmode or TImode memory access should be handled via an FP_REGS because AArch64 has richer addressing modes for LDR/STR instructions than LDP/STP instructions. */ - if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS + if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS && GET_MODE_SIZE (mode) == 16 && MEM_P (x)) return FP_REGS; if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x)) - return CORE_REGS; + return GENERAL_REGS; return NO_REGS; } @@ -4239,7 +4239,6 @@ aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode) { switch (regclass) { - case CORE_REGS: case POINTER_REGS: case GENERAL_REGS: case ALL_REGS: diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 7962aa4..3455ecc 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -409,7 +409,6 @@ extern unsigned long aarch64_tune_flags; enum reg_class { NO_REGS, - CORE_REGS, GENERAL_REGS, STACK_REG, POINTER_REGS, @@ -424,7 +423,6 @@ enum reg_class #define REG_CLASS_NAMES \ { \ "NO_REGS", \ - "CORE_REGS", \ "GENERAL_REGS", \ "STACK_REG", \ "POINTER_REGS", \ @@ -436,7 +434,6 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ - { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ { 0xffffffff, 0x00000000, 0x00000003 }, /* POINTER_REGS */ \ @@ -447,7 +444,7 @@ enum reg_class #define REGNO_REG_CLASS(REGNO) aarch64_regno_regclass (REGNO) -#define INDEX_REG_CLASS CORE_REGS +#define INDEX_REG_CLASS GENERAL_REGS #define BASE_REG_CLASS POINTER_REGS /* Register pairs used to eliminate unneeded registers that point into