Message ID | 1505205277-26276-2-git-send-email-charles.baylis@linaro.org |
---|---|
State | New |
Headers | show |
Series | Addressing mode costs v3 | expand |
Hi Charles, On 12/09/17 09:34, charles.baylis@linaro.org wrote: > From: Charles Baylis <charles.baylis@linaro.org> > > Add bus widths. These use the approximation that v7 and later cores have > 64bit data bus width, and earlier cores have 32 bit bus width, with the > exception of v7m. > Given the way this field is used in patch 2 does it affect the addressing mode generation in the tests you added depending on the -mtune option given? If so, we'll get testsuite failures when people test with particular default CPU configurations. Could you expand on the benefits we get from this extra bus_width information? I get that we increase the cost of memory accesses if the size of the mode we load is larger than the bus width, but it's not as if there is ever an alternative in this regard, such as loading less memory, so what pass can make different decisions thanks to this field? Thanks, Kyrill > <date> Charles Baylis <charles.baylis@linaro.org> > > * config/arm/arm-protos.h (struct tune_params): New field > bus_width. > * config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field. > (arm_fastmul_tune): Likewise. > (arm_strongarm_tune): Likewise. > (arm_xscale_tune): Likewise. > (arm_9e_tune): Likewise. > (arm_marvell_pj4_tune): Likewise. > (arm_v6t2_tune): Likewise. > (arm_cortex_tune): Likewise. > (arm_cortex_a8_tune): Likewise. > (arm_cortex_a7_tune): Likewise. > (arm_cortex_a15_tune): Likewise. > (arm_cortex_a35_tune): Likewise. > (arm_cortex_a53_tune): Likewise. > (arm_cortex_a57_tune): Likewise. > (arm_exynosm1_tune): Likewise. > (arm_xgene1_tune): Likewise. > (arm_cortex_a5_tune): Likewise. > (arm_cortex_a9_tune): Likewise. > (arm_cortex_a12_tune): Likewise. > (arm_cortex_a73_tune): Likewise. > (arm_v7m_tune): Likewise. > (arm_cortex_m7_tune): Likewise. > (arm_v6m_tune): Likewise. > (arm_fa726te_tune): Likewise. 
> > Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66 > --- > gcc/config/arm/arm-protos.h | 2 ++ > gcc/config/arm/arm.c | 24 ++++++++++++++++++++++++ > 2 files changed, 26 insertions(+) > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h > index 4538078..47a85cc 100644 > --- a/gcc/config/arm/arm-protos.h > +++ b/gcc/config/arm/arm-protos.h > @@ -278,6 +278,8 @@ struct tune_params > int max_insns_inline_memset; > /* Issue rate of the processor. */ > unsigned int issue_rate; > + /* Bus width (bits). */ > + unsigned int bus_width; > /* Explicit prefetch data. */ > struct > { > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c > index bca8a34..32001e5 100644 > --- a/gcc/config/arm/arm.c > +++ b/gcc/config/arm/arm.c > @@ -1761,6 +1761,7 @@ const struct tune_params arm_slowmul_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1783,6 +1784,7 @@ const struct tune_params arm_fastmul_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1808,6 +1810,7 @@ const struct tune_params arm_strongarm_tune = > 3, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1830,6 +1833,7 @@ const struct tune_params arm_xscale_tune = > 3, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1852,6 +1856,7 @@ const struct tune_params arm_9e_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. 
*/ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1874,6 +1879,7 @@ const struct tune_params arm_marvell_pj4_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -1896,6 +1902,7 @@ const struct tune_params arm_v6t2_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -1920,6 +1927,7 @@ const struct tune_params arm_cortex_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -1942,6 +1950,7 @@ const struct tune_params arm_cortex_a8_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -1964,6 +1973,7 @@ const struct tune_params arm_cortex_a7_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -1986,6 +1996,7 @@ const struct tune_params arm_cortex_a15_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 3, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2008,6 +2019,7 @@ const struct tune_params arm_cortex_a35_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 64, /* Bus width. 
*/ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -2030,6 +2042,7 @@ const struct tune_params arm_cortex_a53_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -2052,6 +2065,7 @@ const struct tune_params arm_cortex_a57_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 3, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2074,6 +2088,7 @@ const struct tune_params arm_exynosm1_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 3, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2096,6 +2111,7 @@ const struct tune_params arm_xgene1_tune = > 2, /* Max cond insns. */ > 32, /* Memset max inline. */ > 4, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2121,6 +2137,7 @@ const struct tune_params arm_cortex_a5_tune = > 1, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -2143,6 +2160,7 @@ const struct tune_params arm_cortex_a9_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_BENEFICIAL(4,32,32), > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -2165,6 +2183,7 @@ const struct tune_params arm_cortex_a12_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. 
*/ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2187,6 +2206,7 @@ const struct tune_params arm_cortex_a73_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_TRUE, > @@ -2216,6 +2236,7 @@ const struct tune_params arm_v7m_tune = > 2, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -2240,6 +2261,7 @@ const struct tune_params arm_cortex_m7_tune = > 1, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 64, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > @@ -2265,6 +2287,7 @@ const struct tune_params arm_v6m_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 1, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_FALSE, > tune_params::PREF_LDRD_FALSE, > @@ -2287,6 +2310,7 @@ const struct tune_params arm_fa726te_tune = > 5, /* Max cond insns. */ > 8, /* Memset max inline. */ > 2, /* Issue rate. */ > + 32, /* Bus width. */ > ARM_PREFETCH_NOT_BENEFICIAL, > tune_params::PREF_CONST_POOL_TRUE, > tune_params::PREF_LDRD_FALSE, > -- > 2.7.4 >
On 13 September 2017 at 10:02, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote: > Hi Charles, > > On 12/09/17 09:34, charles.baylis@linaro.org wrote: >> >> From: Charles Baylis <charles.baylis@linaro.org> >> >> Add bus widths. These use the approximation that v7 and later cores have >> 64bit data bus width, and earlier cores have 32 bit bus width, with the >> exception of v7m. >> > > Given the way this field is used in patch 2 does it affect the addressing > mode generation > in the tests you added depending on the -mtune option given? > If so, we'll get testsuite failures when people test with particular default > CPU configurations. No, because the auto_inc_dec phase compares the cost of two different MEMs which differ only by addressing mode. The part of the calculation which depends on the bus_width is the same both times, so it is cancelled out. > Could you expand on the benefits we get from this extra bus_width > information? > I get that we increase the cost of memory accesses if the size of the mode > we load is larger than the > bus width, but it's not as if there is ever an alternative in this regard, > such as loading less memory, > so what pass can make different decisions thanks to this field? As far as this patch series is concerned, it doesn't matter. It is there to encapsulate the notion that a larger transfer results in rtx_costs() returning a larger cost, but I don't know of any part of the compiler which is sensitive to that difference. It's done this way because Ramana and Richard wanted it done that way (https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00652.html). From b7bec2e4f7ca0335e0e5bd84c297215a3a7fb8c7 Mon Sep 17 00:00:00 2001 From: Charles Baylis <charles.baylis@linaro.org> Date: Fri, 8 Sep 2017 12:53:50 +0100 Subject: [PATCH 1/3] [ARM] Add bus_width_bits to tune_params Add bus widths. These use the approximation that v7 and later cores have 64bit data bus width, and earlier cores have 32 bit bus width, with the exception of v7m. 
<date> Charles Baylis <charles.baylis@linaro.org> * config/arm/arm-protos.h (struct tune_params): New field bus_width. * config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field. (arm_fastmul_tune): Likewise. (arm_strongarm_tune): Likewise. (arm_xscale_tune): Likewise. (arm_9e_tune): Likewise. (arm_marvell_pj4_tune): Likewise. (arm_v6t2_tune): Likewise. (arm_cortex_tune): Likewise. (arm_cortex_a8_tune): Likewise. (arm_cortex_a7_tune): Likewise. (arm_cortex_a15_tune): Likewise. (arm_cortex_a35_tune): Likewise. (arm_cortex_a53_tune): Likewise. (arm_cortex_a57_tune): Likewise. (arm_exynosm1_tune): Likewise. (arm_xgene1_tune): Likewise. (arm_cortex_a5_tune): Likewise. (arm_cortex_a9_tune): Likewise. (arm_cortex_a12_tune): Likewise. (arm_cortex_a73_tune): Likewise. (arm_v7m_tune): Likewise. (arm_cortex_m7_tune): Likewise. (arm_v6m_tune): Likewise. (arm_fa726te_tune): Likewise. Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66 --- gcc/config/arm/arm-protos.h | 2 ++ gcc/config/arm/arm.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 4538078..47a85cc 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -278,6 +278,8 @@ struct tune_params int max_insns_inline_memset; /* Issue rate of the processor. */ unsigned int issue_rate; + /* Bus width (bits). */ + unsigned int bus_width; /* Explicit prefetch data. */ struct { diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index bca8a34..32001e5 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1761,6 +1761,7 @@ const struct tune_params arm_slowmul_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1783,6 +1784,7 @@ const struct tune_params arm_fastmul_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. 
*/ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1808,6 +1810,7 @@ const struct tune_params arm_strongarm_tune = 3, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1830,6 +1833,7 @@ const struct tune_params arm_xscale_tune = 3, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1852,6 +1856,7 @@ const struct tune_params arm_9e_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1874,6 +1879,7 @@ const struct tune_params arm_marvell_pj4_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1896,6 +1902,7 @@ const struct tune_params arm_v6t2_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1920,6 +1927,7 @@ const struct tune_params arm_cortex_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1942,6 +1950,7 @@ const struct tune_params arm_cortex_a8_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1964,6 +1973,7 @@ const struct tune_params arm_cortex_a7_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1986,6 +1996,7 @@ const struct tune_params arm_cortex_a15_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2008,6 +2019,7 @@ const struct tune_params arm_cortex_a35_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2030,6 +2042,7 @@ const struct tune_params arm_cortex_a53_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2052,6 +2065,7 @@ const struct tune_params arm_cortex_a57_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2074,6 +2088,7 @@ const struct tune_params arm_exynosm1_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2096,6 +2111,7 @@ const struct tune_params arm_xgene1_tune = 2, /* Max cond insns. */ 32, /* Memset max inline. */ 4, /* Issue rate. */ + 64, /* Bus width. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2121,6 +2137,7 @@ const struct tune_params arm_cortex_a5_tune = 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2143,6 +2160,7 @@ const struct tune_params arm_cortex_a9_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_BENEFICIAL(4,32,32), tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2165,6 +2183,7 @@ const struct tune_params arm_cortex_a12_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2187,6 +2206,7 @@ const struct tune_params arm_cortex_a73_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2216,6 +2236,7 @@ const struct tune_params arm_v7m_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -2240,6 +2261,7 @@ const struct tune_params arm_cortex_m7_tune = 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -2265,6 +2287,7 @@ const struct tune_params arm_v6m_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2287,6 +2310,7 @@ const struct tune_params arm_fa726te_tune = 5, /* Max cond insns. 
*/ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE,
On 15/09/17 16:38, Charles Baylis wrote: > On 13 September 2017 at 10:02, Kyrill Tkachov > <kyrylo.tkachov@foss.arm.com> wrote: >> Hi Charles, >> >> On 12/09/17 09:34, charles.baylis@linaro.org wrote: >>> From: Charles Baylis <charles.baylis@linaro.org> >>> >>> Add bus widths. These use the approximation that v7 and later cores have >>> 64bit data bus width, and earlier cores have 32 bit bus width, with the >>> exception of v7m. >>> >> Given the way this field is used in patch 2 does it affect the addressing >> mode generation >> in the tests you added depending on the -mtune option given? >> If so, we'll get testsuite failures when people test with particular default >> CPU configurations. > No, because the auto_inc_dec phase compares the cost of two different > MEMs which differ only by addressing mode. The part of the calculation > which depends on the bus_width is the same both times, so it is > cancelled out. > >> Could you expand on the benefits we get from this extra bus_width >> information? >> I get that we increase the cost of memory accesses if the size of the mode >> we load is larger than the >> bus width, but it's not as if there is ever an alternative in this regard, >> such as loading less memory, >> so what pass can make different decisions thanks to this field? > As far as this patch series is concerned, it doesn't matter. It is > there to encapsulate the notion that a larger transfer results in > rtx_costs() returning a larger cost, but I don't know of any part of > the compiler which is sensitive to that difference. It's done this way > because Ramana and Richard wanted it done that way > (https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00652.html). From what I can tell Ramana and Richard preferred to encode this attribute as a tuning struct property rather than an inline conditional based on arm_arch7. I agree that if we want to use that information, it should be encoded this way. 
What I'm not convinced about is whether we do want this parameter in the first place. The cost tables already encode information about the costs of different-sized loads/stores. In patch 2, for example, you add the cost for extra_cost->ldst.load which is nominally just the cost of a normal 32-bit ldr. But we also have costs for ldst.ldrd which is the 64-bit two-register load which should reflect any extra cost due to a narrower bus in it. We also have costs for ldst.loadf (for 32-bit VFP loads) and ldst.loadd (for 64-bit VFP D-register loads). So I think we should use those cost fields depending on the mode class and size instead of using ldst.load unconditionally and adding a new bus_width parameter. So I think the way forward is to drop this patch and modify patch 2/3 to use the extra_cost->ldst fields as described above. Sorry for the back-and-forth. I think this is the best approach because it uses the existing fields more naturally and doesn't add new parameters that partly duplicate the information encoded in the existing fields. Ramana, Richard: if you prefer the bus_width approach I won't block it, but could you clarify your preference? If we do end up adding the bus_width parameter then this patch and patch 2/3 look ok. Thanks, Kyrill P.S. I'm going on a 4-week holiday from today, so I won't be able to do any further review in that timeframe. As I said, if we go with the bus_width approach then these patches are ok. If we go with my suggestion, this would be dropped and patch 2 would be extended to select the appropriate extra_cost->ldst field depending on mode.
On 15 September 2017 at 18:01, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote: > From what I can tell Ramana and Richard preferred to encode this attribute > as > a tuning struct property rather than an inline conditional based on > arm_arch7. > I agree that if we want to use that information, it should be encoded this > way. > What I'm not convinced about is whether we do want this parameter in the > first place. > > The cost tables already encode information about the costs of different > sized loads/stores. > In patch 2, for example, you add the cost for extra_cost->ldst.load which is > nominally just > the cost of a normal 32-bit ldr. But we also have costs for ldst.ldrd which > is the 64-bit two-register load > which should reflect any extra cost due to a narrower bus in it. We also > have costs for ldst.loadf (for 32-bit > VFP loads) and ldst.loadd (for 64-bit VFP D-register loads). So I think we > should use those cost fields > depending on the mode class and size instead of using ldst.load > unconditionally and adding a new bus_width parameter. > > So I think the way forward is to drop this patch and modify patch 2/3 to use > the extra_cost->ldst fields as described above. > > Sorry for the back-and-forth. I think this is the best approach because it > uses the existing fields more naturally and > doesn't add new parameters that partly duplicate the information encoded in > the existing fields. > Ramana, Richard: if you prefer the bus_width approach I won't block it, but > could you clarify your preference? > If we do end up adding the bus_width parameter then this patch and patch 2/3 > look ok. > Thanks, > Kyrill > > P.S. I'm going on a 4-week holiday from today, so I won't be able to do any > further review in that timeframe. > As I said, if we go with the bus_width approach then these patches are ok. If > we go with my suggestion, this would > be dropped and patch 2 would be extended to select the appropriate > extra_cost->ldst field depending on mode.
OK, I agree with dropping this patch. I have posted an updated patch 2 which does not require it.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 4538078..47a85cc 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -278,6 +278,8 @@ struct tune_params int max_insns_inline_memset; /* Issue rate of the processor. */ unsigned int issue_rate; + /* Bus width (bits). */ + unsigned int bus_width; /* Explicit prefetch data. */ struct { diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index bca8a34..32001e5 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -1761,6 +1761,7 @@ const struct tune_params arm_slowmul_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1783,6 +1784,7 @@ const struct tune_params arm_fastmul_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1808,6 +1810,7 @@ const struct tune_params arm_strongarm_tune = 3, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1830,6 +1833,7 @@ const struct tune_params arm_xscale_tune = 3, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1852,6 +1856,7 @@ const struct tune_params arm_9e_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1874,6 +1879,7 @@ const struct tune_params arm_marvell_pj4_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 32, /* Bus width. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -1896,6 +1902,7 @@ const struct tune_params arm_v6t2_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1920,6 +1927,7 @@ const struct tune_params arm_cortex_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1942,6 +1950,7 @@ const struct tune_params arm_cortex_a8_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1964,6 +1973,7 @@ const struct tune_params arm_cortex_a7_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -1986,6 +1996,7 @@ const struct tune_params arm_cortex_a15_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2008,6 +2019,7 @@ const struct tune_params arm_cortex_a35_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2030,6 +2042,7 @@ const struct tune_params arm_cortex_a53_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2052,6 +2065,7 @@ const struct tune_params arm_cortex_a57_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2074,6 +2088,7 @@ const struct tune_params arm_exynosm1_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 3, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2096,6 +2111,7 @@ const struct tune_params arm_xgene1_tune = 2, /* Max cond insns. */ 32, /* Memset max inline. */ 4, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2121,6 +2137,7 @@ const struct tune_params arm_cortex_a5_tune = 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2143,6 +2160,7 @@ const struct tune_params arm_cortex_a9_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_BENEFICIAL(4,32,32), tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2165,6 +2183,7 @@ const struct tune_params arm_cortex_a12_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2187,6 +2206,7 @@ const struct tune_params arm_cortex_a73_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, @@ -2216,6 +2236,7 @@ const struct tune_params arm_v7m_tune = 2, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -2240,6 +2261,7 @@ const struct tune_params arm_cortex_m7_tune = 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 64, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, @@ -2265,6 +2287,7 @@ const struct tune_params arm_v6m_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, @@ -2287,6 +2310,7 @@ const struct tune_params arm_fa726te_tune = 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ + 32, /* Bus width. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE,
From: Charles Baylis <charles.baylis@linaro.org> Add bus widths. These use the approximation that v7 and later cores have 64bit data bus width, and earlier cores have 32 bit bus width, with the exception of v7m. <date> Charles Baylis <charles.baylis@linaro.org> * config/arm/arm-protos.h (struct tune_params): New field bus_width. * config/arm/arm.c (arm_slowmul_tune): Initialise bus_width field. (arm_fastmul_tune): Likewise. (arm_strongarm_tune): Likewise. (arm_xscale_tune): Likewise. (arm_9e_tune): Likewise. (arm_marvell_pj4_tune): Likewise. (arm_v6t2_tune): Likewise. (arm_cortex_tune): Likewise. (arm_cortex_a8_tune): Likewise. (arm_cortex_a7_tune): Likewise. (arm_cortex_a15_tune): Likewise. (arm_cortex_a35_tune): Likewise. (arm_cortex_a53_tune): Likewise. (arm_cortex_a57_tune): Likewise. (arm_exynosm1_tune): Likewise. (arm_xgene1_tune): Likewise. (arm_cortex_a5_tune): Likewise. (arm_cortex_a9_tune): Likewise. (arm_cortex_a12_tune): Likewise. (arm_cortex_a73_tune): Likewise. (arm_v7m_tune): Likewise. (arm_cortex_m7_tune): Likewise. (arm_v6m_tune): Likewise. (arm_fa726te_tune): Likewise. Change-Id: I613e876db93ffd6f8c1e72ba483be2efc0b56d66 --- gcc/config/arm/arm-protos.h | 2 ++ gcc/config/arm/arm.c | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) -- 2.7.4