Message ID | 20190408212648.2407234-12-arnd@arndb.de |
---|---|
State | New |
Headers | show |
Series | [01/12] s390: remove -fno-strength-reduce flag | expand |
On Mon, 8 Apr 2019 23:26:25 +0200 Arnd Bergmann <arnd@arndb.de> wrote: > clang does not understand the contraint "0" in the CALL_ON_STACK() > macro: > > ../arch/s390/mm/maccess.c:117:10: error: invalid input constraint '0' in asm > return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, > ^ > ../arch/s390/include/asm/processor.h:292:20: note: expanded from macro 'CALL_ON_STACK' > [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ > ^ > <scratch space>:207:1: note: expanded from here > CALL_FMT_3 > ^ > ../arch/s390/include/asm/processor.h:267:20: note: expanded from macro 'CALL_FMT_3' > #define CALL_FMT_3 CALL_FMT_2, "d" (r4) > ^ > ../arch/s390/include/asm/processor.h:266:20: note: expanded from macro 'CALL_FMT_2' > #define CALL_FMT_2 CALL_FMT_1, "d" (r3) > ^ > ../arch/s390/include/asm/processor.h:265:32: note: expanded from macro 'CALL_FMT_1' > #define CALL_FMT_1 CALL_FMT_0, "0" (r2) > ^ > > I don't know what the correct fix here would be, changing it to "d" made > it build, since clang does understand this one. > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > --- > arch/s390/include/asm/processor.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h > index 700c650ffd4f..84c59c99668a 100644 > --- a/arch/s390/include/asm/processor.h > +++ b/arch/s390/include/asm/processor.h > @@ -262,7 +262,7 @@ static __no_kasan_or_inline unsigned short stap(void) > register unsigned long r4 asm("6") = (unsigned long)(arg5) > > #define CALL_FMT_0 > -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) > +#define CALL_FMT_1 CALL_FMT_0, "d" (r2) > #define CALL_FMT_2 CALL_FMT_1, "d" (r3) > #define CALL_FMT_3 CALL_FMT_2, "d" (r4) > #define CALL_FMT_4 CALL_FMT_3, "d" (r5) This is (slightly) wrong. %r2 is used as the input register for the first argument and the result value for the call. With your patch you force the compiler to load the first argument in two registers. One solution would be to CALL_FMT1 as #define CALL_FMT1 CALL_FMT_0 It still is not optimal though as for CALL_FMT_0 the "+&d" (r2) indicates an input but CALL_ARGS_0 does not initialize r2. I am thinking about the following patch to cover all cases: -- From 91a4abbec91a9f26f84f7386f2c0f96de669b0eb Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Wed, 10 Apr 2019 15:48:43 +0200 Subject: [PATCH] s390: fine-tune stack switch helper The CALL_ON_STACK helper currently does not work with clang and for calls without arguments it does not initialize r2 although the contraint is "+&d". Rework the CALL_FMT_x and the CALL_ON_STACK macros to work with clang and produce optimal code in all cases. Reported-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/include/asm/processor.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 81038ab357ce..0ee022247580 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -261,12 +261,12 @@ static __no_kasan_or_inline unsigned short stap(void) CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5) -#define CALL_FMT_0 -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) -#define CALL_FMT_2 CALL_FMT_1, "d" (r3) -#define CALL_FMT_3 CALL_FMT_2, "d" (r4) -#define CALL_FMT_4 CALL_FMT_3, "d" (r5) -#define CALL_FMT_5 CALL_FMT_4, "d" (r6) +#define CALL_FMT_0 "=&d" (r2) : +#define CALL_FMT_1 "+&d" (r2) : +#define CALL_FMT_2 CALL_FMT_1 "d" (r3), +#define CALL_FMT_3 CALL_FMT_2 "d" (r4), +#define CALL_FMT_4 CALL_FMT_3 "d" (r5), +#define CALL_FMT_5 CALL_FMT_4 "d" (r6), #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -286,10 +286,10 @@ static __no_kasan_or_inline unsigned short stap(void) " stg %[_prev],%[_bc](15)\n" \ " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ - : "+&d" (r2), [_prev] "=&a" (prev) \ - : [_stack] "a" (stack), \ + : [_prev] "=&a" (prev), CALL_FMT_##nr \ + [_stack] "a" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ - [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ r2; \ }) -- 2.16.4 -- blue skies, Martin. "Reality continues to ruin my life." - Calvin.
On Wed, Apr 10, 2019 at 6:55 AM Martin Schwidefsky <schwidefsky@de.ibm.com> wrote: > > On Mon, 8 Apr 2019 23:26:25 +0200 > Arnd Bergmann <arnd@arndb.de> wrote: > > > clang does not understand the contraint "0" in the CALL_ON_STACK() > > macro: > > > > ../arch/s390/mm/maccess.c:117:10: error: invalid input constraint '0' in asm > > return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, > > ^ > > ../arch/s390/include/asm/processor.h:292:20: note: expanded from macro 'CALL_ON_STACK' > > [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ > > ^ > > <scratch space>:207:1: note: expanded from here > > CALL_FMT_3 > > ^ > > ../arch/s390/include/asm/processor.h:267:20: note: expanded from macro 'CALL_FMT_3' > > #define CALL_FMT_3 CALL_FMT_2, "d" (r4) > > ^ > > ../arch/s390/include/asm/processor.h:266:20: note: expanded from macro 'CALL_FMT_2' > > #define CALL_FMT_2 CALL_FMT_1, "d" (r3) > > ^ > > ../arch/s390/include/asm/processor.h:265:32: note: expanded from macro 'CALL_FMT_1' > > #define CALL_FMT_1 CALL_FMT_0, "0" (r2) > > ^ > > > > I don't know what the correct fix here would be, changing it to "d" made > > it build, since clang does understand this one. > > > > Signed-off-by: Arnd Bergmann <arnd@arndb.de> > > --- > > arch/s390/include/asm/processor.h | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h > > index 700c650ffd4f..84c59c99668a 100644 > > --- a/arch/s390/include/asm/processor.h > > +++ b/arch/s390/include/asm/processor.h > > @@ -262,7 +262,7 @@ static __no_kasan_or_inline unsigned short stap(void) > > register unsigned long r4 asm("6") = (unsigned long)(arg5) > > > > #define CALL_FMT_0 > > -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) > > +#define CALL_FMT_1 CALL_FMT_0, "d" (r2) > > #define CALL_FMT_2 CALL_FMT_1, "d" (r3) > > #define CALL_FMT_3 CALL_FMT_2, "d" (r4) > > #define CALL_FMT_4 CALL_FMT_3, "d" (r5) > > This is (slightly) wrong. %r2 is used as the input register for the first argument > and the result value for the call. With your patch you force the compiler to load > the first argument in two registers. One solution would be to CALL_FMT1 as > > #define CALL_FMT1 CALL_FMT_0 > > It still is not optimal though as for CALL_FMT_0 the "+&d" (r2) indicates an > input but CALL_ARGS_0 does not initialize r2. > > I am thinking about the following patch to cover all cases: > -- > From 91a4abbec91a9f26f84f7386f2c0f96de669b0eb Mon Sep 17 00:00:00 2001 > From: Martin Schwidefsky <schwidefsky@de.ibm.com> > Date: Wed, 10 Apr 2019 15:48:43 +0200 > Subject: [PATCH] s390: fine-tune stack switch helper Thanks for the patch. Just some minor typos in the commit. > > The CALL_ON_STACK helper currently does not work with clang and for > calls without arguments it does not initialize r2 although the contraint - calls without arguments it ... + calls without arguments. It ... - although the contraint + although the constraint `:set spell` in vim (I wonder if there's a way to set that when writing commit messages automatically) > is "+&d". Rework the CALL_FMT_x and the CALL_ON_STACK macros to work > with clang and produce optimal code in all cases. > > Reported-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> > --- > arch/s390/include/asm/processor.h | 18 +++++++++--------- > 1 file changed, 9 insertions(+), 9 deletions(-) > > diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h > index 81038ab357ce..0ee022247580 100644 > --- a/arch/s390/include/asm/processor.h > +++ b/arch/s390/include/asm/processor.h > @@ -261,12 +261,12 @@ static __no_kasan_or_inline unsigned short stap(void) > CALL_ARGS_4(arg1, arg2, arg3, arg4); \ > register unsigned long r4 asm("6") = (unsigned long)(arg5) > > -#define CALL_FMT_0 > -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) > -#define CALL_FMT_2 CALL_FMT_1, "d" (r3) > -#define CALL_FMT_3 CALL_FMT_2, "d" (r4) > -#define CALL_FMT_4 CALL_FMT_3, "d" (r5) > -#define CALL_FMT_5 CALL_FMT_4, "d" (r6) > +#define CALL_FMT_0 "=&d" (r2) : > +#define CALL_FMT_1 "+&d" (r2) : > +#define CALL_FMT_2 CALL_FMT_1 "d" (r3), > +#define CALL_FMT_3 CALL_FMT_2 "d" (r4), > +#define CALL_FMT_4 CALL_FMT_3 "d" (r5), > +#define CALL_FMT_5 CALL_FMT_4 "d" (r6), > > #define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" > #define CALL_CLOBBER_4 CALL_CLOBBER_5 > @@ -286,10 +286,10 @@ static __no_kasan_or_inline unsigned short stap(void) > " stg %[_prev],%[_bc](15)\n" \ > " brasl 14,%[_fn]\n" \ > " la 15,0(%[_prev])\n" \ > - : "+&d" (r2), [_prev] "=&a" (prev) \ > - : [_stack] "a" (stack), \ > + : [_prev] "=&a" (prev), CALL_FMT_##nr \ > + [_stack] "a" (stack), \ > [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ > - [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ > + [_fn] "X" (fn) : CALL_CLOBBER_##nr); \ > r2; \ > }) > > -- > 2.16.4 > -- > blue skies, > Martin. > > "Reality continues to ruin my life." - Calvin. > > -- > You received this message because you are subscribed to the Google Groups "Clang Built Linux" group. > To unsubscribe from this group and stop receiving emails from it, send an email to clang-built-linux+unsubscribe@googlegroups.com. > To post to this group, send email to clang-built-linux@googlegroups.com. > To view this discussion on the web visit https://groups.google.com/d/msgid/clang-built-linux/20190410155513.1609f1a1%40mschwideX1. > For more options, visit https://groups.google.com/d/optout. -- Thanks, ~Nick Desaulniers
On Wed, Apr 10, 2019 at 3:55 PM Martin Schwidefsky <schwidefsky@de.ibm.com> wrote: > > On Mon, 8 Apr 2019 23:26:25 +0200 > Arnd Bergmann <arnd@arndb.de> wrote: > > > diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h > > index 700c650ffd4f..84c59c99668a 100644 > > --- a/arch/s390/include/asm/processor.h > > +++ b/arch/s390/include/asm/processor.h > > @@ -262,7 +262,7 @@ static __no_kasan_or_inline unsigned short stap(void) > > register unsigned long r4 asm("6") = (unsigned long)(arg5) > > > > #define CALL_FMT_0 > > -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) > > +#define CALL_FMT_1 CALL_FMT_0, "d" (r2) > > #define CALL_FMT_2 CALL_FMT_1, "d" (r3) > > #define CALL_FMT_3 CALL_FMT_2, "d" (r4) > > #define CALL_FMT_4 CALL_FMT_3, "d" (r5) > > This is (slightly) wrong. %r2 is used as the input register for the first argument > and the result value for the call. With your patch you force the compiler to load > the first argument in two registers. One solution would be to CALL_FMT1 as > > #define CALL_FMT1 CALL_FMT_0 > > It still is not optimal though as for CALL_FMT_0 the "+&d" (r2) indicates an > input but CALL_ARGS_0 does not initialize r2. Ok, thanks for taking a closer look! > I am thinking about the following patch to cover all cases: > -- > From 91a4abbec91a9f26f84f7386f2c0f96de669b0eb Mon Sep 17 00:00:00 2001 > From: Martin Schwidefsky <schwidefsky@de.ibm.com> > Date: Wed, 10 Apr 2019 15:48:43 +0200 > Subject: [PATCH] s390: fine-tune stack switch helper > > The CALL_ON_STACK helper currently does not work with clang and for > calls without arguments it does not initialize r2 although the contraint > is "+&d". Rework the CALL_FMT_x and the CALL_ON_STACK macros to work > with clang and produce optimal code in all cases. > > Reported-by: Arnd Bergmann <arnd@arndb.de> > Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> I did another build test to confirm that your patch works fine with clang as well, looks good to me. Arnd
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 700c650ffd4f..84c59c99668a 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -262,7 +262,7 @@ static __no_kasan_or_inline unsigned short stap(void) register unsigned long r4 asm("6") = (unsigned long)(arg5) #define CALL_FMT_0 -#define CALL_FMT_1 CALL_FMT_0, "0" (r2) +#define CALL_FMT_1 CALL_FMT_0, "d" (r2) #define CALL_FMT_2 CALL_FMT_1, "d" (r3) #define CALL_FMT_3 CALL_FMT_2, "d" (r4) #define CALL_FMT_4 CALL_FMT_3, "d" (r5)
clang does not understand the contraint "0" in the CALL_ON_STACK() macro: ../arch/s390/mm/maccess.c:117:10: error: invalid input constraint '0' in asm return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack, ^ ../arch/s390/include/asm/processor.h:292:20: note: expanded from macro 'CALL_ON_STACK' [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \ ^ <scratch space>:207:1: note: expanded from here CALL_FMT_3 ^ ../arch/s390/include/asm/processor.h:267:20: note: expanded from macro 'CALL_FMT_3' #define CALL_FMT_3 CALL_FMT_2, "d" (r4) ^ ../arch/s390/include/asm/processor.h:266:20: note: expanded from macro 'CALL_FMT_2' #define CALL_FMT_2 CALL_FMT_1, "d" (r3) ^ ../arch/s390/include/asm/processor.h:265:32: note: expanded from macro 'CALL_FMT_1' #define CALL_FMT_1 CALL_FMT_0, "0" (r2) ^ I don't know what the correct fix here would be, changing it to "d" made it build, since clang does understand this one. Signed-off-by: Arnd Bergmann <arnd@arndb.de> --- arch/s390/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- 2.20.0