Message ID | 1529949285-11013-4-git-send-email-will.deacon@arm.com |
---|---|
State | Superseded |
Headers | show |
Series | Support rseq on arm64 | expand |
----- On Jun 25, 2018, at 1:54 PM, Will Deacon will.deacon@arm.com wrote: [...] > +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ > + post_commit_offset, abort_ip) \ > + " .pushsection __rseq_table, \"aw\"\n" \ > + " .balign 32\n" \ > + __rseq_str(label) ":\n" \ > + " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ > + " .quad " __rseq_str(start_ip) ", " \ > + __rseq_str(post_commit_offset) ", " \ > + __rseq_str(abort_ip) "\n" \ > + " .popsection\n" > + > +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ > + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ > + (post_commit_ip - start_ip), abort_ip) > + > +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ > + RSEQ_INJECT_ASM(1) \ > + " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ > + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ > + ", :lo12:" __rseq_str(cs_label) "\n" \ > + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ > + __rseq_str(label) ":\n" > + > +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ > + " .pushsection __rseq_failure, \"ax\"\n" \ > + " .long " __rseq_str(RSEQ_SIG) "\n" \ > + __rseq_str(label) ":\n" \ > + " b %l[" __rseq_str(abort_label) "]\n" \ > + " .popsection\n" Thanks Will for porting rseq to arm64 ! I notice you are using the instructions adrp add str to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare performance-wise with an approach using a literal pool near the instruction pointer like I did on arm32 ? With that approach, this ends up being simply adr str which provides significantly better performance on my test platform over loading a pointer targeting a separate data section. Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com
Hi Mathieu, On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: > ----- On Jun 25, 2018, at 1:54 PM, Will Deacon will.deacon@arm.com wrote: > > +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ > > + post_commit_offset, abort_ip) \ > > + " .pushsection __rseq_table, \"aw\"\n" \ > > + " .balign 32\n" \ > > + __rseq_str(label) ":\n" \ > > + " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ > > + " .quad " __rseq_str(start_ip) ", " \ > > + __rseq_str(post_commit_offset) ", " \ > > + __rseq_str(abort_ip) "\n" \ > > + " .popsection\n" > > + > > +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ > > + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ > > + (post_commit_ip - start_ip), abort_ip) > > + > > +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ > > + RSEQ_INJECT_ASM(1) \ > > + " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ > > + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ > > + ", :lo12:" __rseq_str(cs_label) "\n" \ > > + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ > > + __rseq_str(label) ":\n" > > + > > +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ > > + " .pushsection __rseq_failure, \"ax\"\n" \ > > + " .long " __rseq_str(RSEQ_SIG) "\n" \ > > + __rseq_str(label) ":\n" \ > > + " b %l[" __rseq_str(abort_label) "]\n" \ > > + " .popsection\n" > > Thanks Will for porting rseq to arm64 ! That's ok, it was good fun :) I'm going to chat with our compiler guys to see if there's any room for improving the flexibility in the critical section, since having a temporary in the clobber list is pretty grotty. > I notice you are using the instructions > > adrp > add > str > > to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare > performance-wise with an approach using a literal pool > near the instruction pointer like I did on arm32 ? I didn't, no. Do you have a benchmark to hand so I can give this a go? The two reasons I didn't go down this route are: 1. 
It introduces data which is mapped as executable. I don't have a specific security concern here, but the way things have gone so far this year, I've realised that I'm not bright enough to anticipate these things. 2. It introduces a branch over the table on the fast path, which is likely to have a relatively higher branch misprediction cost on more advanced CPUs. I also find it grotty that we emit two tables so that debuggers can cope, but that's just a cosmetic nit. > With that approach, this ends up being simply > > adr > str > > which provides significantly better performance on my test > platform over loading a pointer targeting a separate data > section. My understanding is that your test platform is based on Cortex-A7, so I'd be wary about concluding too much about general performance from that CPU since it's a pretty straightforward in-order design. Will
----- On Jun 26, 2018, at 11:14 AM, Will Deacon will.deacon@arm.com wrote: > Hi Mathieu, > > On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: >> ----- On Jun 25, 2018, at 1:54 PM, Will Deacon will.deacon@arm.com wrote: >> > +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ >> > + post_commit_offset, abort_ip) \ >> > + " .pushsection __rseq_table, \"aw\"\n" \ >> > + " .balign 32\n" \ >> > + __rseq_str(label) ":\n" \ >> > + " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ >> > + " .quad " __rseq_str(start_ip) ", " \ >> > + __rseq_str(post_commit_offset) ", " \ >> > + __rseq_str(abort_ip) "\n" \ >> > + " .popsection\n" >> > + >> > +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ >> > + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ >> > + (post_commit_ip - start_ip), abort_ip) >> > + >> > +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ >> > + RSEQ_INJECT_ASM(1) \ >> > + " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ >> > + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ >> > + ", :lo12:" __rseq_str(cs_label) "\n" \ >> > + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ >> > + __rseq_str(label) ":\n" >> > + >> > +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ >> > + " .pushsection __rseq_failure, \"ax\"\n" \ >> > + " .long " __rseq_str(RSEQ_SIG) "\n" \ >> > + __rseq_str(label) ":\n" \ >> > + " b %l[" __rseq_str(abort_label) "]\n" \ >> > + " .popsection\n" >> >> Thanks Will for porting rseq to arm64 ! > > That's ok, it was good fun :) > > I'm going to chat with our compiler guys to see if there's any room for > improving the flexibility in the critical section, since having a temporary > in the clobber list is pretty grotty. Let me know how it goes! > >> I notice you are using the instructions >> >> adrp >> add >> str >> >> to implement RSEQ_ASM_STORE_RSEQ_CS(). 
Did you compare >> performance-wise with an approach using a literal pool >> near the instruction pointer like I did on arm32 ? > > I didn't, no. Do you have a benchmark to hand so I can give this a go? see tools/testing/selftests/rseq/param_test_benchmark --help It's a stripped-down version of param_test, without all the code for delay loops and testing checks. Example use for counter increment with 4 threads, doing 5G counter increments per thread: time ./param_test_benchmark -T i -t 4 -r 5000000000 > The two reasons I didn't go down this route are: > > 1. It introduces data which is mapped as executable. I don't have a > specific security concern here, but the way things have gone so far > this year, I've realised that I'm not bright enough to anticipate > these things. So far I've been able to dig up that "pure code" or "execute only" code is explicitly requested by compiler flags (-mno-pc-relative-literal-loads on aarch64, -mpure-code on arm32 when the moon cycle is aligned). It's a shame that it's not more standard, or that there does not appear to be any preprocessor define available to test this within code. I'm all for allowing end users to chose whether they want to use literal pools in code or not, but I think it should be configurable at compile time, and we should make it similar on arm32 and arm64. Given that compilers don't emit preprocessor define, perhaps we need to introduce our own RSEQ_NO_PC_RELATIVE_LITERAL_LOADS (or perhaps a shorter name ?) define to select behavior at compile-time. > 2. It introduces a branch over the table on the fast path, which is likely > to have a relatively higher branch misprediction cost on more advanced > CPUs. Hrm, wait a second... 
I see that your comparison of the cpu number requires: +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \ + ", %w[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n" because the abort code is emitted in a separate section: +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + " .pushsection __rseq_failure, \"ax\"\n" \ + " .long " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + " b %l[" __rseq_str(abort_label) "]\n" \ + " .popsection\n" Like I did on x86. But the cbnz instruction requires the branch target to be within +/- 1MB from the instruction (http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch06s04.html), which clearly is not guaranteed when you place the abort label in a separate section. Also, using cbnz to jump to a label that is outside of the assembly (e.g. %l[error1]) does not ensure that the branch target is within 1MB of the code. I've had assembler issues on arm32 due to those kind of constraints when integrating rseq headers into larger code-bases. So, one way to fix the fast-path so cpu number comparison can branch to a close location is to put the abort code near the fast-path, and you end up having to unconditionally jump over the abort code from the fast-path on success. So once you bite the bullet and jump over abort, you just have to ensure you place the struct rseq_cs data near the abort code, so you end up jumping over both at the same time. > > I also find it grotty that we emit two tables so that debuggers can cope, > but that's just a cosmetic nit. > >> With that approach, this ends up being simply >> >> adr >> str >> >> which provides significantly better performance on my test >> platform over loading a pointer targeting a separate data >> section. 
> > My understanding is that your test platform is based on Cortex-A7, so I'd > be wary about concluding too much about general performance from that CPU > since its a pretty straightforward in-order design. I did benchmarks on our Wandboard (Cortex A9) as well as the Cubietruck. I could only use perf to do detailed breakdown of the fast-path overhead on the Cubie because I could not get it to work on our Wandboard, but overall speed was better on Wandboard as well (as I recall) with the literal pool. Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com
Hi Mathieu, On Tue, Jun 26, 2018 at 12:11:52PM -0400, Mathieu Desnoyers wrote: > ----- On Jun 26, 2018, at 11:14 AM, Will Deacon will.deacon@arm.com wrote: > > On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: > >> I notice you are using the instructions > >> > >> adrp > >> add > >> str > >> > >> to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare > >> performance-wise with an approach using a literal pool > >> near the instruction pointer like I did on arm32 ? > > > > I didn't, no. Do you have a benchmark to hand so I can give this a go? > > see tools/testing/selftests/rseq/param_test_benchmark --help > > It's a stripped-down version of param_test, without all the code for > delay loops and testing checks. > > Example use for counter increment with 4 threads, doing 5G counter > increments per thread: > > time ./param_test_benchmark -T i -t 4 -r 5000000000 Thanks. I ran that on a few arm64 systems I have access to, with three configurations of the selftest: 1. As I posted 2. With the abort signature and branch in-lined, so as to avoid the CBNZ address limitations in large codebases 3. With both the abort handler and the table inlined (i.e. the same thing as 32-bit). There isn't a reliably measurable difference between (1) and (2), but I take between 12% and 27% hit between (2) and (3). So I'll post a v2 based on (2). Will
----- On Jun 28, 2018, at 12:47 PM, Will Deacon will.deacon@arm.com wrote: > Hi Mathieu, > > On Tue, Jun 26, 2018 at 12:11:52PM -0400, Mathieu Desnoyers wrote: >> ----- On Jun 26, 2018, at 11:14 AM, Will Deacon will.deacon@arm.com wrote: >> > On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: >> >> I notice you are using the instructions >> >> >> >> adrp >> >> add >> >> str >> >> >> >> to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare >> >> performance-wise with an approach using a literal pool >> >> near the instruction pointer like I did on arm32 ? >> > >> > I didn't, no. Do you have a benchmark to hand so I can give this a go? >> >> see tools/testing/selftests/rseq/param_test_benchmark --help >> >> It's a stripped-down version of param_test, without all the code for >> delay loops and testing checks. >> >> Example use for counter increment with 4 threads, doing 5G counter >> increments per thread: >> >> time ./param_test_benchmark -T i -t 4 -r 5000000000 > > Thanks. I ran that on a few arm64 systems I have access to, with three > configurations of the selftest: > > 1. As I posted > 2. With the abort signature and branch in-lined, so as to avoid the CBNZ > address limitations in large codebases > 3. With both the abort handler and the table inlined (i.e. the same thing > as 32-bit). > > There isn't a reliably measurable difference between (1) and (2), but I take > between 12% and 27% hit between (2) and (3). Those results puzzle me. Do you have the actual code snippets of each implementation nearby ? Thanks, Mathieu > > So I'll post a v2 based on (2). > > Will -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com
On Thu, Jun 28, 2018 at 04:50:40PM -0400, Mathieu Desnoyers wrote: > ----- On Jun 28, 2018, at 12:47 PM, Will Deacon will.deacon@arm.com wrote: > > On Tue, Jun 26, 2018 at 12:11:52PM -0400, Mathieu Desnoyers wrote: > >> ----- On Jun 26, 2018, at 11:14 AM, Will Deacon will.deacon@arm.com wrote: > >> > On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: > >> >> I notice you are using the instructions > >> >> > >> >> adrp > >> >> add > >> >> str > >> >> > >> >> to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare > >> >> performance-wise with an approach using a literal pool > >> >> near the instruction pointer like I did on arm32 ? > >> > > >> > I didn't, no. Do you have a benchmark to hand so I can give this a go? > >> > >> see tools/testing/selftests/rseq/param_test_benchmark --help > >> > >> It's a stripped-down version of param_test, without all the code for > >> delay loops and testing checks. > >> > >> Example use for counter increment with 4 threads, doing 5G counter > >> increments per thread: > >> > >> time ./param_test_benchmark -T i -t 4 -r 5000000000 > > > > Thanks. I ran that on a few arm64 systems I have access to, with three > > configurations of the selftest: > > > > 1. As I posted > > 2. With the abort signature and branch in-lined, so as to avoid the CBNZ > > address limitations in large codebases > > 3. With both the abort handler and the table inlined (i.e. the same thing > > as 32-bit). > > > > There isn't a reliably measurable difference between (1) and (2), but I take > > between 12% and 27% hit between (2) and (3). > > Those results puzzle me. Do you have the actual code snippets of each > implementation nearby ? Sure, I've included the diffs for (2) and (3) below. 
They both apply on top of my branch at: git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git rseq Will --->8 diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 599788f74137..954f34671ca6 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -104,11 +104,11 @@ do { \ __rseq_str(label) ":\n" #define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ - " .pushsection __rseq_failure, \"ax\"\n" \ - " .long " __rseq_str(RSEQ_SIG) "\n" \ + " b 222f\n" \ + " .inst " __rseq_str(RSEQ_SIG) "\n" \ __rseq_str(label) ":\n" \ " b %l[" __rseq_str(abort_label) "]\n" \ - " .popsection\n" + "222:\n" #define RSEQ_ASM_OP_STORE(value, var) \ " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" --->8 diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 599788f74137..2554aa17acf3 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -80,35 +80,37 @@ do { \ #define RSEQ_ASM_TMP_REG "x15" #define RSEQ_ASM_TMP_REG_2 "x14" -#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ +#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \ post_commit_offset, abort_ip) \ - " .pushsection __rseq_table, \"aw\"\n" \ - " .balign 32\n" \ - __rseq_str(label) ":\n" \ " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ " .quad " __rseq_str(start_ip) ", " \ __rseq_str(post_commit_offset) ", " \ - __rseq_str(abort_ip) "\n" \ - " .popsection\n" + __rseq_str(abort_ip) "\n" -#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ - __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ - (post_commit_ip - start_ip), abort_ip) +#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \ + " .pushsection __rseq_table, \"aw\"\n" \ + " .balign 32\n" \ + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) \ + " 
.popsection\n" -#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ +#define RSEQ_ASM_STORE_RSEQ_CS(label, table_label, rseq_cs) \ RSEQ_INJECT_ASM(1) \ - " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ - " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ - ", :lo12:" __rseq_str(cs_label) "\n" \ + " adr " RSEQ_ASM_TMP_REG ", " __rseq_str(table_label) "\n" \ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ __rseq_str(label) ":\n" -#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ - " .pushsection __rseq_failure, \"ax\"\n" \ - " .long " __rseq_str(RSEQ_SIG) "\n" \ +#define RSEQ_ASM_DEFINE_ABORT(table_label, start_ip, post_commit_ip, label, \ + abort_label) \ + " b 222f\n" \ + " .balign 32\n" \ + __rseq_str(table_label) ":\n" \ + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), label ## f) \ + " .inst " __rseq_str(RSEQ_SIG) "\n" \ __rseq_str(label) ":\n" \ " b %l[" __rseq_str(abort_label) "]\n" \ - " .popsection\n" + "222:\n" #define RSEQ_ASM_OP_STORE(value, var) \ " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" @@ -181,8 +183,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -191,9 +193,9 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) #endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -230,8 +232,8 
@@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) @@ -243,9 +245,9 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_OP_R_LOAD(v) RSEQ_ASM_OP_R_STORE(load) RSEQ_ASM_OP_R_LOAD_OFF(voffp) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) RSEQ_INJECT_ASM(5) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -281,8 +283,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) #ifdef RSEQ_COMPARE_TWICE @@ -290,9 +292,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) #endif RSEQ_ASM_OP_R_LOAD(v) RSEQ_ASM_OP_R_ADD(count) - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) RSEQ_INJECT_ASM(4) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -324,8 +326,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -336,9 +338,9 @@ 
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, #endif RSEQ_ASM_OP_STORE(newv2, v2) RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -378,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -390,9 +392,9 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, #endif RSEQ_ASM_OP_STORE(newv2, v2) RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 2) RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -432,8 +434,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -445,9 +447,9 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) #endif - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm 
goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -489,8 +491,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -501,9 +503,9 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, #endif RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id), @@ -544,8 +546,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_INJECT_C(9) __asm__ __volatile__ goto ( - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) RSEQ_INJECT_ASM(3) RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) @@ -556,9 +558,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, #endif RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) RSEQ_INJECT_ASM(5) - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 2) RSEQ_INJECT_ASM(6) - RSEQ_ASM_DEFINE_ABORT(4, abort) + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
----- On Jul 2, 2018, at 12:49 PM, Will Deacon will.deacon@arm.com wrote: > On Thu, Jun 28, 2018 at 04:50:40PM -0400, Mathieu Desnoyers wrote: >> ----- On Jun 28, 2018, at 12:47 PM, Will Deacon will.deacon@arm.com wrote: >> > On Tue, Jun 26, 2018 at 12:11:52PM -0400, Mathieu Desnoyers wrote: >> >> ----- On Jun 26, 2018, at 11:14 AM, Will Deacon will.deacon@arm.com wrote: >> >> > On Mon, Jun 25, 2018 at 02:10:10PM -0400, Mathieu Desnoyers wrote: >> >> >> I notice you are using the instructions >> >> >> >> >> >> adrp >> >> >> add >> >> >> str >> >> >> >> >> >> to implement RSEQ_ASM_STORE_RSEQ_CS(). Did you compare >> >> >> performance-wise with an approach using a literal pool >> >> >> near the instruction pointer like I did on arm32 ? >> >> > >> >> > I didn't, no. Do you have a benchmark to hand so I can give this a go? >> >> >> >> see tools/testing/selftests/rseq/param_test_benchmark --help >> >> >> >> It's a stripped-down version of param_test, without all the code for >> >> delay loops and testing checks. >> >> >> >> Example use for counter increment with 4 threads, doing 5G counter >> >> increments per thread: >> >> >> >> time ./param_test_benchmark -T i -t 4 -r 5000000000 >> > >> > Thanks. I ran that on a few arm64 systems I have access to, with three >> > configurations of the selftest: >> > >> > 1. As I posted >> > 2. With the abort signature and branch in-lined, so as to avoid the CBNZ >> > address limitations in large codebases >> > 3. With both the abort handler and the table inlined (i.e. the same thing >> > as 32-bit). >> > >> > There isn't a reliably measurable difference between (1) and (2), but I take >> > between 12% and 27% hit between (2) and (3). >> >> Those results puzzle me. Do you have the actual code snippets of each >> implementation nearby ? > > Sure, I've included the diffs for (2) and (3) below. 
They both apply on top > of my branch at: > > git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git rseq > > Will I figured out that ADRP+ADD are optimized on Cortex A57 to have a 1 cycle latency. This would explain why they are doing comparatively well compared to ADR. And I guess having more compact code wins here. So I'm OK with your patchset with the modification for (2), which ensures the abort label is not too far away on large code-bases. Thanks! Mathieu > > --->8 > > diff --git a/tools/testing/selftests/rseq/rseq-arm64.h > b/tools/testing/selftests/rseq/rseq-arm64.h > index 599788f74137..954f34671ca6 100644 > --- a/tools/testing/selftests/rseq/rseq-arm64.h > +++ b/tools/testing/selftests/rseq/rseq-arm64.h > @@ -104,11 +104,11 @@ do { \ > __rseq_str(label) ":\n" > > #define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ > - " .pushsection __rseq_failure, \"ax\"\n" \ > - " .long " __rseq_str(RSEQ_SIG) "\n" \ > + " b 222f\n" \ > + " .inst " __rseq_str(RSEQ_SIG) "\n" \ > __rseq_str(label) ":\n" \ > " b %l[" __rseq_str(abort_label) "]\n" \ > - " .popsection\n" > + "222:\n" > > #define RSEQ_ASM_OP_STORE(value, var) \ > " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" > > --->8 > > diff --git a/tools/testing/selftests/rseq/rseq-arm64.h > b/tools/testing/selftests/rseq/rseq-arm64.h > index 599788f74137..2554aa17acf3 100644 > --- a/tools/testing/selftests/rseq/rseq-arm64.h > +++ b/tools/testing/selftests/rseq/rseq-arm64.h > @@ -80,35 +80,37 @@ do { \ > #define RSEQ_ASM_TMP_REG "x15" > #define RSEQ_ASM_TMP_REG_2 "x14" > > -#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ > +#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \ > post_commit_offset, abort_ip) \ > - " .pushsection __rseq_table, \"aw\"\n" \ > - " .balign 32\n" \ > - __rseq_str(label) ":\n" \ > " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ > " .quad " __rseq_str(start_ip) ", " \ > __rseq_str(post_commit_offset) ", " \ > - __rseq_str(abort_ip) "\n" \ > - 
" .popsection\n" > + __rseq_str(abort_ip) "\n" > > -#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ > - __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ > - (post_commit_ip - start_ip), abort_ip) > +#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \ > + " .pushsection __rseq_table, \"aw\"\n" \ > + " .balign 32\n" \ > + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ > + (post_commit_ip - start_ip), abort_ip) \ > + " .popsection\n" > > -#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ > +#define RSEQ_ASM_STORE_RSEQ_CS(label, table_label, rseq_cs) \ > RSEQ_INJECT_ASM(1) \ > - " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ > - " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ > - ", :lo12:" __rseq_str(cs_label) "\n" \ > + " adr " RSEQ_ASM_TMP_REG ", " __rseq_str(table_label) "\n" \ > " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ > __rseq_str(label) ":\n" > > -#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ > - " .pushsection __rseq_failure, \"ax\"\n" \ > - " .long " __rseq_str(RSEQ_SIG) "\n" \ > +#define RSEQ_ASM_DEFINE_ABORT(table_label, start_ip, post_commit_ip, label, \ > + abort_label) \ > + " b 222f\n" \ > + " .balign 32\n" \ > + __rseq_str(table_label) ":\n" \ > + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ > + (post_commit_ip - start_ip), label ## f) \ > + " .inst " __rseq_str(RSEQ_SIG) "\n" \ > __rseq_str(label) ":\n" \ > " b %l[" __rseq_str(abort_label) "]\n" \ > - " .popsection\n" > + "222:\n" > > #define RSEQ_ASM_OP_STORE(value, var) \ > " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" > @@ -181,8 +183,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, > intptr_t newv, int cpu) > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > 
RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -191,9 +193,9 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, > intptr_t newv, int cpu) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) > #endif > - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -230,8 +232,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t > expectnot, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) > @@ -243,9 +245,9 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t > expectnot, > RSEQ_ASM_OP_R_LOAD(v) > RSEQ_ASM_OP_R_STORE(load) > RSEQ_ASM_OP_R_LOAD_OFF(voffp) > - RSEQ_ASM_OP_R_FINAL_STORE(v, 3) > + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -281,8 +283,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > #ifdef RSEQ_COMPARE_TWICE > @@ -290,9 +292,9 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) > #endif > RSEQ_ASM_OP_R_LOAD(v) > RSEQ_ASM_OP_R_ADD(count) > - 
RSEQ_ASM_OP_R_FINAL_STORE(v, 3) > + RSEQ_ASM_OP_R_FINAL_STORE(v, 2) > RSEQ_INJECT_ASM(4) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -324,8 +326,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t > expect, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -336,9 +338,9 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t > expect, > #endif > RSEQ_ASM_OP_STORE(newv2, v2) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) > RSEQ_INJECT_ASM(6) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -378,8 +380,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, > intptr_t expect, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -390,9 +392,9 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, > intptr_t expect, > #endif > RSEQ_ASM_OP_STORE(newv2, v2) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 2) > RSEQ_INJECT_ASM(6) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" 
(cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -432,8 +434,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -445,9 +447,9 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, > RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) > RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) > #endif > - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) > RSEQ_INJECT_ASM(6) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -489,8 +491,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t > expect, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -501,9 +503,9 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t > expect, > #endif > RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE(newv, v, 2) > RSEQ_INJECT_ASM(6) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), > @@ -544,8 +546,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, > intptr_t expect, > RSEQ_INJECT_C(9) > > __asm__ __volatile__ goto ( > - RSEQ_ASM_DEFINE_TABLE(1, 2f, 
3f, 4f) > - RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) > + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) > + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) > RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) > RSEQ_INJECT_ASM(3) > RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) > @@ -556,9 +558,9 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, > intptr_t expect, > #endif > RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) > RSEQ_INJECT_ASM(5) > - RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) > + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 2) > RSEQ_INJECT_ASM(6) > - RSEQ_ASM_DEFINE_ABORT(4, abort) > + RSEQ_ASM_DEFINE_ABORT(3, 1b, 2b, 4, abort) > : /* gcc asm goto does not allow outputs */ > : [cpu_id] "r" (cpu), > [current_cpu_id] "Qo" (__rseq_abi.cpu_id), -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 615252331813..fa144c556371 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -114,6 +114,26 @@ unsigned int yield_mod_cnt, nr_abort; "bne 222b\n\t" \ "333:\n\t" +#elif defined(__AARCH64EL__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1] "Qo" (loop_cnt[1]) \ + , [loop_cnt_2] "Qo" (loop_cnt[2]) \ + , [loop_cnt_3] "Qo" (loop_cnt[3]) \ + , [loop_cnt_4] "Qo" (loop_cnt[4]) \ + , [loop_cnt_5] "Qo" (loop_cnt[5]) \ + , [loop_cnt_6] "Qo" (loop_cnt[6]) + +#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32 + +#define RSEQ_INJECT_ASM(n) \ + " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \ + " cbz " INJECT_ASM_REG ", 333f\n" \ + "222:\n" \ + " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \ + " cbnz " INJECT_ASM_REG ", 222b\n" \ + "333:\n" + #elif __PPC__ #define RSEQ_INJECT_INPUT \ diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h new file mode 100644 index 000000000000..599788f74137 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -0,0 +1,594 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * rseq-arm64.h + * + * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com> + */ + +#define RSEQ_SIG 0xd428bc00 /* BRK #0x45E0 */ + +#define rseq_smp_mb() __asm__ __volatile__ ("dmb ish" ::: "memory") +#define rseq_smp_rmb() __asm__ __volatile__ ("dmb ishld" ::: "memory") +#define rseq_smp_wmb() __asm__ __volatile__ ("dmb ishst" ::: "memory") + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1; \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("ldarb %w0, %1" \ + : "=r" (*(__u8 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 2: \ + asm volatile ("ldarh %w0, %1" \ + : "=r" (*(__u16 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 4: \ + asm 
volatile ("ldar %w0, %1" \ + : "=r" (*(__u32 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + case 8: \ + asm volatile ("ldar %0, %1" \ + : "=r" (*(__u64 *)p) \ + : "Q" (*p) : "memory"); \ + break; \ + } \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + switch (sizeof(*p)) { \ + case 1: \ + asm volatile ("stlrb %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u8)v) \ + : "memory"); \ + break; \ + case 2: \ + asm volatile ("stlrh %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u16)v) \ + : "memory"); \ + break; \ + case 4: \ + asm volatile ("stlr %w1, %0" \ + : "=Q" (*p) \ + : "r" ((__u32)v) \ + : "memory"); \ + break; \ + case 8: \ + asm volatile ("stlr %1, %0" \ + : "=Q" (*p) \ + : "r" ((__u64)v) \ + : "memory"); \ + break; \ + } \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#define RSEQ_ASM_TMP_REG32 "w15" +#define RSEQ_ASM_TMP_REG "x15" +#define RSEQ_ASM_TMP_REG_2 "x14" + +#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + " .pushsection __rseq_table, \"aw\"\n" \ + " .balign 32\n" \ + __rseq_str(label) ":\n" \ + " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \ + " .quad " __rseq_str(start_ip) ", " \ + __rseq_str(post_commit_offset) ", " \ + __rseq_str(abort_ip) "\n" \ + " .popsection\n" + +#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \ + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", :lo12:" __rseq_str(cs_label) "\n" \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \ + __rseq_str(label) ":\n" + +#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \ + " .pushsection __rseq_failure, \"ax\"\n" \ + " 
.long " __rseq_str(RSEQ_SIG) "\n" \ + __rseq_str(label) ":\n" \ + " b %l[" __rseq_str(abort_label) "]\n" \ + " .popsection\n" + +#define RSEQ_ASM_OP_STORE(value, var) \ + " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_STORE_RELEASE(value, var) \ + " stlr %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \ + RSEQ_ASM_OP_STORE_RELEASE(value, var) \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \ + ", %w[" __rseq_str(expect) "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_OP_CMPNE(var, expect, label) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(expect) "]\n" \ + " cbz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label) + +#define RSEQ_ASM_OP_R_LOAD(var) \ + " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_STORE(var) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" + +#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ + " ldr " RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(offset) "]]\n" + +#define RSEQ_ASM_OP_R_ADD(count) \ + " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \ + ", %[" __rseq_str(count) "]\n" + +#define 
RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \ + " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \ + __rseq_str(post_commit_label) ":\n" + +#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \ + " cbz %[" __rseq_str(len) "], 333f\n" \ + " mov " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n" \ + "222: sub " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n" \ + " ldrb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " strb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]" \ + ", " RSEQ_ASM_TMP_REG_2 "]\n" \ + " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \ + "333:\n" + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ 
__volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_STORE(load) + RSEQ_ASM_OP_R_LOAD_OFF(voffp) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expectnot] "r" (expectnot), + [load] "Qo" (*load), + [voffp] "r" (voffp) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + RSEQ_ASM_OP_R_LOAD(v) + RSEQ_ASM_OP_R_ADD(count) + RSEQ_ASM_OP_R_FINAL_STORE(v, 3) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [count] "r" (count) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 
0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + 
RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_STORE(newv2, v2) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [v2] "Qo" (*v2), + [newv2] "r" (newv2) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail]) + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) + RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3]) +#endif + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "Qo" (*v), + [expect] "r" (expect), + [v2] "Qo" (*v2), + [expect2] "r" (expect2), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + + return 
0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f) + RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, 
current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail]) + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2]) +#endif + RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) + RSEQ_INJECT_ASM(5) + RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3) + RSEQ_INJECT_ASM(6) + RSEQ_ASM_DEFINE_ABORT(4, abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "Qo" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [expect] "r" (expect), + [v] "Qo" (*v), + [newv] "r" (newv), + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len) + RSEQ_INJECT_INPUT + : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2 + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +cmpfail: + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index a4684112676c..b5d94087fe31 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -71,6 +71,8 @@ extern __thread volatile struct rseq __rseq_abi; #include <rseq-x86.h> #elif defined(__ARMEL__) #include <rseq-arm.h> +#elif defined (__AARCH64EL__) +#include <rseq-arm64.h> #elif defined(__PPC__) #include <rseq-ppc.h> #elif defined(__mips__)
Hook up arm64 support to the rseq selftests. Signed-off-by: Will Deacon <will.deacon@arm.com> --- tools/testing/selftests/rseq/param_test.c | 20 + tools/testing/selftests/rseq/rseq-arm64.h | 594 ++++++++++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.h | 2 + 3 files changed, 616 insertions(+) create mode 100644 tools/testing/selftests/rseq/rseq-arm64.h -- 2.1.4