Message ID: 20240706191335.878142-8-pierrick.bouvier@linaro.org
State: New
Series: plugins: access values during a memory read/write
On 7/6/24 12:13, Pierrick Bouvier wrote:
> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c
> @@ -0,0 +1,89 @@
> +#include <emmintrin.h>
> +#include <pthread.h>

All new files should have license boilerplate and a description.
You can use SPDX to limit it to just a couple of lines.

r~
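For illustration, the kind of header being requested can stay very short. A minimal sketch, assuming a GPL-2.0-or-later SPDX tag (the actual license choice and wording are up to the submitter):

/*
 * Check the value reported for each memory access by exercising
 * loads, stores and atomics of different sizes.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */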
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > Add an explicit test to check expected memory values are read/written. > For sizes 8, 16, 32, 64 and 128, we generate a load/store operation. > For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too. > For 128bits memory access, we rely on SSE2 instructions. > > By default, atomic accesses are non atomic if a single cpu is running, > so we force creation of a second one by creating a new thread first. > > load/store helpers code path can't be triggered easily in user mode (no > softmmu), so we can't test it here. > > Can be run with: > make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so > > Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com> > Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> > --- > tests/tcg/x86_64/test-plugin-mem-access.c | 89 +++++++++++++++++++++ > tests/tcg/x86_64/Makefile.target | 7 ++ > tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++ > 3 files changed, 144 insertions(+) > create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c > create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh > > diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c > new file mode 100644 > index 00000000000..7fdd6a55829 > --- /dev/null > +++ b/tests/tcg/x86_64/test-plugin-mem-access.c > @@ -0,0 +1,89 @@ > +#include <emmintrin.h> > +#include <pthread.h> > +#include <stdint.h> > +#include <stdlib.h> > + > +static void *data; > + > +#define DEFINE_STORE(name, type, value) \ > +static void store_##name(void) \ > +{ \ > + *((type *)data) = value; \ > +} > + > +#define DEFINE_ATOMIC_OP(name, type, value) \ > +static void atomic_op_##name(void) \ > +{ \ > + *((type *)data) = 0x42; \ > + __sync_val_compare_and_swap((type *)data, 0x42, value); \ Should we exercise the other compare and swap ops? Do they all come through the same rwm path? > +} > + > +#define DEFINE_LOAD(name, type) \ > +static void load_##name(void) \ > +{ \ > + register type var asm("eax") = *((type *) data); \ > + (void)var; \ This is a bit weird. It's the only inline asm needed that makes this a non-multiarch test. Why? > +} > + > +DEFINE_STORE(u8, uint8_t, 0xf1) > +DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1) > +DEFINE_LOAD(u8, uint8_t) > +DEFINE_STORE(u16, uint16_t, 0xf123) > +DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123) > +DEFINE_LOAD(u16, uint16_t) > +DEFINE_STORE(u32, uint32_t, 0xff112233) > +DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233) > +DEFINE_LOAD(u32, uint32_t) > +DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef) > +DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef) > +DEFINE_LOAD(u64, uint64_t) > + > +static void store_u128(void) > +{ > + _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788, > + 0xf1234567, 0x89abcdef)); > +} > + > +static void load_u128(void) > +{ > + __m128i var = _mm_load_si128(data); > + (void)var; > +} > + > +static void *f(void *p) > +{ > + return NULL; > +} > + > +int main(void) > +{ > + /* > + * We force creation of a second thread to enable cpu flag CF_PARALLEL. > + * This will generate atomic operations when needed. 
> + */ > + pthread_t thread; > + pthread_create(&thread, NULL, &f, NULL); > + pthread_join(thread, NULL); > + > + data = malloc(sizeof(__m128i)); > + atomic_op_u8(); > + store_u8(); > + load_u8(); > + > + atomic_op_u16(); > + store_u16(); > + load_u16(); > + > + atomic_op_u32(); > + store_u32(); > + load_u32(); > + > + atomic_op_u64(); > + store_u64(); > + load_u64(); > + > + store_u128(); > + load_u128(); > + > + free(data); > +} > diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target > index eda9bd7396c..3edc29b924d 100644 > --- a/tests/tcg/x86_64/Makefile.target > +++ b/tests/tcg/x86_64/Makefile.target > @@ -16,6 +16,7 @@ X86_64_TESTS += noexec > X86_64_TESTS += cmpxchg > X86_64_TESTS += adox > X86_64_TESTS += test-1648 > +PLUGINS_TESTS += test-plugin-mem-access > TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 > else > TESTS=$(MULTIARCH_TESTS) > @@ -26,6 +27,12 @@ adox: CFLAGS=-O2 > run-test-i386-ssse3: QEMU_OPTS += -cpu max > run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max > > +run-plugin-test-plugin-mem-access-with-libmem.so: \ > + PLUGIN_ARGS=$(COMMA)print-accesses=true > +run-plugin-test-plugin-mem-access-with-libmem.so: \ > + CHECK_PLUGIN_OUTPUT_COMMAND= \ > + $(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh > + > test-x86_64: LDFLAGS+=-lm -lc > test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h > $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) > diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh > new file mode 100755 > index 00000000000..163f1cfad34 > --- /dev/null > +++ b/tests/tcg/x86_64/check-plugin-mem-access.sh > @@ -0,0 +1,48 @@ > +#!/usr/bin/env bash > + > +set -euo pipefail > + > +die() > +{ > + echo "$@" 1>&2 > + exit 1 > +} > + > +check() > +{ > + file=$1 > + pattern=$2 > + grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file" > +} > + > +[ $# -eq 1 ] || die "usage: plugin_out_file" > + > +plugin_out=$1 > + > +expected() > +{ > + cat << EOF > +,store_u8,.*,8,store,0xf1 > +,atomic_op_u8,.*,8,load,0x42 > +,atomic_op_u8,.*,8,store,0xf1 > +,load_u8,.*,8,load,0xf1 > +,store_u16,.*,16,store,0xf123 > +,atomic_op_u16,.*,16,load,0x0042 > +,atomic_op_u16,.*,16,store,0xf123 > +,load_u16,.*,16,load,0xf123 > +,store_u32,.*,32,store,0xff112233 > +,atomic_op_u32,.*,32,load,0x00000042 > +,atomic_op_u32,.*,32,store,0xff112233 > +,load_u32,.*,32,load,0xff112233 > +,store_u64,.*,64,store,0xf123456789abcdef > +,atomic_op_u64,.*,64,load,0x0000000000000042 > +,atomic_op_u64,.*,64,store,0xf123456789abcdef > +,load_u64,.*,64,load,0xf123456789abcdef > +,store_u128,.*,128,store,0xf122334455667788f123456789abcdef > +,load_u128,.*,128,load,0xf122334455667788f123456789abcdef > +EOF > +} > + > +expected | while read line; do > + check "$plugin_out" "$line" > +done
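For context on the compare-and-swap question above: GCC provides several other __sync/__atomic builtins besides __sync_val_compare_and_swap. A hedged sketch of what covering a couple more of them could look like, mirroring the DEFINE_ATOMIC_OP macro from the patch (the macro names and values here are illustrative, not part of the submission):

#define DEFINE_ATOMIC_FETCH_ADD(name, type, value)              \
static void atomic_fetch_add_##name(void)                       \
{                                                                \
    *((type *)data) = 0x42;                                      \
    /* read-modify-write through a different builtin */          \
    __sync_fetch_and_add((type *)data, value);                   \
}

#define DEFINE_ATOMIC_EXCHANGE(name, type, value)                \
static void atomic_exchange_##name(void)                         \
{                                                                \
    type v = value;                                              \
    __atomic_exchange_n((type *)data, v, __ATOMIC_SEQ_CST);      \
}

Whether such additions are worthwhile depends on whether they reach distinct code paths in QEMU, which is exactly what the question is probing.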
On 7/7/24 11:25, Richard Henderson wrote: > On 7/6/24 12:13, Pierrick Bouvier wrote: >> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c >> @@ -0,0 +1,89 @@ >> +#include <emmintrin.h> >> +#include <pthread.h> > > All new files should have license boilerplate and description. > You can use spdx to limit to just a couple of lines. > Added this. > > r~ > >
On 7/8/24 12:15, Alex Bennée wrote: > Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > >> Add an explicit test to check expected memory values are read/written. >> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation. >> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too. >> For 128bits memory access, we rely on SSE2 instructions. >> >> By default, atomic accesses are non atomic if a single cpu is running, >> so we force creation of a second one by creating a new thread first. >> >> load/store helpers code path can't be triggered easily in user mode (no >> softmmu), so we can't test it here. >> >> Can be run with: >> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so >> >> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com> >> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >> --- >> tests/tcg/x86_64/test-plugin-mem-access.c | 89 +++++++++++++++++++++ >> tests/tcg/x86_64/Makefile.target | 7 ++ >> tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++ >> 3 files changed, 144 insertions(+) >> create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c >> create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh >> >> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c >> new file mode 100644 >> index 00000000000..7fdd6a55829 >> --- /dev/null >> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c >> @@ -0,0 +1,89 @@ >> +#include <emmintrin.h> >> +#include <pthread.h> >> +#include <stdint.h> >> +#include <stdlib.h> >> + >> +static void *data; >> + >> +#define DEFINE_STORE(name, type, value) \ >> +static void store_##name(void) \ >> +{ \ >> + *((type *)data) = value; \ >> +} >> + >> +#define DEFINE_ATOMIC_OP(name, type, value) \ >> +static void atomic_op_##name(void) \ >> +{ \ >> + *((type *)data) = 0x42; \ >> + __sync_val_compare_and_swap((type *)data, 0x42, value); \ > > Should we exercise the other compare and swap ops? Do they all come > through the same rwm path? > There are definitely several paths depending on the generated atomic op. However, the code is pretty straightforward (and implemented in a single function), so my thought was that one way to trigger this was enough. >> +} >> + >> +#define DEFINE_LOAD(name, type) \ >> +static void load_##name(void) \ >> +{ \ >> + register type var asm("eax") = *((type *) data); \ >> + (void)var; \ > > This is a bit weird. It's the only inline asm needed that makes this a > non-multiarch test. Why? > I'll answer here about why this test is arch specific, and not a multi arch. The problem I met is that all target architecture do not have native 64/128 bits types, and depending how code is compiled with gcc, you may or not generated expected vector instructions for 128bits operation. Same for atomic operations. Thus, I chose to specialize this test for x86_64, and use sse2 directly for 128 bits integers. You might say "How about adding ifdef for this". Yes sure, but the check script would become complicated too, and I just wanted to keep it simple. Our interest here is not to check that memory accesses are correctly implemented in QEMU, but to check that a specific behavior on one arch is the one expected. Does it make more sense for you? 
>> +} >> + >> +DEFINE_STORE(u8, uint8_t, 0xf1) >> +DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1) >> +DEFINE_LOAD(u8, uint8_t) >> +DEFINE_STORE(u16, uint16_t, 0xf123) >> +DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123) >> +DEFINE_LOAD(u16, uint16_t) >> +DEFINE_STORE(u32, uint32_t, 0xff112233) >> +DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233) >> +DEFINE_LOAD(u32, uint32_t) >> +DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef) >> +DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef) >> +DEFINE_LOAD(u64, uint64_t) >> + >> +static void store_u128(void) >> +{ >> + _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788, >> + 0xf1234567, 0x89abcdef)); >> +} >> + >> +static void load_u128(void) >> +{ >> + __m128i var = _mm_load_si128(data); >> + (void)var; >> +} >> + >> +static void *f(void *p) >> +{ >> + return NULL; >> +} >> + >> +int main(void) >> +{ >> + /* >> + * We force creation of a second thread to enable cpu flag CF_PARALLEL. >> + * This will generate atomic operations when needed. >> + */ >> + pthread_t thread; >> + pthread_create(&thread, NULL, &f, NULL); >> + pthread_join(thread, NULL); >> + >> + data = malloc(sizeof(__m128i)); >> + atomic_op_u8(); >> + store_u8(); >> + load_u8(); >> + >> + atomic_op_u16(); >> + store_u16(); >> + load_u16(); >> + >> + atomic_op_u32(); >> + store_u32(); >> + load_u32(); >> + >> + atomic_op_u64(); >> + store_u64(); >> + load_u64(); >> + >> + store_u128(); >> + load_u128(); >> + >> + free(data); >> +} >> diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target >> index eda9bd7396c..3edc29b924d 100644 >> --- a/tests/tcg/x86_64/Makefile.target >> +++ b/tests/tcg/x86_64/Makefile.target >> @@ -16,6 +16,7 @@ X86_64_TESTS += noexec >> X86_64_TESTS += cmpxchg >> X86_64_TESTS += adox >> X86_64_TESTS += test-1648 >> +PLUGINS_TESTS += test-plugin-mem-access >> TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64 >> else >> TESTS=$(MULTIARCH_TESTS) >> @@ -26,6 +27,12 @@ adox: CFLAGS=-O2 >> run-test-i386-ssse3: QEMU_OPTS += -cpu max >> run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max >> >> +run-plugin-test-plugin-mem-access-with-libmem.so: \ >> + PLUGIN_ARGS=$(COMMA)print-accesses=true >> +run-plugin-test-plugin-mem-access-with-libmem.so: \ >> + CHECK_PLUGIN_OUTPUT_COMMAND= \ >> + $(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh >> + >> test-x86_64: LDFLAGS+=-lm -lc >> test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h >> $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) >> diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh >> new file mode 100755 >> index 00000000000..163f1cfad34 >> --- /dev/null >> +++ b/tests/tcg/x86_64/check-plugin-mem-access.sh >> @@ -0,0 +1,48 @@ >> +#!/usr/bin/env bash >> + >> +set -euo pipefail >> + >> +die() >> +{ >> + echo "$@" 1>&2 >> + exit 1 >> +} >> + >> +check() >> +{ >> + file=$1 >> + pattern=$2 >> + grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file" >> +} >> + >> +[ $# -eq 1 ] || die "usage: plugin_out_file" >> + >> +plugin_out=$1 >> + >> +expected() >> +{ >> + cat << EOF >> +,store_u8,.*,8,store,0xf1 >> +,atomic_op_u8,.*,8,load,0x42 >> +,atomic_op_u8,.*,8,store,0xf1 >> +,load_u8,.*,8,load,0xf1 >> +,store_u16,.*,16,store,0xf123 >> +,atomic_op_u16,.*,16,load,0x0042 >> +,atomic_op_u16,.*,16,store,0xf123 >> +,load_u16,.*,16,load,0xf123 >> +,store_u32,.*,32,store,0xff112233 >> +,atomic_op_u32,.*,32,load,0x00000042 >> +,atomic_op_u32,.*,32,store,0xff112233 >> +,load_u32,.*,32,load,0xff112233 >> +,store_u64,.*,64,store,0xf123456789abcdef 
>> +,atomic_op_u64,.*,64,load,0x0000000000000042 >> +,atomic_op_u64,.*,64,store,0xf123456789abcdef >> +,load_u64,.*,64,load,0xf123456789abcdef >> +,store_u128,.*,128,store,0xf122334455667788f123456789abcdef >> +,load_u128,.*,128,load,0xf122334455667788f123456789abcdef >> +EOF >> +} >> + >> +expected | while read line; do >> + check "$plugin_out" "$line" >> +done >
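On the inline asm question raised above: the register ... asm("eax") trick presumably exists only to keep the compiler from optimising the load away, and that effect can also be had without naming a register. A minimal, arch-neutral sketch of the idea (an assumption about intent, not the submitted code):

#define DEFINE_LOAD(name, type)                          \
static void load_##name(void)                            \
{                                                        \
    /* volatile forces a real load from memory */        \
    type var = *((volatile type *)data);                 \
    (void)var;                                           \
}

This avoids the x86-specific register name, although the rest of the test (SSE2 for 128-bit accesses) still ties it to x86_64, as discussed.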
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > On 7/8/24 12:15, Alex Bennée wrote: >> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >> >>> Add an explicit test to check expected memory values are read/written. >>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation. >>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too. >>> For 128bits memory access, we rely on SSE2 instructions. >>> >>> By default, atomic accesses are non atomic if a single cpu is running, >>> so we force creation of a second one by creating a new thread first. >>> >>> load/store helpers code path can't be triggered easily in user mode (no >>> softmmu), so we can't test it here. >>> >>> Can be run with: >>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so >>> >>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com> >>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >>> --- >>> tests/tcg/x86_64/test-plugin-mem-access.c | 89 +++++++++++++++++++++ >>> tests/tcg/x86_64/Makefile.target | 7 ++ >>> tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++ >>> 3 files changed, 144 insertions(+) >>> create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c >>> create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh >>> >>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c >>> new file mode 100644 >>> index 00000000000..7fdd6a55829 >>> --- /dev/null >>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c >>> @@ -0,0 +1,89 @@ >>> +#include <emmintrin.h> >>> +#include <pthread.h> >>> +#include <stdint.h> >>> +#include <stdlib.h> >>> + >>> +static void *data; >>> + >>> +#define DEFINE_STORE(name, type, value) \ >>> +static void store_##name(void) \ >>> +{ \ >>> + *((type *)data) = value; \ >>> +} >>> + >>> +#define DEFINE_ATOMIC_OP(name, type, value) \ >>> +static void atomic_op_##name(void) \ >>> +{ \ >>> + *((type *)data) = 0x42; \ >>> + __sync_val_compare_and_swap((type *)data, 0x42, value); \ >> Should we exercise the other compare and swap ops? Do they all come >> through the same rwm path? >> > > There are definitely several paths depending on the generated atomic op. > However, the code is pretty straightforward (and implemented in a > single function), so my thought was that one way to trigger this was > enough. If they all come through the same path I guess that's OK. >>> +} >>> + >>> +#define DEFINE_LOAD(name, type) \ >>> +static void load_##name(void) \ >>> +{ \ >>> + register type var asm("eax") = *((type *) data); \ >>> + (void)var; \ >> This is a bit weird. It's the only inline asm needed that makes this >> a >> non-multiarch test. Why? >> > > I'll answer here about why this test is arch specific, and not a multi arch. > > The problem I met is that all target architecture do not have native > 64/128 bits types, and depending how code is compiled with gcc, you > may or not generated expected vector instructions for 128bits > operation. Same for atomic operations. So we do handle this with the sha512 test, building variants of it with various compiler flags to trigger the use of vectors. So the code is multiarch but we have arch specific variants as dictated by the Makefiles, i.e.: sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve sha512-sve: sha512.c $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) TESTS += sha512-sve > Thus, I chose to specialize this test for x86_64, and use sse2 > directly for 128 bits integers. 
> > You might say "How about adding ifdef for this". Yes sure, but the > check script would become complicated too, and I just wanted to keep > it simple. We can keep the check-script per arch if we have to. > Our interest here is not to check that memory accesses are > correctly implemented in QEMU, but to check that a specific behavior > on one arch is the one expected. So the problem with not being multiarch is we don't build all targets in all combinations. To limit CI time we often build a subset and now this particular subset won't test the plugin paths. > > Does it make more sense for you? > <snip>
On 7/12/24 07:51, Alex Bennée wrote: > Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: > >> On 7/8/24 12:15, Alex Bennée wrote: >>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >>> >>>> Add an explicit test to check expected memory values are read/written. >>>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation. >>>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too. >>>> For 128bits memory access, we rely on SSE2 instructions. >>>> >>>> By default, atomic accesses are non atomic if a single cpu is running, >>>> so we force creation of a second one by creating a new thread first. >>>> >>>> load/store helpers code path can't be triggered easily in user mode (no >>>> softmmu), so we can't test it here. >>>> >>>> Can be run with: >>>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so >>>> >>>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com> >>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >>>> --- >>>> tests/tcg/x86_64/test-plugin-mem-access.c | 89 +++++++++++++++++++++ >>>> tests/tcg/x86_64/Makefile.target | 7 ++ >>>> tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++ >>>> 3 files changed, 144 insertions(+) >>>> create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c >>>> create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh >>>> >>>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c >>>> new file mode 100644 >>>> index 00000000000..7fdd6a55829 >>>> --- /dev/null >>>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c >>>> @@ -0,0 +1,89 @@ >>>> +#include <emmintrin.h> >>>> +#include <pthread.h> >>>> +#include <stdint.h> >>>> +#include <stdlib.h> >>>> + >>>> +static void *data; >>>> + >>>> +#define DEFINE_STORE(name, type, value) \ >>>> +static void store_##name(void) \ >>>> +{ \ >>>> + *((type *)data) = value; \ >>>> +} >>>> + >>>> +#define DEFINE_ATOMIC_OP(name, type, value) \ >>>> +static void atomic_op_##name(void) \ >>>> +{ \ >>>> + *((type *)data) = 0x42; \ >>>> + __sync_val_compare_and_swap((type *)data, 0x42, value); \ >>> Should we exercise the other compare and swap ops? Do they all come >>> through the same rwm path? >>> >> >> There are definitely several paths depending on the generated atomic op. >> However, the code is pretty straightforward (and implemented in a >> single function), so my thought was that one way to trigger this was >> enough. > > If they all come through the same path I guess that's OK. > >>>> +} >>>> + >>>> +#define DEFINE_LOAD(name, type) \ >>>> +static void load_##name(void) \ >>>> +{ \ >>>> + register type var asm("eax") = *((type *) data); \ >>>> + (void)var; \ >>> This is a bit weird. It's the only inline asm needed that makes this >>> a >>> non-multiarch test. Why? >>> >> >> I'll answer here about why this test is arch specific, and not a multi arch. >> >> The problem I met is that all target architecture do not have native >> 64/128 bits types, and depending how code is compiled with gcc, you >> may or not generated expected vector instructions for 128bits >> operation. Same for atomic operations. > > So we do handle this with the sha512 test, building variants of it with > various compiler flags to trigger the use of vectors. 
> So the code is multiarch but we have arch specific variants as dictated
> by the Makefiles, i.e.:
>
> sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
> sha512-sve: sha512.c
> 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>
> TESTS += sha512-sve
>

I suspect this is gonna need several iterations to try all arches and see
which ones have native 64/128-bit types and which ones have atomic
instructions. Is that really where we want to spend our (precious) time?
I'm not convinced of the value of that. We try to test the plugins
implementation, not how QEMU handles memory accesses in general.

The specificity of this test is that we don't test the correct output of a
program; we observe an expected behavior via the plugin trace. So it's a
bit different from the sha512 example.

>> Thus, I chose to specialize this test for x86_64, and use sse2
>> directly for 128 bits integers.
>>
>> You might say "How about adding ifdef for this". Yes sure, but the
>> check script would become complicated too, and I just wanted to keep
>> it simple.
>
> We can keep the check-script per arch if we have to.

I would add a target arch param, but not duplicate it, to be honest. That
will be a pain to update if needed. My goal was to replace this with LLVM
FileCheck in a following series.

>> Our interest here is not to check that memory accesses are
>> correctly implemented in QEMU, but to check that a specific behavior
>> on one arch is the one expected.
>
> So the problem with not being multiarch is we don't build all targets in
> all combinations. To limit CI time we often build a subset and now this
> particular subset won't test the plugin paths.

Ok. Is linux-user-x86_64 frequently skipped? I could add support for
aarch64 too, if you think it's worth it. I suspect we always have at least
one of the two arches running in CI.

>>> Does it make more sense for you?
>>>
>> <snip>
On 7/12/24 10:14, Pierrick Bouvier wrote: > On 7/12/24 07:51, Alex Bennée wrote: >> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >> >>> On 7/8/24 12:15, Alex Bennée wrote: >>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes: >>>> >>>>> Add an explicit test to check expected memory values are read/written. >>>>> For sizes 8, 16, 32, 64 and 128, we generate a load/store operation. >>>>> For size 8 -> 64, we generate an atomic __sync_val_compare_and_swap too. >>>>> For 128bits memory access, we rely on SSE2 instructions. >>>>> >>>>> By default, atomic accesses are non atomic if a single cpu is running, >>>>> so we force creation of a second one by creating a new thread first. >>>>> >>>>> load/store helpers code path can't be triggered easily in user mode (no >>>>> softmmu), so we can't test it here. >>>>> >>>>> Can be run with: >>>>> make -C build/tests/tcg/x86_64-linux-user run-plugin-test-plugin-mem-access-with-libmem.so >>>>> >>>>> Tested-by: Xingtao Yao <yaoxt.fnst@fujitsu.com> >>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org> >>>>> --- >>>>> tests/tcg/x86_64/test-plugin-mem-access.c | 89 +++++++++++++++++++++ >>>>> tests/tcg/x86_64/Makefile.target | 7 ++ >>>>> tests/tcg/x86_64/check-plugin-mem-access.sh | 48 +++++++++++ >>>>> 3 files changed, 144 insertions(+) >>>>> create mode 100644 tests/tcg/x86_64/test-plugin-mem-access.c >>>>> create mode 100755 tests/tcg/x86_64/check-plugin-mem-access.sh >>>>> >>>>> diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c >>>>> new file mode 100644 >>>>> index 00000000000..7fdd6a55829 >>>>> --- /dev/null >>>>> +++ b/tests/tcg/x86_64/test-plugin-mem-access.c >>>>> @@ -0,0 +1,89 @@ >>>>> +#include <emmintrin.h> >>>>> +#include <pthread.h> >>>>> +#include <stdint.h> >>>>> +#include <stdlib.h> >>>>> + >>>>> +static void *data; >>>>> + >>>>> +#define DEFINE_STORE(name, type, value) \ >>>>> +static void store_##name(void) \ >>>>> +{ \ >>>>> + *((type *)data) = value; \ >>>>> +} >>>>> + >>>>> +#define DEFINE_ATOMIC_OP(name, type, value) \ >>>>> +static void atomic_op_##name(void) \ >>>>> +{ \ >>>>> + *((type *)data) = 0x42; \ >>>>> + __sync_val_compare_and_swap((type *)data, 0x42, value); \ >>>> Should we exercise the other compare and swap ops? Do they all come >>>> through the same rwm path? >>>> >>> >>> There are definitely several paths depending on the generated atomic op. >>> However, the code is pretty straightforward (and implemented in a >>> single function), so my thought was that one way to trigger this was >>> enough. >> >> If they all come through the same path I guess that's OK. >> >>>>> +} >>>>> + >>>>> +#define DEFINE_LOAD(name, type) \ >>>>> +static void load_##name(void) \ >>>>> +{ \ >>>>> + register type var asm("eax") = *((type *) data); \ >>>>> + (void)var; \ >>>> This is a bit weird. It's the only inline asm needed that makes this >>>> a >>>> non-multiarch test. Why? >>>> >>> >>> I'll answer here about why this test is arch specific, and not a multi arch. >>> >>> The problem I met is that all target architecture do not have native >>> 64/128 bits types, and depending how code is compiled with gcc, you >>> may or not generated expected vector instructions for 128bits >>> operation. Same for atomic operations. >> >> So we do handle this with the sha512 test, building variants of it with >> various compiler flags to trigger the use of vectors. 
So the code is >> multiarch but we have arch specific variants as dictated by the >> Makefiles, i.e.: >> >> sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve >> sha512-sve: sha512.c >> $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS) >> >> TESTS += sha512-sve >> > > I suspect this is gonna need several iterations to try all arch, and see > which ones have native 64/128 bits and which ones have atomic > instructions. Is that really where we want to spend our (precious) time? > I'm not convinced of the value of that. We try to test plugins > implementation, not how QEMU handles memory accesses in general. > > The specificity of this test, is what we don't test the correct output > of a program, but we observe an expected behavior, via the plugins > trace. So it's a bit different from sha512 example. > >>> Thus, I chose to specialize this test for x86_64, and use sse2 >>> directly for 128 bits integers. >>> >>> You might say "How about adding ifdef for this". Yes sure, but the >>> check script would become complicated too, and I just wanted to keep >>> it simple. >> >> We can keep the check-script per arch if we have to. >> > > I would add a target arch param, but not duplicate this to be honest. > Will be a pain to update if needed. > My goal was to replace this with LLVM filecheck in a following series. > >>> Our interest here is not to check that memory accesses are >>> correctly implemented in QEMU, but to check that a specific behavior >>> on one arch is the one expected. >> >> So the problem with not being multiarch is we don't build all targets in >> all combinations. To limit CI time we often build a subset and now this >> particular subset won't test the plugin paths. >> > > Ok. Is linux-user-x86_64 frequently skipped? > I could add support for aarch64 too, if you think it's worth it. I > suspect we always have at least one of the two arch that is running in CI. > >>> >>> Does it make more sense for you? >>> >> <snip> >> After discussing with Alex, our goal will be to have a multiarch test. I'll do this for the next version.
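As a rough idea of what the multiarch direction might look like for the 128-bit case, the SSE2 intrinsics could be replaced with compiler-provided 128-bit integers where they exist. This is only a sketch under the assumption that __int128 (signalled by __SIZEOF_INT128__) is acceptable on the targets of interest; it is not the patch that was eventually posted:

#ifdef __SIZEOF_INT128__
typedef unsigned __int128 u128;

static void store_u128(void)
{
    /* same value the x86_64 test stores via _mm_set_epi32() */
    *((u128 *)data) = ((u128)0xf122334455667788ull << 64)
                      | 0xf123456789abcdefull;
}

static void load_u128(void)
{
    u128 var = *((volatile u128 *)data);
    (void)var;
}
#endif /* __SIZEOF_INT128__ */

One caveat, already hinted at in the discussion: on many targets the compiler is free to split a __int128 access into two 64-bit accesses, so a multiarch version would still need per-arch expectations (or compiler flags) for the 128-bit and atomic cases.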
diff --git a/tests/tcg/x86_64/test-plugin-mem-access.c b/tests/tcg/x86_64/test-plugin-mem-access.c
new file mode 100644
index 00000000000..7fdd6a55829
--- /dev/null
+++ b/tests/tcg/x86_64/test-plugin-mem-access.c
@@ -0,0 +1,89 @@
+#include <emmintrin.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+static void *data;
+
+#define DEFINE_STORE(name, type, value)                  \
+static void store_##name(void)                           \
+{                                                        \
+    *((type *)data) = value;                             \
+}
+
+#define DEFINE_ATOMIC_OP(name, type, value)              \
+static void atomic_op_##name(void)                       \
+{                                                        \
+    *((type *)data) = 0x42;                              \
+    __sync_val_compare_and_swap((type *)data, 0x42, value); \
+}
+
+#define DEFINE_LOAD(name, type)                          \
+static void load_##name(void)                            \
+{                                                        \
+    register type var asm("eax") = *((type *) data);     \
+    (void)var;                                           \
+}
+
+DEFINE_STORE(u8, uint8_t, 0xf1)
+DEFINE_ATOMIC_OP(u8, uint8_t, 0xf1)
+DEFINE_LOAD(u8, uint8_t)
+DEFINE_STORE(u16, uint16_t, 0xf123)
+DEFINE_ATOMIC_OP(u16, uint16_t, 0xf123)
+DEFINE_LOAD(u16, uint16_t)
+DEFINE_STORE(u32, uint32_t, 0xff112233)
+DEFINE_ATOMIC_OP(u32, uint32_t, 0xff112233)
+DEFINE_LOAD(u32, uint32_t)
+DEFINE_STORE(u64, uint64_t, 0xf123456789abcdef)
+DEFINE_ATOMIC_OP(u64, uint64_t, 0xf123456789abcdef)
+DEFINE_LOAD(u64, uint64_t)
+
+static void store_u128(void)
+{
+    _mm_store_si128(data, _mm_set_epi32(0xf1223344, 0x55667788,
+                                        0xf1234567, 0x89abcdef));
+}
+
+static void load_u128(void)
+{
+    __m128i var = _mm_load_si128(data);
+    (void)var;
+}
+
+static void *f(void *p)
+{
+    return NULL;
+}
+
+int main(void)
+{
+    /*
+     * We force creation of a second thread to enable cpu flag CF_PARALLEL.
+     * This will generate atomic operations when needed.
+     */
+    pthread_t thread;
+    pthread_create(&thread, NULL, &f, NULL);
+    pthread_join(thread, NULL);
+
+    data = malloc(sizeof(__m128i));
+    atomic_op_u8();
+    store_u8();
+    load_u8();
+
+    atomic_op_u16();
+    store_u16();
+    load_u16();
+
+    atomic_op_u32();
+    store_u32();
+    load_u32();
+
+    atomic_op_u64();
+    store_u64();
+    load_u64();
+
+    store_u128();
+    load_u128();
+
+    free(data);
+}
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index eda9bd7396c..3edc29b924d 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -16,6 +16,7 @@ X86_64_TESTS += noexec
 X86_64_TESTS += cmpxchg
 X86_64_TESTS += adox
 X86_64_TESTS += test-1648
+PLUGINS_TESTS += test-plugin-mem-access
 TESTS=$(MULTIARCH_TESTS) $(X86_64_TESTS) test-x86_64
 else
 TESTS=$(MULTIARCH_TESTS)
@@ -26,6 +27,12 @@ adox: CFLAGS=-O2
 run-test-i386-ssse3: QEMU_OPTS += -cpu max
 run-plugin-test-i386-ssse3-%: QEMU_OPTS += -cpu max
 
+run-plugin-test-plugin-mem-access-with-libmem.so: \
+	PLUGIN_ARGS=$(COMMA)print-accesses=true
+run-plugin-test-plugin-mem-access-with-libmem.so: \
+	CHECK_PLUGIN_OUTPUT_COMMAND= \
+	$(SRC_PATH)/tests/tcg/x86_64/check-plugin-mem-access.sh
+
 test-x86_64: LDFLAGS+=-lm -lc
 test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
diff --git a/tests/tcg/x86_64/check-plugin-mem-access.sh b/tests/tcg/x86_64/check-plugin-mem-access.sh
new file mode 100755
index 00000000000..163f1cfad34
--- /dev/null
+++ b/tests/tcg/x86_64/check-plugin-mem-access.sh
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+die()
+{
+    echo "$@" 1>&2
+    exit 1
+}
+
+check()
+{
+    file=$1
+    pattern=$2
+    grep "$pattern" "$file" > /dev/null || die "\"$pattern\" not found in $file"
+}
+
+[ $# -eq 1 ] || die "usage: plugin_out_file"
+
+plugin_out=$1
+
+expected()
+{
+    cat << EOF
+,store_u8,.*,8,store,0xf1
+,atomic_op_u8,.*,8,load,0x42
+,atomic_op_u8,.*,8,store,0xf1
+,load_u8,.*,8,load,0xf1
+,store_u16,.*,16,store,0xf123
+,atomic_op_u16,.*,16,load,0x0042
+,atomic_op_u16,.*,16,store,0xf123
+,load_u16,.*,16,load,0xf123
+,store_u32,.*,32,store,0xff112233
+,atomic_op_u32,.*,32,load,0x00000042
+,atomic_op_u32,.*,32,store,0xff112233
+,load_u32,.*,32,load,0xff112233
+,store_u64,.*,64,store,0xf123456789abcdef
+,atomic_op_u64,.*,64,load,0x0000000000000042
+,atomic_op_u64,.*,64,store,0xf123456789abcdef
+,load_u64,.*,64,load,0xf123456789abcdef
+,store_u128,.*,128,store,0xf122334455667788f123456789abcdef
+,load_u128,.*,128,load,0xf122334455667788f123456789abcdef
+EOF
+}
+
+expected | while read line; do
+    check "$plugin_out" "$line"
+done