Message ID | 20200701135652.1366-29-alex.bennee@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | testing/next (vm, gitlab, fixes) | expand |
On 7/1/20 3:56 PM, Alex Bennée wrote: > For some reason these tests fail all the time on GitLab. I can > re-create the hang around 3% of the time locally but it doesn't seem > to be MTTCG related. For now skipIf on GITLAB_CI. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> > --- > tests/acceptance/machine_mips_malta.py | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py > index 92b4f28a112..7c9a4ee4d2d 100644 > --- a/tests/acceptance/machine_mips_malta.py > +++ b/tests/acceptance/machine_mips_malta.py > @@ -15,6 +15,7 @@ from avocado import skipUnless > from avocado_qemu import Test > from avocado_qemu import wait_for_console_pattern > from avocado.utils import archive > +from avocado import skipIf > > > NUMPY_AVAILABLE = True > @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): > """ > self.do_test_i6400_framebuffer_logo(1) > So the test works using a single core... Good we have a test to figure the bug! Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> > + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') > def test_mips_malta_i6400_framebuffer_logo_7cores(self): > """ > :avocado: tags=arch:mips64el > @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): > """ > self.do_test_i6400_framebuffer_logo(7) > > + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') > def test_mips_malta_i6400_framebuffer_logo_8cores(self): > """ > :avocado: tags=arch:mips64el >
Philippe Mathieu-Daudé <f4bug@amsat.org> writes: > On 7/1/20 3:56 PM, Alex Bennée wrote: >> For some reason these tests fail all the time on GitLab. I can >> re-create the hang around 3% of the time locally but it doesn't seem >> to be MTTCG related. For now skipIf on GITLAB_CI. >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> >> --- >> tests/acceptance/machine_mips_malta.py | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py >> index 92b4f28a112..7c9a4ee4d2d 100644 >> --- a/tests/acceptance/machine_mips_malta.py >> +++ b/tests/acceptance/machine_mips_malta.py >> @@ -15,6 +15,7 @@ from avocado import skipUnless >> from avocado_qemu import Test >> from avocado_qemu import wait_for_console_pattern >> from avocado.utils import archive >> +from avocado import skipIf >> >> >> NUMPY_AVAILABLE = True >> @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): >> """ >> self.do_test_i6400_framebuffer_logo(1) >> > > So the test works using a single core... > Good we have a test to figure the bug! It's about a 1-3% failure rate on my big test box but hits every time on CI. However I did disable MTTCG and still saw failures so I think it's a more subtle breakage than just a straight race. > > Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> > >> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >> def test_mips_malta_i6400_framebuffer_logo_7cores(self): >> """ >> :avocado: tags=arch:mips64el >> @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): >> """ >> self.do_test_i6400_framebuffer_logo(7) >> >> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >> def test_mips_malta_i6400_framebuffer_logo_8cores(self): >> """ >> :avocado: tags=arch:mips64el >> -- Alex Bennée
On 7/1/20 6:43 PM, Alex Bennée wrote: > > Philippe Mathieu-Daudé <f4bug@amsat.org> writes: > >> On 7/1/20 3:56 PM, Alex Bennée wrote: >>> For some reason these tests fail all the time on GitLab. I can >>> re-create the hang around 3% of the time locally but it doesn't seem >>> to be MTTCG related. For now skipIf on GITLAB_CI. >>> >>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >>> Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> >>> --- >>> tests/acceptance/machine_mips_malta.py | 3 +++ >>> 1 file changed, 3 insertions(+) >>> >>> diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py >>> index 92b4f28a112..7c9a4ee4d2d 100644 >>> --- a/tests/acceptance/machine_mips_malta.py >>> +++ b/tests/acceptance/machine_mips_malta.py >>> @@ -15,6 +15,7 @@ from avocado import skipUnless >>> from avocado_qemu import Test >>> from avocado_qemu import wait_for_console_pattern >>> from avocado.utils import archive >>> +from avocado import skipIf >>> >>> >>> NUMPY_AVAILABLE = True >>> @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): >>> """ >>> self.do_test_i6400_framebuffer_logo(1) >>> >> >> So the test works using a single core... >> Good we have a test to figure the bug! > > It's about a 1-3% failure rate on my big test box but hits every time on > CI. However I did disable MTTCG and still saw failures so I think it's a > more subtle breakage than just a straight race. I first thought it was a MTTCG problem, but then I realized you didn't disable the single core test. When using >1 core, the malta uses a different device, the CPS for Coherent Processing System. It contains a Inter-Thread Communication Unit and a Global Interrupt Controller. There might be a I/O locking problem. In particular, some of these devices access the &cpu->env (the ITU is more of micro-architecture). This is why I was excited by your finding :) We might have a way to figure it out. 
> >> >> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org> >> >>> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >>> def test_mips_malta_i6400_framebuffer_logo_7cores(self): >>> """ >>> :avocado: tags=arch:mips64el >>> @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): >>> """ >>> self.do_test_i6400_framebuffer_logo(7) >>> >>> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >>> def test_mips_malta_i6400_framebuffer_logo_8cores(self): >>> """ >>> :avocado: tags=arch:mips64el >>> > >
On Wed, Jul 1, 2020 at 4:03 PM Alex Bennée <alex.bennee@linaro.org> wrote: > > For some reason these tests fail all the time on GitLab. I can > re-create the hang around 3% of the time locally but it doesn't seem > to be MTTCG related. For now skipIf on GITLAB_CI. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> > --- Alex, Thanks for having this test at all. I will review its content, but here is my stupid question: How can I, as a regular developer, repro the test in question? I am not familiar with GitLab at all. Thanks, Aleksandar > tests/acceptance/machine_mips_malta.py | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py > index 92b4f28a112..7c9a4ee4d2d 100644 > --- a/tests/acceptance/machine_mips_malta.py > +++ b/tests/acceptance/machine_mips_malta.py > @@ -15,6 +15,7 @@ from avocado import skipUnless > from avocado_qemu import Test > from avocado_qemu import wait_for_console_pattern > from avocado.utils import archive > +from avocado import skipIf > > > NUMPY_AVAILABLE = True > @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): > """ > self.do_test_i6400_framebuffer_logo(1) > > + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') > def test_mips_malta_i6400_framebuffer_logo_7cores(self): > """ > :avocado: tags=arch:mips64el > @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): > """ > self.do_test_i6400_framebuffer_logo(7) > > + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') > def test_mips_malta_i6400_framebuffer_logo_8cores(self): > """ > :avocado: tags=arch:mips64el > -- > 2.20.1 >
在 2020/7/2 1:01, Philippe Mathieu-Daudé 写道: > On 7/1/20 6:43 PM, Alex Bennée wrote: >> Philippe Mathieu-Daudé <f4bug@amsat.org> writes: >> >>> On 7/1/20 3:56 PM, Alex Bennée wrote: >>>> For some reason these tests fail all the time on GitLab. I can >>>> re-create the hang around 3% of the time locally but it doesn't seem >>>> to be MTTCG related. For now skipIf on GITLAB_CI. >>>> >>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >>>> Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> >>>> --- >>>> tests/acceptance/machine_mips_malta.py | 3 +++ >>>> 1 file changed, 3 insertions(+) >>>> >>>> diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py >>>> index 92b4f28a112..7c9a4ee4d2d 100644 >>>> --- a/tests/acceptance/machine_mips_malta.py >>>> +++ b/tests/acceptance/machine_mips_malta.py >>>> @@ -15,6 +15,7 @@ from avocado import skipUnless >>>> from avocado_qemu import Test >>>> from avocado_qemu import wait_for_console_pattern >>>> from avocado.utils import archive >>>> +from avocado import skipIf >>>> >>>> >>>> NUMPY_AVAILABLE = True >>>> @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): >>>> """ >>>> self.do_test_i6400_framebuffer_logo(1) >>>> >>> So the test works using a single core... >>> Good we have a test to figure the bug! >> It's about a 1-3% failure rate on my big test box but hits every time on >> CI. However I did disable MTTCG and still saw failures so I think it's a >> more subtle breakage than just a straight race. > I first thought it was a MTTCG problem, but then I realized you didn't > disable the single core test. When using >1 core, the malta uses a > different device, the CPS for Coherent Processing System. It contains > a Inter-Thread Communication Unit and a Global Interrupt Controller. > There might be a I/O locking problem. In particular, some of these > devices access the &cpu->env (the ITU is more of micro-architecture). 
> > This is why I was excited by your finding :) We might have a way > to figure it out. FYI: This issue seems related to the performance of the host machine. I can reproduce the issue with high frequency if I unplug my laptop from the AC adapter (it will switch to the powersave governor). So my first thought was that it is just because TCG runs too slowly, so the cores fail to respond to IPIs in time. Thanks. - Jiaxun
Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> writes: > On Wed, Jul 1, 2020 at 4:03 PM Alex Bennée <alex.bennee@linaro.org> wrote: >> >> For some reason these tests fail all the time on GitLab. I can >> re-create the hang around 3% of the time locally but it doesn't seem >> to be MTTCG related. For now skipIf on GITLAB_CI. >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> >> --- > > Alex, > > Thanks for having this test at all. I will review its content, but > here is my stupid question: > > How can I, as a regular developer, repro the test in question? I am > not familiar with GitLab at all. To run the acceptance tests you just run: make check-acceptance To run the individual test: ./tests/venv/bin/avocado run tests/acceptance/machine_mips_malta.py:MaltaMachineFramebuffer.test_mips_malta_i6400_framebuffer_logo_8cores I used my retry.py script to just loop running the test 100 times to calculate the failure rate. 
> > Thanks, > Aleksandar > >> tests/acceptance/machine_mips_malta.py | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py >> index 92b4f28a112..7c9a4ee4d2d 100644 >> --- a/tests/acceptance/machine_mips_malta.py >> +++ b/tests/acceptance/machine_mips_malta.py >> @@ -15,6 +15,7 @@ from avocado import skipUnless >> from avocado_qemu import Test >> from avocado_qemu import wait_for_console_pattern >> from avocado.utils import archive >> +from avocado import skipIf >> >> >> NUMPY_AVAILABLE = True >> @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): >> """ >> self.do_test_i6400_framebuffer_logo(1) >> >> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >> def test_mips_malta_i6400_framebuffer_logo_7cores(self): >> """ >> :avocado: tags=arch:mips64el >> @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): >> """ >> self.do_test_i6400_framebuffer_logo(7) >> >> + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') >> def test_mips_malta_i6400_framebuffer_logo_8cores(self): >> """ >> :avocado: tags=arch:mips64el >> -- >> 2.20.1 >> -- Alex Bennée
diff --git a/tests/acceptance/machine_mips_malta.py b/tests/acceptance/machine_mips_malta.py index 92b4f28a112..7c9a4ee4d2d 100644 --- a/tests/acceptance/machine_mips_malta.py +++ b/tests/acceptance/machine_mips_malta.py @@ -15,6 +15,7 @@ from avocado import skipUnless from avocado_qemu import Test from avocado_qemu import wait_for_console_pattern from avocado.utils import archive +from avocado import skipIf NUMPY_AVAILABLE = True @@ -99,6 +100,7 @@ class MaltaMachineFramebuffer(Test): """ self.do_test_i6400_framebuffer_logo(1) + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') def test_mips_malta_i6400_framebuffer_logo_7cores(self): """ :avocado: tags=arch:mips64el @@ -108,6 +110,7 @@ class MaltaMachineFramebuffer(Test): """ self.do_test_i6400_framebuffer_logo(7) + @skipIf(os.getenv('GITLAB_CI'), 'Running on GitLab') def test_mips_malta_i6400_framebuffer_logo_8cores(self): """ :avocado: tags=arch:mips64el
For some reason these tests fail all the time on GitLab. I can re-create the hang around 3% of the time locally but it doesn't seem to be MTTCG related. For now skipIf on GITLAB_CI. Signed-off-by: Alex Bennée <alex.bennee@linaro.org> Cc: Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> --- tests/acceptance/machine_mips_malta.py | 3 +++ 1 file changed, 3 insertions(+) -- 2.20.1