Message ID | 20201021105035.2477784-5-f4bug@amsat.org |
---|---|
State | Accepted |
Commit | ca8224492854a2930d0cadc76e715bf59582bf66 |
Headers | show |
Series | tests/acceptance: Test the Fuloong 2E machine | expand |
On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: > We are going to reuse the tesseract OCR code. > Create a new tesseract_ocr() helper and use it. > > Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> > --- > tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- > tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ > 2 files changed, 23 insertions(+), 16 deletions(-) > > diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py > index 3c7400c43e4..09e2745cc52 100644 > --- a/tests/acceptance/machine_m68k_nextcube.py > +++ b/tests/acceptance/machine_m68k_nextcube.py > @@ -7,13 +7,11 @@ > > import os > import time > -import logging > > from avocado_qemu import Test > from avocado import skipUnless > -from avocado.utils import process > > -from tesseract_utils import tesseract_available > +from tesseract_utils import tesseract_available, tesseract_ocr > > PIL_AVAILABLE = True > try: > @@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self): > def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): > screenshot_path = os.path.join(self.workdir, "dump.ppm") > self.check_bootrom_framebuffer(screenshot_path) > - > - console_logger = logging.getLogger('console') > - text = process.run("tesseract %s stdout" % screenshot_path).stdout_text > - for line in text.split('\n'): > - if len(line): > - console_logger.debug(line) > + lines = tesseract_ocr(screenshot_path, tesseract_version=3) > + text = '\n'.join(lines) > self.assertIn('Backplane', text) > self.assertIn('Ethernet address', text) > > @@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): > def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): > screenshot_path = os.path.join(self.workdir, "dump.ppm") > self.check_bootrom_framebuffer(screenshot_path) > - > - console_logger = logging.getLogger('console') > - proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path) > - text = proc.stdout_text > - for line in 
text.split('\n'): > - if len(line): > - console_logger.debug(line) > + lines = tesseract_ocr(screenshot_path, tesseract_version=4) > + text = '\n'.join(lines) > self.assertIn('Testing the FPU, SCC', text) > self.assertIn('System test failed. Error code', text) > self.assertIn('Boot command', text) > diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py > index acd6e8c2faa..72cd9ab7989 100644 > --- a/tests/acceptance/tesseract_utils.py > +++ b/tests/acceptance/tesseract_utils.py > @@ -6,7 +6,9 @@ > # later. See the COPYING file in the top-level directory. > > import re > +import logging > > +from avocado.utils import process > from avocado.utils.path import find_command, CmdNotFoundError > > def tesseract_available(expected_version): > @@ -26,3 +28,19 @@ def tesseract_available(expected_version): > return False > # now this is guaranteed to be a digit > return int(match.groups()[0]) == expected_version > + > + > +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): > + console_logger = logging.getLogger('tesseract') > + console_logger.debug(image_path) > + if tesseract_version == 4: > + tesseract_args += ' --oem 1' > + proc = process.run("tesseract {} {} stdout".format(tesseract_args, > + image_path)) > + lines = [] > + for line in proc.stdout_text.split('\n'): > + sline = line.strip() > + if len(sline): > + console_logger.debug(sline) > + lines += [sline] > + return lines Would it make sense to completely hide the tesseract version handling in this new tesseract_utils.py file now, so that the tests themselves do not have to worry about this anymore (i.e. would it be possible to merge test_bootrom_framebuffer_ocr_with_tesseract_v3 and test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way?) Thomas
On 24/10/2020 08.35, Thomas Huth wrote: > On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: >> We are going to reuse the tesseract OCR code. >> Create a new tesseract_ocr() helper and use it. >> >> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> >> --- >> tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- >> tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ >> 2 files changed, 23 insertions(+), 16 deletions(-) >> >> diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py >> index 3c7400c43e4..09e2745cc52 100644 >> --- a/tests/acceptance/machine_m68k_nextcube.py >> +++ b/tests/acceptance/machine_m68k_nextcube.py >> @@ -7,13 +7,11 @@ >> >> import os >> import time >> -import logging >> >> from avocado_qemu import Test >> from avocado import skipUnless >> -from avocado.utils import process >> >> -from tesseract_utils import tesseract_available >> +from tesseract_utils import tesseract_available, tesseract_ocr >> >> PIL_AVAILABLE = True >> try: >> @@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self): >> def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >> screenshot_path = os.path.join(self.workdir, "dump.ppm") >> self.check_bootrom_framebuffer(screenshot_path) >> - >> - console_logger = logging.getLogger('console') >> - text = process.run("tesseract %s stdout" % screenshot_path).stdout_text >> - for line in text.split('\n'): >> - if len(line): >> - console_logger.debug(line) >> + lines = tesseract_ocr(screenshot_path, tesseract_version=3) >> + text = '\n'.join(lines) >> self.assertIn('Backplane', text) >> self.assertIn('Ethernet address', text) >> >> @@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >> def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): >> screenshot_path = os.path.join(self.workdir, "dump.ppm") >> self.check_bootrom_framebuffer(screenshot_path) >> - >> - console_logger = logging.getLogger('console') >> - proc = 
process.run("tesseract --oem 1 %s stdout" % screenshot_path) >> - text = proc.stdout_text >> - for line in text.split('\n'): >> - if len(line): >> - console_logger.debug(line) >> + lines = tesseract_ocr(screenshot_path, tesseract_version=4) >> + text = '\n'.join(lines) >> self.assertIn('Testing the FPU, SCC', text) >> self.assertIn('System test failed. Error code', text) >> self.assertIn('Boot command', text) >> diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py >> index acd6e8c2faa..72cd9ab7989 100644 >> --- a/tests/acceptance/tesseract_utils.py >> +++ b/tests/acceptance/tesseract_utils.py >> @@ -6,7 +6,9 @@ >> # later. See the COPYING file in the top-level directory. >> >> import re >> +import logging >> >> +from avocado.utils import process >> from avocado.utils.path import find_command, CmdNotFoundError >> >> def tesseract_available(expected_version): >> @@ -26,3 +28,19 @@ def tesseract_available(expected_version): >> return False >> # now this is guaranteed to be a digit >> return int(match.groups()[0]) == expected_version >> + >> + >> +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): >> + console_logger = logging.getLogger('tesseract') >> + console_logger.debug(image_path) >> + if tesseract_version == 4: >> + tesseract_args += ' --oem 1' >> + proc = process.run("tesseract {} {} stdout".format(tesseract_args, >> + image_path)) >> + lines = [] >> + for line in proc.stdout_text.split('\n'): >> + sline = line.strip() >> + if len(sline): >> + console_logger.debug(sline) >> + lines += [sline] >> + return lines > > Would it make sense to completely hide the tesseract version handling in > this new tesseract_utils.py file now, so that the tests themselves do not > have to worry about this anymore (i.e. would it be possible to merge > test_bootrom_framebuffer_ocr_with_tesseract_v3 and > test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way?) 
If I've got that right, there is also now a proper release 4 of Tesseract, so maybe we can simply scratch the testing with version 3 now? Thomas
On 10/24/20 8:40 AM, Thomas Huth wrote: > On 24/10/2020 08.35, Thomas Huth wrote: >> On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: >>> We are going to reuse the tesseract OCR code. >>> Create a new tesseract_ocr() helper and use it. >>> >>> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> >>> --- >>> tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- >>> tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ >>> 2 files changed, 23 insertions(+), 16 deletions(-) >>> >>> diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py >>> index 3c7400c43e4..09e2745cc52 100644 >>> --- a/tests/acceptance/machine_m68k_nextcube.py >>> +++ b/tests/acceptance/machine_m68k_nextcube.py >>> @@ -7,13 +7,11 @@ >>> >>> import os >>> import time >>> -import logging >>> >>> from avocado_qemu import Test >>> from avocado import skipUnless >>> -from avocado.utils import process >>> >>> -from tesseract_utils import tesseract_available >>> +from tesseract_utils import tesseract_available, tesseract_ocr >>> >>> PIL_AVAILABLE = True >>> try: >>> @@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self): >>> def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >>> screenshot_path = os.path.join(self.workdir, "dump.ppm") >>> self.check_bootrom_framebuffer(screenshot_path) >>> - >>> - console_logger = logging.getLogger('console') >>> - text = process.run("tesseract %s stdout" % screenshot_path).stdout_text >>> - for line in text.split('\n'): >>> - if len(line): >>> - console_logger.debug(line) >>> + lines = tesseract_ocr(screenshot_path, tesseract_version=3) >>> + text = '\n'.join(lines) >>> self.assertIn('Backplane', text) >>> self.assertIn('Ethernet address', text) >>> >>> @@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self): >>> def test_bootrom_framebuffer_ocr_with_tesseract_v4(self): >>> screenshot_path = os.path.join(self.workdir, "dump.ppm") >>> 
self.check_bootrom_framebuffer(screenshot_path) >>> - >>> - console_logger = logging.getLogger('console') >>> - proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path) >>> - text = proc.stdout_text >>> - for line in text.split('\n'): >>> - if len(line): >>> - console_logger.debug(line) >>> + lines = tesseract_ocr(screenshot_path, tesseract_version=4) >>> + text = '\n'.join(lines) >>> self.assertIn('Testing the FPU, SCC', text) >>> self.assertIn('System test failed. Error code', text) >>> self.assertIn('Boot command', text) >>> diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py >>> index acd6e8c2faa..72cd9ab7989 100644 >>> --- a/tests/acceptance/tesseract_utils.py >>> +++ b/tests/acceptance/tesseract_utils.py >>> @@ -6,7 +6,9 @@ >>> # later. See the COPYING file in the top-level directory. >>> >>> import re >>> +import logging >>> >>> +from avocado.utils import process >>> from avocado.utils.path import find_command, CmdNotFoundError >>> >>> def tesseract_available(expected_version): >>> @@ -26,3 +28,19 @@ def tesseract_available(expected_version): >>> return False >>> # now this is guaranteed to be a digit >>> return int(match.groups()[0]) == expected_version >>> + >>> + >>> +def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3): >>> + console_logger = logging.getLogger('tesseract') >>> + console_logger.debug(image_path) >>> + if tesseract_version == 4: >>> + tesseract_args += ' --oem 1' >>> + proc = process.run("tesseract {} {} stdout".format(tesseract_args, >>> + image_path)) >>> + lines = [] >>> + for line in proc.stdout_text.split('\n'): >>> + sline = line.strip() >>> + if len(sline): >>> + console_logger.debug(sline) >>> + lines += [sline] >>> + return lines >> >> Would it make sense to completely hide the tesseract version handling in >> this new tesseract_utils.py file now, so that the tests themselves do not >> have to worry about this anymore Yes, good idea. > (i.e. 
would it be possible to merge >> test_bootrom_framebuffer_ocr_with_tesseract_v3 and >> test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test that way?) > > If I've got that right, there is also now a proper release 4 of Tesseract, > so maybe we can simply scratch the testing with version 3 now? Good to know, I'll have a look. Thanks! > > Thomas >
On 10/24/20 9:37 AM, Philippe Mathieu-Daudé wrote: > On 10/24/20 8:40 AM, Thomas Huth wrote: >> On 24/10/2020 08.35, Thomas Huth wrote: >>> On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: >>>> We are going to reuse the tesseract OCR code. >>>> Create a new tesseract_ocr() helper and use it. >>>> >>>> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> >>>> --- >>>> tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- >>>> tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ >>>> 2 files changed, 23 insertions(+), 16 deletions(-) ... >>> >>> Would it make sense to completely hide the tesseract version handling in >>> this new tesseract_utils.py file now, so that the tests themselves do >>> not >>> have to worry about this anymore The problem is the recognized strings differ between versions, see in tests/acceptance/machine_m68k_nextcube.py: lines = tesseract_ocr(screenshot_path, tesseract_version=3) text = '\n'.join(lines) self.assertIn('Backplane', text) self.assertIn('Ethernet address', text) and: lines = tesseract_ocr(screenshot_path, tesseract_version=4) text = '\n'.join(lines) self.assertIn('Testing the FPU, SCC', text) self.assertIn('System test failed. Error code', text) self.assertIn('Boot command', text) self.assertIn('Next>', text) > > Yes, good idea. > >> (i.e. would it be possible to merge >>> test_bootrom_framebuffer_ocr_with_tesseract_v3 and >>> test_bootrom_framebuffer_ocr_with_tesseract_v4 into one single test >>> that way?) >> >> If I've got that right, there is also now a proper release 4 of >> Tesseract, >> so maybe we can simply scratch the testing with version 3 now? > > Good to know, I'll have a look. Thanks! > >> >> Thomas >> >
On 24/10/2020 19.40, Philippe Mathieu-Daudé wrote: > On 10/24/20 9:37 AM, Philippe Mathieu-Daudé wrote: >> On 10/24/20 8:40 AM, Thomas Huth wrote: >>> On 24/10/2020 08.35, Thomas Huth wrote: >>>> On 21/10/2020 12.50, Philippe Mathieu-Daudé wrote: >>>>> We are going to reuse the tesseract OCR code. >>>>> Create a new tesseract_ocr() helper and use it. >>>>> >>>>> Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> >>>>> --- >>>>> tests/acceptance/machine_m68k_nextcube.py | 21 +++++---------------- >>>>> tests/acceptance/tesseract_utils.py | 18 ++++++++++++++++++ >>>>> 2 files changed, 23 insertions(+), 16 deletions(-) > ... > >>>> >>>> Would it make sense to completely hide the tesseract version handling in >>>> this new tesseract_utils.py file now, so that the tests themselves do not >>>> have to worry about this anymore > > The problem is the recognized strings differ between versions, > see in tests/acceptance/machine_m68k_nextcube.py: > > lines = tesseract_ocr(screenshot_path, tesseract_version=3) > text = '\n'.join(lines) > self.assertIn('Backplane', text) > self.assertIn('Ethernet address', text) > > and: > > lines = tesseract_ocr(screenshot_path, tesseract_version=4) > text = '\n'.join(lines) > self.assertIn('Testing the FPU, SCC', text) > self.assertIn('System test failed. Error code', text) > self.assertIn('Boot command', text) > self.assertIn('Next>', text) Ah, right, I forgot about that ... well, one more reason to completely switch to tesseract v4 now ;-) Thomas
```diff
diff --git a/tests/acceptance/machine_m68k_nextcube.py b/tests/acceptance/machine_m68k_nextcube.py
index 3c7400c43e4..09e2745cc52 100644
--- a/tests/acceptance/machine_m68k_nextcube.py
+++ b/tests/acceptance/machine_m68k_nextcube.py
@@ -7,13 +7,11 @@
 
 import os
 import time
-import logging
 
 from avocado_qemu import Test
 from avocado import skipUnless
-from avocado.utils import process
 
-from tesseract_utils import tesseract_available
+from tesseract_utils import tesseract_available, tesseract_ocr
 
 PIL_AVAILABLE = True
 try:
@@ -61,12 +59,8 @@ def test_bootrom_framebuffer_size(self):
     def test_bootrom_framebuffer_ocr_with_tesseract_v3(self):
         screenshot_path = os.path.join(self.workdir, "dump.ppm")
         self.check_bootrom_framebuffer(screenshot_path)
-
-        console_logger = logging.getLogger('console')
-        text = process.run("tesseract %s stdout" % screenshot_path).stdout_text
-        for line in text.split('\n'):
-            if len(line):
-                console_logger.debug(line)
+        lines = tesseract_ocr(screenshot_path, tesseract_version=3)
+        text = '\n'.join(lines)
         self.assertIn('Backplane', text)
         self.assertIn('Ethernet address', text)
 
@@ -77,13 +71,8 @@ def test_bootrom_framebuffer_ocr_with_tesseract_v3(self):
     def test_bootrom_framebuffer_ocr_with_tesseract_v4(self):
         screenshot_path = os.path.join(self.workdir, "dump.ppm")
         self.check_bootrom_framebuffer(screenshot_path)
-
-        console_logger = logging.getLogger('console')
-        proc = process.run("tesseract --oem 1 %s stdout" % screenshot_path)
-        text = proc.stdout_text
-        for line in text.split('\n'):
-            if len(line):
-                console_logger.debug(line)
+        lines = tesseract_ocr(screenshot_path, tesseract_version=4)
+        text = '\n'.join(lines)
         self.assertIn('Testing the FPU, SCC', text)
         self.assertIn('System test failed. Error code', text)
         self.assertIn('Boot command', text)
diff --git a/tests/acceptance/tesseract_utils.py b/tests/acceptance/tesseract_utils.py
index acd6e8c2faa..72cd9ab7989 100644
--- a/tests/acceptance/tesseract_utils.py
+++ b/tests/acceptance/tesseract_utils.py
@@ -6,7 +6,9 @@
 # later. See the COPYING file in the top-level directory.
 
 import re
+import logging
 
+from avocado.utils import process
 from avocado.utils.path import find_command, CmdNotFoundError
 
 def tesseract_available(expected_version):
@@ -26,3 +28,19 @@ def tesseract_available(expected_version):
         return False
     # now this is guaranteed to be a digit
     return int(match.groups()[0]) == expected_version
+
+
+def tesseract_ocr(image_path, tesseract_args='', tesseract_version=3):
+    console_logger = logging.getLogger('tesseract')
+    console_logger.debug(image_path)
+    if tesseract_version == 4:
+        tesseract_args += ' --oem 1'
+    proc = process.run("tesseract {} {} stdout".format(tesseract_args,
+                                                       image_path))
+    lines = []
+    for line in proc.stdout_text.split('\n'):
+        sline = line.strip()
+        if len(sline):
+            console_logger.debug(sline)
+            lines += [sline]
+    return lines
```
We are going to reuse the tesseract OCR code.
Create a new tesseract_ocr() helper and use it.

Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

```
---
 tests/acceptance/machine_m68k_nextcube.py | 21 +++++----------------
 tests/acceptance/tesseract_utils.py       | 18 ++++++++++++++++++
 2 files changed, 23 insertions(+), 16 deletions(-)
```