updated local tests and added bash scripts to help automate testing.

2024-12-21 12:40:08 +00:00 · 2020-06-03 12:17:44 +00:00 · 2020-06-03 12:17:44 +00:00 · d2e869f05c
commit d2e869f05c
parent 0b0d14afd9
20 changed files with 405 additions and 104 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -0,0 +1 @@
+test export-ignore
--- a/.gitignore
+++ b/.gitignore
@ -14,19 +14,19 @@ CTestTestfile.cmake
 gmon.out
 install_manifest.txt
 Makefile
-pdf2htmlEX
-pdf2htmlEX.1
+pdf2htmlEX/build
+pdf2htmlEX/pdf2htmlEX.1
 *.pyc
-share/base.css
-share/base.min.css
-share/fancy.css
-share/fancy.min.css
-share/pdf2htmlEX.js
-share/pdf2htmlEX.min.js
-src/pdf2htmlEX-config.h
-src/util/css_const.h
-test export-ignore
+pdf2htmlEX/share/base.css
+pdf2htmlEX/share/base.min.css
+pdf2htmlEX/share/fancy.css
+pdf2htmlEX/share/fancy.min.css
+pdf2htmlEX/share/pdf2htmlEX.js
+pdf2htmlEX/share/pdf2htmlEX.min.js
+pdf2htmlEX/src/pdf2htmlEX-config.h
+pdf2htmlEX/src/util/css_const.h
 Testing/*
 DartConfiguration.tcl
-test/test.py
+pdf2htmlEX/test/test.py
+pdf2htmlEX/test/geckodriver.log
 *.swp
--- a/buildScripts/buildInstallLocally
+++ b/buildScripts/buildInstallLocally
@ -19,6 +19,10 @@ export PDF2HTMLEX_BRANCH="$(git rev-parse --abbrev-ref HEAD)"
 #
 export PDF2HTMLEX_PREFIX=/usr/local

+# Ensure all Apt packages are installed with no user interaction
+#
+export DEBIAN_FRONTEND=noninteractive
+
 set -ev

 ################
--- a/buildScripts/travisLinuxDoItAll
+++ b/buildScripts/travisLinuxDoItAll
@ -11,6 +11,10 @@ export PDF2HTMLEX_BRANCH="$(git rev-parse --abbrev-ref HEAD)"

 export PDF2HTMLEX_PREFIX=/usr/local

+# Ensure all Apt packages are installed with no user interaction
+#
+export DEBIAN_FRONTEND=noninteractive
+
 ################
 # do the build

--- a/pdf2htmlEX/test/README.md
+++ b/pdf2htmlEX/test/README.md
@ -1,3 +1,93 @@
+# pdf2htmlEX tests
+
+This directory contains a collection of python3 unittests of the output of 
+pdf2htmlEX.
+
+The graphical output of pdf2htmlEX can be tested both locally and remotely 
+using [Selenium](https://www.selenium.dev/) and the [Pillow Python Imaging 
+Library](https://python-pillow.org/).
+
+The browser tests use Selenium to take a screenshot of a Firefox browser's 
+rendering of the pdf2htmlEX output for a given pdf file and compare that 
+image to an image of the previously saved reference html. 
+
+## Tests which are currently failing:
+
+- **browser_tests/text_visibility** At the moment clipping has been broken 
+and needs to be fixed. Rerun `runLocalBrowserTests` and use the 
+`compareTestImages` for the `text_visibility` test to see the problem. 
+
+## Running tests
+
+There are three bash scripts which automate the running of a given 
+collection of tests: 
+
+1. **runLocalTests** runs a simple collection of tests which do not 
+require Selenium or a browser. 
+
+```
+  ./runLocalTests
+```
+
+2. **runLocalBrowserTests** runs a more complex collection of tests which 
+*require* Selenium, a Firefox browser, as well as a 'virtual frame buffer' 
+(Xvfb) to be installed. 
+
+```
+  ./runLocalBrowserTests
+```
+
+3. **runRemoteBrowserTests** runs the same complex collection of tests as 
+run by `runLocalBrowserTests` but this time using 'Sauce Connect'. (At the 
+moment this is not fully implemented or (re)tested) 
+
+```
+  ./runRemoteBrowserTests
+```
+
+In order to run these tests, you *must* have the correct testing software 
+installed locally. To do this you can run the command: 
+
+```
+  ./installAutomaticTestSoftware
+```
+
+## Understanding browser test failures
+
+If any of the automatic browser tests *fail* then you might want to 
+manually view the PNG images for a given test using the command: 
+
+```
+  ./compareTestImages <<testName>>
+```
+
+This command opens the three PNG images associated with a given failed 
+test so that you can manually compare the new output (`*.out.png`), the 
+reference output (`*.ref.png`) and an image of the 'difference' between 
+the two images (`*.diff.png`). To pass, the 'difference' image must be 
+*completely* black. 
+
+Usually it will be obvious that the newer version of pdf2htmlEX has only 
+slightly moved various image elements. Any such tests can be made to pass 
+by updating the reference html using the tool:
+
+```
+  ./regenerateTest <<testName>>
+```
+
+This command will regenerate the reference html for the specified test.
+
+All of these manual comparison tools require additional software which can 
+be installed using the command: 
+
+```
+  ./installManualTestSoftware
+```
+
+--- 
+
+## OLD README contents:
+
 ### Dependencies

 - python2 and packages
--- a/pdf2htmlEX/test/browser_tests.py
+++ b/pdf2htmlEX/test/browser_tests.py
@ -5,6 +5,7 @@ import subprocess
 import shutil
 import unittest

+#from selenium.common.exceptions import WebDriverException
 from PIL import Image, ImageChops
 from test import Common

@ -30,7 +31,7 @@ class BrowserTests(Common):

    def run_test_case(self, filename, args=[], page_must_load=True):
        basefilename, extension = os.path.splitext(filename)
-        self.assertEquals(extension.lower(), '.pdf', 'Input file is not PDF')
+        self.assertEqual(extension.lower(), '.pdf', 'Input file is not PDF')

        htmlfilename = basefilename + '.html'
        ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
@ -52,21 +53,28 @@ class BrowserTests(Common):

        pngfilename_out = os.path.join(self.PNGDIR, basefilename + '.out.png')
        self.generate_image(out_htmlfilename, pngfilename_out)
-        out_img = Image.open(pngfilename_out)
+        out_img = Image.open(pngfilename_out).convert('RGB')

        pngfilename_ref = os.path.join(self.PNGDIR, basefilename + '.ref.png')
        self.generate_image(ref_htmlfilename, pngfilename_ref, page_must_load=page_must_load)
-        ref_img = Image.open(pngfilename_ref)
+        ref_img = Image.open(pngfilename_ref).convert('RGB')

        diff_img = ImageChops.difference(ref_img, out_img);

+        # ALWAYS save the diff image so we can manually check the diff
+        # see: (http://stackoverflow.com/questions/15721484):
+        diff_file_name = os.path.join(self.PNGDIR, basefilename + '.diff.png')
+        diff_img.convert('RGB').save(diff_file_name)
+        
        diff_bbox = diff_img.getbbox()
-        if diff_bbox is not None:
+        print("\nTesting at: [", basefilename, "]")
+        
+        if diff_bbox is None:
+            print("  passed")
+        else:
+            print("  diff bounding box: ", diff_bbox, " should be None")
            diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1])
            img_size = ref_img.size[0] * ref_img.size[1]
-            # save the diff image (http://stackoverflow.com/questions/15721484):
-            diff_file_name = os.path.join(self.PNGDIR, basefilename + '.diff.png')
-            diff_img.convert('RGB').save(diff_file_name)
            self.fail(('PNG files %s and %s differ by at most %d pixels, '+
                       '(%f%% of %d pixels in total), difference: %s') %
                      (pngfilename_out, pngfilename_ref,
--- a/pdf2htmlEX/test/browser_tests/fontfile3_opentype/fontfile3_opentype.html
+++ b/pdf2htmlEX/test/browser_tests/fontfile3_opentype/fontfile3_opentype.html
--- a/pdf2htmlEX/test/browser_tests/geneve_1564/geneve_1564.html
+++ b/pdf2htmlEX/test/browser_tests/geneve_1564/geneve_1564.html
--- a/pdf2htmlEX/test/browser_tests/svg_background_with_page_rotation_issue402/svg_background_with_page_rotation_issue402.html
+++ b/pdf2htmlEX/test/browser_tests/svg_background_with_page_rotation_issue402/svg_background_with_page_rotation_issue402.html
--- a/pdf2htmlEX/test/browser_tests/with_form/with_form.html
+++ b/pdf2htmlEX/test/browser_tests/with_form/with_form.html
--- a/pdf2htmlEX/test/compareTestImages
+++ b/pdf2htmlEX/test/compareTestImages
@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+
+# This python script displays the three images for a given test 
+#
+# ref  image: is the currently "correct" image archived with pdf2htmlEX 
+# out  image: is the result of pdf2htmlEXing a given test pdf
+# diff image: is the difference of the ref and out ('RGB') images
+#
+# In all cases, the diff image MUST be completely BLACK for a test to 
+# pass. 
+#
+# Type Ctrl-Q inside the image to quit each image display.
+#
+
+import os
+import subprocess
+import sys
+
+if len(sys.argv) != 2:
+  print("usage: compareTestImages <<aTestFileName>>")
+  sys.exit(-1)
+
+baseFileName = os.path.splitext(sys.argv[1])[0]
+testPNGDir   = "/tmp/pdf2htmlEX/png"
+
+# Start one `display` viewer per image without waiting for it to exit.
+# An argument list (no shell) is used so that a test name containing
+# spaces or shell metacharacters is passed through as a single file name.
+for suffix in (".out.png", ".ref.png", ".diff.png"):
+  pngFile = os.path.join(testPNGDir, baseFileName + suffix)
+  subprocess.Popen(["display", pngFile])
--- a/pdf2htmlEX/test/installAutomaticTestSoftware
+++ b/pdf2htmlEX/test/installAutomaticTestSoftware
@ -0,0 +1,33 @@
+#!/bin/bash
+
+# This bash script installs all local software required to run the 
+# automatic pdf2htmlEX tests 
+
+export DEBIAN_FRONTEND=noninteractive
+
+# Start by making sure all required apt packages exist.
+#
+# sudo may not pass exported variables through to the command it runs
+# (env_reset), so DEBIAN_FRONTEND is also handed to apt-get explicitly.
+# apt-get is used rather than apt because apt's command line interface is
+# not intended for use in scripts.
+#
+sudo env DEBIAN_FRONTEND=noninteractive apt-get -y install \
+  python3           \
+  python3-pip       \
+  xvfb              \
+  firefox
+
+# Now get the geckodriver for firefox (as required by selenium)
+#
+pushd /tmp
+#
+wget https://github.com/mozilla/geckodriver/releases/download/v0.26.0/geckodriver-v0.26.0-linux64.tar.gz
+#
+tar xvf geckodriver-v0.26.0-linux64.tar.gz
+#
+sudo mv geckodriver /usr/local/bin
+#
+popd
+
+# Now make sure all python packages exist (install into the local user's 
+# PyPI archive) 
+#
+pip3 install \
+  selenium  \
+  Pillow
--- a/pdf2htmlEX/test/installManualTestSoftware
+++ b/pdf2htmlEX/test/installManualTestSoftware
@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This bash script installs the local software required by the manual 
+# test comparison tools 
+
+export DEBIAN_FRONTEND=noninteractive
+
+# Start by making sure all required apt packages exist.
+#
+# sudo may not pass exported variables through to the command it runs
+# (env_reset), so DEBIAN_FRONTEND is also handed to apt-get explicitly.
+#
+sudo env DEBIAN_FRONTEND=noninteractive apt-get -y install \
+  graphicsmagick-imagemagick-compat \
+  okular
+
--- a/pdf2htmlEX/test/regenerateTest
+++ b/pdf2htmlEX/test/regenerateTest
@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+
+# This python script regenerates the html files associated with a given browser test.
+#
+# This script MUST ONLY be run after the full (local) browser test.
+#
+# The reference directory is the relative path browser_tests/<testName>,
+# so it is resolved against the current working directory.
+#
+
+import os
+import sys
+import shutil
+
+if len(sys.argv) != 2:
+  print("usage: regenerateTest <<aTestFileName>>")
+  sys.exit(-1)
+
+print("")
+baseFileName  = os.path.splitext(sys.argv[1])[0]
+testDir       = "/tmp/pdf2htmlEX/out"
+ref_htmlDir   = os.path.join("browser_tests", baseFileName)
+test_htmlFile = os.path.join(testDir, baseFileName+'.html')
+
+if baseFileName == "test_fail":
+  print("The test_fail test can not be regenerated")
+  print("")
+  sys.exit(-1)
+
+if not os.path.isfile(test_htmlFile):
+  print("The test file [", test_htmlFile, "] has not been found")
+  print("Do you need to re-run the browser tests?")
+  print("")
+  sys.exit(-1)
+
+print("Are you sure you want to copy: ")
+print("  ", test_htmlFile)
+print("to: ")
+print("  ", ref_htmlDir)
+try:
+  input("\nType Ctrl-C now to abort: ")
+except (KeyboardInterrupt, EOFError):
+  # Ctrl-C (or a closed stdin) means "do not overwrite the reference"
+  print("\n")
+  sys.exit(0)
+
+# Replace the whole reference directory with the freshly generated html
+shutil.rmtree(ref_htmlDir, ignore_errors=True)
+os.makedirs(ref_htmlDir, mode=0o755, exist_ok=True)
+shutil.copy(test_htmlFile, ref_htmlDir)
+
+print("")
+
--- a/pdf2htmlEX/test/runLocalBrowserTests
+++ b/pdf2htmlEX/test/runLocalBrowserTests
@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Runs the local browser tests against a temporary virtual frame buffer
+
+xvfbPidFile=/tmp/custom_xvfb_99.pid
+ruler="-----------------------------------------------------------"
+
+# Print the heading given as $1 followed by a ruled list of Xvfb processes
+listXvfb() {
+  echo "$1"
+  echo "$ruler"
+  ps -ef | grep -v grep | grep Xvfb
+  echo "$ruler"
+  echo ""
+}
+
+# Bring up the virtual frame buffer as display 99.0
+#
+/sbin/start-stop-daemon             \
+  --start                           \
+  --pidfile "$xvfbPidFile"          \
+  --make-pidfile                    \
+  --background                      \
+  --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1280x1920x16
+#
+echo ""
+listXvfb "check that Xvfb is running:"
+
+# Point the test drivers at that display
+# (Note: you MUST not specify the host!)
+#
+export DISPLAY=:99.0
+#
+echo "xwindows (xvfb) display: $DISPLAY"
+
+# Run the python3 based tests, remembering their exit status
+#
+echo ""
+echo "running local browser tests:"
+echo "$ruler"
+python3 test_local_browser.py
+export returnCode=$?
+echo "$ruler"
+echo ""
+
+# Take the virtual frame buffer down again
+#
+/sbin/start-stop-daemon --stop --pidfile "$xvfbPidFile" --retry 5
+listXvfb "check that Xvfb is no longer running:"
+
+exit $returnCode
--- a/pdf2htmlEX/test/runLocalTests
+++ b/pdf2htmlEX/test/runLocalTests
@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Runs the simple tests, which need neither Selenium nor a browser
+
+exec python3 test_output.py
--- a/pdf2htmlEX/test/start_xvfb.sh
+++ b/pdf2htmlEX/test/start_xvfb.sh
@ -1,2 +0,0 @@
-#!/bin/sh
-/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1280x1920x16
--- a/pdf2htmlEX/test/test.py.in
+++ b/pdf2htmlEX/test/test.py.in
@ -69,7 +69,7 @@ class Common(object):
        with open(os.devnull, 'w') as fnull:
            return_code = subprocess.call(list(map(str, args)), stderr=fnull)

-        self.assertEquals(return_code, 0, 'cannot execute pdf2htmlEX')
+        self.assertEqual(return_code, 0, 'cannot execute pdf2htmlEX')

        files = os.listdir(self.TMPDIR)
        for file in files:
--- a/pdf2htmlEX/test/test_local_browser.py
+++ b/pdf2htmlEX/test/test_local_browser.py
@ -8,33 +8,39 @@ from selenium import webdriver
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions
+from selenium.common.exceptions import WebDriverException
 from browser_tests import BrowserTests

-# class test_local_browser(BrowserTests, unittest.TestCase):
-#     @classmethod
-#     def setUpClass(cls):
-#         super(test_local_browser, cls).setUpClass()
-#         if not cls.GENERATING_MODE:
-#             cls.browser = webdriver.Firefox()
-#             cls.browser.maximize_window()
-#             size = cls.browser.get_window_size()
-#             assert ((size['width'] >= cls.BROWSER_WIDTH) and (size['height'] >= cls.BROWSER_HEIGHT)), 'Screen is not large enough'
-#             cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT)
-# 
-#     @classmethod
-#     def tearDownClass(cls):
-#         if not cls.GENERATING_MODE:
-#             cls.browser.quit()
-#         super(test_local_browser, cls).tearDownClass()
-# 
-#     def generate_image(self, html_file, png_file, page_must_load=True):
-#         self.browser.get('file://' + html_file)
-#         try:
-#             WebDriverWait(self.browser, 5).until(expected_conditions.presence_of_element_located((By.ID, 'page-container')))
-#         except:
-#             if page_must_load:
-#                 raise
-#         self.browser.save_screenshot(png_file)
-# 
-# if __name__ == '__main__':
-#     unittest.main()
+class test_local_browser(BrowserTests, unittest.TestCase):
+    """Browser tests run against a locally launched Firefox instance."""
+
+    @classmethod
+    def setUpClass(cls):
+        """Launch and size Firefox (skipped when cls.GENERATING_MODE is set).
+
+        Raises:
+            AssertionError: if the browser window is smaller than
+                BROWSER_WIDTH x BROWSER_HEIGHT.
+        """
+        super(test_local_browser, cls).setUpClass()
+        if not cls.GENERATING_MODE:
+            cls.browser = webdriver.Firefox()
+            try:
+                # maximize_window() does not seem to maximize to the size of
+                # the frame buffer, so an explicit size is requested instead.
+                cls.browser.set_window_size(810, 1210)
+                size = cls.browser.get_window_size()
+                print(" browser size  width: ", size['width'])
+                print(" browser size height: ", size['height'])
+                assert ((size['width'] >= cls.BROWSER_WIDTH) and (size['height'] >= cls.BROWSER_HEIGHT)), 'Screen is not large enough'
+                cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT)
+            except BaseException:
+                # unittest does not run tearDownClass when setUpClass fails,
+                # so close Firefox here instead of leaving it running.
+                cls.browser.quit()
+                raise
+
+    @classmethod
+    def tearDownClass(cls):
+        """Quit the browser started by setUpClass, if one was started."""
+        if not cls.GENERATING_MODE:
+            cls.browser.quit()
+        super(test_local_browser, cls).tearDownClass()
+
+    def generate_image(self, html_file, png_file, page_must_load=True):
+        """Load html_file in the browser and save a screenshot to png_file.
+
+        A WebDriverException raised while loading the page, or while waiting
+        (up to 5 seconds) for the 'page-container' element, is re-raised only
+        when page_must_load is true. The screenshot is attempted either way.
+        """
+        try:
+            self.browser.get('file://' + html_file)
+            WebDriverWait(self.browser, 5) \
+            .until(expected_conditions.presence_of_element_located((By.ID, 'page-container')))
+        except WebDriverException:
+            if page_must_load:
+                raise
+        finally:
+            self.browser.save_screenshot(png_file)
+
+if __name__ == '__main__':
+    unittest.main()
--- a/pdf2htmlEX/test/test_output.py
+++ b/pdf2htmlEX/test/test_output.py
@ -16,7 +16,11 @@ class test_output(Common, unittest.TestCase):
        result = self.run_pdf2htmlEX(args)
        self.maxDiff = None
        if expected_output_files:
-            self.assertItemsEqual(result['output_files'], expected_output_files)
+            # assertItemsEqual was renamed to assertCountEqual in python 3.2
+            # "Test that sequence first contains the same elements as second, regardless of their order."
+            # see: https://docs.python.org/3/library/unittest.html#assert-methods
+            #
+            self.assertCountEqual(result['output_files'], expected_output_files)
            print("test_output ", input_file, ": matched ", expected_output_files)
        else:
            print("test_output ", input_file, ": passed")