1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

Merge pull request #27 from stephengaito/updateTests

updated local tests and added bash scripts to help automate testing.
updated local tests (currently ignoring remote browser tests)
currently the browser_tests/text_visibility fails because of clipping issues (probably actually layering interactions with clipping)
added bash scripts to help install run and check the test outputs, as well as regenerate tests which fail simply due to slight layout changes.
This commit is contained in:
stephengaito 2020-06-03 14:42:54 +01:00 committed by GitHub
commit fbf06c21f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 405 additions and 104 deletions

1
.gitattributes vendored Normal file
View File

@ -0,0 +1 @@
test export-ignore

24
.gitignore vendored
View File

@ -14,19 +14,19 @@ CTestTestfile.cmake
gmon.out gmon.out
install_manifest.txt install_manifest.txt
Makefile Makefile
pdf2htmlEX pdf2htmlEX/build
pdf2htmlEX.1 pdf2htmlEX/pdf2htmlEX.1
*.pyc *.pyc
share/base.css pdf2htmlEX/share/base.css
share/base.min.css pdf2htmlEX/share/base.min.css
share/fancy.css pdf2htmlEX/share/fancy.css
share/fancy.min.css pdf2htmlEX/share/fancy.min.css
share/pdf2htmlEX.js pdf2htmlEX/share/pdf2htmlEX.js
share/pdf2htmlEX.min.js pdf2htmlEX/share/pdf2htmlEX.min.js
src/pdf2htmlEX-config.h pdf2htmlEX/src/pdf2htmlEX-config.h
src/util/css_const.h pdf2htmlEX/src/util/css_const.h
test export-ignore
Testing/* Testing/*
DartConfiguration.tcl DartConfiguration.tcl
test/test.py pdf2htmlEX/test/test.py
pdf2htmlEX/test/geckodriver.log
*.swp *.swp

View File

@ -19,6 +19,10 @@ export PDF2HTMLEX_BRANCH="$(git rev-parse --abbrev-ref HEAD)"
# #
export PDF2HTMLEX_PREFIX=/usr/local export PDF2HTMLEX_PREFIX=/usr/local
# Ensure all Apt packages are installed with no user interaction
#
export DEBIAN_FRONTEND=noninteractive
set -ev set -ev
################ ################

View File

@ -11,6 +11,10 @@ export PDF2HTMLEX_BRANCH="$(git rev-parse --abbrev-ref HEAD)"
export PDF2HTMLEX_PREFIX=/usr/local export PDF2HTMLEX_PREFIX=/usr/local
# Ensure all Apt packages are installed with no user interaction
#
export DEBIAN_FRONTEND=noninteractive
################ ################
# do the build # do the build

View File

@ -1,3 +1,93 @@
# pdf2htmlEX tests
This directory contains a collection of python3 unittests of the output of
pdf2htmlEX.
The graphical output of pdf2htmlEX can be tested both locally and remotely
using [Selenium](https://www.selenium.dev/) and the [Pillow Python Imaging
Library](https://python-pillow.org/).
The browser tests use Selenium to take a screenshot of a Firefox browser's
rendering of the pdf2htmlEX output for a given PDF file, and compare that
image to a screenshot of the previously saved reference HTML.
## Tests which are currently failing:
- **browser_tests/text_visibility** At the moment clipping is broken
and needs to be fixed. Rerun `runLocalBrowserTests` and then run
`compareTestImages` on the `text_visibility` test to see the problem.
## Running tests
There are three bash scripts which automate the running of a given
collection of tests:
1. **runLocalTests** runs a simple collection of tests which do not
require Selenium or a browser.
```
./runLocalTests
```
2. **runLocalBrowserTests** runs a more complex collection of tests which
*require* Selenium, a FireFox browser, as well as a 'virtual frame buffer'
(Xvfb) to be installed.
```
./runLocalBrowserTests
```
3. **runRemoteBrowserTests** runs the same complex collection of tests as
run by `runLocalBrowserTests` but this time using 'Sauce Connect'. (At the
moment this is not fully implemented or (re)tested)
```
./runRemoteBrowserTests
```
In order to run these tests, you *must* have the correct testing software
installed locally. To do this you can run the command:
```
./installAutomaticTestSoftware
```
## Understanding browser test failures
If any of the automatic browser tests *fail* then you might want to
manually view the PNG images for a given test using the command:
```
./compareTestImages <<testName>>
```
This command opens the three PNG images associated with a given failed
test so that you can manually compare the new output (`*.out.png`), the
reference output (`*.ref.png`) and an image of the 'difference' between
the two images (`*.diff.png`). To pass, the 'difference' image must be
*completely* black.
Usually it will be obvious that the newer version of pdf2htmlEX has only
slightly moved various image elements. Any such tests can be made to pass
by updating the reference html using the tool:
```
./regenerateTest <<testName>>
```
This command will regenerate the reference html for the specified test.
All of these manual comparison tools require additional software which can
be installed using the command:
```
./installManualTestSoftware
```
---
## OLD README contents:
### Dependencies ### Dependencies
- python2 and packages - python2 and packages

View File

@ -5,6 +5,7 @@ import subprocess
import shutil import shutil
import unittest import unittest
#from selenium.common.exceptions import WebDriverException
from PIL import Image, ImageChops from PIL import Image, ImageChops
from test import Common from test import Common
@ -30,7 +31,7 @@ class BrowserTests(Common):
def run_test_case(self, filename, args=[], page_must_load=True): def run_test_case(self, filename, args=[], page_must_load=True):
basefilename, extension = os.path.splitext(filename) basefilename, extension = os.path.splitext(filename)
self.assertEquals(extension.lower(), '.pdf', 'Input file is not PDF') self.assertEqual(extension.lower(), '.pdf', 'Input file is not PDF')
htmlfilename = basefilename + '.html' htmlfilename = basefilename + '.html'
ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename) ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
@ -52,21 +53,28 @@ class BrowserTests(Common):
pngfilename_out = os.path.join(self.PNGDIR, basefilename + '.out.png') pngfilename_out = os.path.join(self.PNGDIR, basefilename + '.out.png')
self.generate_image(out_htmlfilename, pngfilename_out) self.generate_image(out_htmlfilename, pngfilename_out)
out_img = Image.open(pngfilename_out) out_img = Image.open(pngfilename_out).convert('RGB')
pngfilename_ref = os.path.join(self.PNGDIR, basefilename + '.ref.png') pngfilename_ref = os.path.join(self.PNGDIR, basefilename + '.ref.png')
self.generate_image(ref_htmlfilename, pngfilename_ref, page_must_load=page_must_load) self.generate_image(ref_htmlfilename, pngfilename_ref, page_must_load=page_must_load)
ref_img = Image.open(pngfilename_ref) ref_img = Image.open(pngfilename_ref).convert('RGB')
diff_img = ImageChops.difference(ref_img, out_img); diff_img = ImageChops.difference(ref_img, out_img);
# ALWAYS save the diff image so we can manually check the diff
# see: (http://stackoverflow.com/questions/15721484):
diff_file_name = os.path.join(self.PNGDIR, basefilename + '.diff.png')
diff_img.convert('RGB').save(diff_file_name)
diff_bbox = diff_img.getbbox() diff_bbox = diff_img.getbbox()
if diff_bbox is not None: print("\nTesting at: [", basefilename, "]")
if diff_bbox is None:
print(" passed")
else:
print(" diff bounding box: ", diff_bbox, " should be None")
diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1]) diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1])
img_size = ref_img.size[0] * ref_img.size[1] img_size = ref_img.size[0] * ref_img.size[1]
# save the diff image (http://stackoverflow.com/questions/15721484):
diff_file_name = os.path.join(self.PNGDIR, basefilename + '.diff.png')
diff_img.convert('RGB').save(diff_file_name)
self.fail(('PNG files %s and %s differ by at most %d pixels, '+ self.fail(('PNG files %s and %s differ by at most %d pixels, '+
'(%f%% of %d pixels in total), difference: %s') % '(%f%% of %d pixels in total), difference: %s') %
(pngfilename_out, pngfilename_ref, (pngfilename_out, pngfilename_ref,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,31 @@
#!/usr/bin/env python3

# This python script displays the three images for a given test
#
# ref image: is the currently "correct" image archived with pdf2htmlEX
# out image: is the result of pdf2htmlEXing a given test pdf
# diff image: is the difference of the ref and out ('RGB') images
#
# In all cases, the diff image MUST be completely BLACK for a test to
# pass.
#
# Type Ctrl-Q inside the image to quit each image display.
#

import os
import subprocess
import sys

# Directory in which this script looks for the PNG images of a test.
PNG_DIR = "/tmp/pdf2htmlEX/png"

# Suffixes of the images shown, in the order they are opened.
PNG_SUFFIXES = (".out.png", ".ref.png", ".diff.png")


def image_paths(file_name, png_dir=PNG_DIR):
    """Return the out, ref and diff PNG paths (in that order) for a test.

    Any extension on *file_name* (for example ``.pdf``) is dropped before
    the image suffixes are appended.
    """
    base_name = os.path.splitext(file_name)[0]
    return [os.path.join(png_dir, base_name + suffix) for suffix in PNG_SUFFIXES]


def main(argv):
    """Open each image of the test named in ``argv[1]``; return the exit code."""
    if len(argv) != 2:
        print("usage: compareTestImages <<aTestFileName>>")
        return -1

    for png_file in image_paths(argv[1]):
        try:
            # An argument list (no shell) keeps names containing spaces or
            # shell metacharacters intact. Popen does not wait, so all three
            # viewers are open at the same time.
            subprocess.Popen(["display", png_file])
        except FileNotFoundError:
            print("could not run the 'display' command; is it installed?")
            return -1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

View File

@ -0,0 +1,33 @@
#!/bin/bash

# This bash script installs all local software required to run the
# pdf2htmlEX tests

# Stop at the first failing step so that a failed download is never
# unpacked or installed.
#
set -e

export DEBIAN_FRONTEND=noninteractive

# Start by making sure all required apt packages exist
#
# (sudo may reset the environment, so DEBIAN_FRONTEND is also handed to
# apt-get explicitly; apt-get is used as it is the variant intended for
# scripts)
#
sudo env DEBIAN_FRONTEND=noninteractive apt-get -y install \
  python3     \
  python3-pip \
  xvfb        \
  firefox

# Now get the geckodriver for firefox (as required by selenium)
#
GECKODRIVER_VERSION=v0.26.0
GECKODRIVER_TARBALL="geckodriver-${GECKODRIVER_VERSION}-linux64.tar.gz"

pushd /tmp
#
# -O overwrites any tarball left by an earlier run (plain wget would save
# a "*.tar.gz.1" copy and tar would then unpack the stale file)
#
wget -O "$GECKODRIVER_TARBALL" \
  "https://github.com/mozilla/geckodriver/releases/download/${GECKODRIVER_VERSION}/${GECKODRIVER_TARBALL}"
#
tar xvf "$GECKODRIVER_TARBALL"
#
sudo mv geckodriver /usr/local/bin
#
popd

# Now make sure all python packages exist (install into the local user's
# PyPI archive)
#
pip3 install \
  selenium   \
  Pillow

View File

@ -0,0 +1,13 @@
#!/bin/bash

# This bash script installs the additional local software used to
# manually inspect the results of the pdf2htmlEX browser tests
# (presumably this provides the 'display' image viewer -- confirm)

# Stop if the installation fails
#
set -e

export DEBIAN_FRONTEND=noninteractive

# Make sure all required apt packages exist
#
# (sudo may reset the environment, so DEBIAN_FRONTEND is also handed to
# apt-get explicitly; apt-get is used as it is the variant intended for
# scripts)
#
sudo env DEBIAN_FRONTEND=noninteractive apt-get -y install \
  graphicsmagick-imagemagick-compat \
  okular

48
pdf2htmlEX/test/regenerateTest Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env python3

# This python script regenerates the html files associated with a given browser test.
#
# This script MUST ONLY be run after the full (local) browser test.
#

import os
import shutil
import sys

# Directory in which this script looks for the freshly generated html.
OUT_DIR = "/tmp/pdf2htmlEX/out"

# Directory (relative to the current working directory) which holds one
# sub-directory of reference html per test.
REF_ROOT = "browser_tests"

# Test whose reference html must never be overwritten by this script.
PROTECTED_NAME = "test_fail"


def regeneration_paths(file_name):
    """Return ``(base name, reference html dir, generated html file)``.

    Any extension on *file_name* (for example ``.pdf``) is dropped first.
    """
    base_name = os.path.splitext(file_name)[0]
    ref_html_dir = os.path.join(REF_ROOT, base_name)
    new_html_file = os.path.join(OUT_DIR, base_name + ".html")
    return base_name, ref_html_dir, new_html_file


def main(argv):
    """Replace the reference html of the test named in ``argv[1]``.

    Returns the process exit code: -1 for a usage error, a protected test
    or a missing generated file; 0 after a successful copy or when the
    user aborts at the confirmation prompt.
    """
    if len(argv) != 2:
        print("usage: regenerateTest <<aTestFileName>>")
        return -1

    print("")
    base_name, ref_html_dir, new_html_file = regeneration_paths(argv[1])

    if base_name == PROTECTED_NAME:
        print("The test_fail test can not be regenerated")
        print("")
        return -1

    if not os.path.isfile(new_html_file):
        print("The test file [", new_html_file, "] has not been found")
        print("Do you need to re-run the browser tests?")
        print("")
        return -1

    print("Are you sure you want to copy: ")
    print("  ", new_html_file)
    print("to: ")
    print("  ", ref_html_dir)
    try:
        input("\nType Ctrl-C now to abort: ")
    except (KeyboardInterrupt, EOFError):
        # Ctrl-C (or a closed stdin) means "do not touch the reference".
        print("\n")
        return 0

    # Start from an empty reference directory so that no stale files
    # survive next to the new html.
    shutil.rmtree(ref_html_dir, ignore_errors=True)
    os.makedirs(ref_html_dir, mode=0o755, exist_ok=True)
    shutil.copy(new_html_file, ref_html_dir)
    print("")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

View File

@ -0,0 +1,51 @@
#!/bin/bash

# This bash script runs the local browser tests

XVFB_PIDFILE=/tmp/custom_xvfb_99.pid

# List any running Xvfb processes between two separator lines
#
show_xvfb () {
  echo "-----------------------------------------------------------"
  ps -ef | grep -v grep | grep Xvfb
  echo "-----------------------------------------------------------"
  echo ""
}

# Shut down the virtual frame buffer and show that it has gone
#
stop_xvfb () {
  /sbin/start-stop-daemon \
    --stop                \
    --pidfile "$XVFB_PIDFILE" \
    --retry 5
  #
  echo "check that Xvfb is no longer running:"
  show_xvfb
}

# We start by running a virtual frame buffer as display 99.0
#
/sbin/start-stop-daemon \
  --start               \
  --pidfile "$XVFB_PIDFILE" \
  --make-pidfile        \
  --background          \
  --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1280x1920x16

# From here on the frame buffer is shut down whenever this script exits,
# including when the test run below is interrupted.
#
trap stop_xvfb EXIT
#
echo ""
echo "check that Xvfb is running:"
show_xvfb

# Now we tell the test drivers about this display
# (Note: you MUST not specify the host!)
#
export DISPLAY=:99.0
#
echo "xwindows (xvfb) display: $DISPLAY"

# Now we actually run the python3 based tests
#
echo ""
echo "running local browser tests:"
echo "-----------------------------------------------------------"
python3 test_local_browser.py
returnCode=$?
echo "-----------------------------------------------------------"
echo ""

# The EXIT trap shuts the virtual frame buffer down; the exit status of
# this script is that of the test run.
#
exit $returnCode

5
pdf2htmlEX/test/runLocalTests Executable file
View File

@ -0,0 +1,5 @@
#!/bin/bash

# Run the simple pdf2htmlEX tests, i.e. those which do not need a browser.
#
# exec hands this process over to python3, so the exit status of the
# script is the exit status of the test run.

exec python3 test_output.py

View File

@ -1,2 +0,0 @@
#!/bin/sh
/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1280x1920x16

View File

@ -69,7 +69,7 @@ class Common(object):
with open(os.devnull, 'w') as fnull: with open(os.devnull, 'w') as fnull:
return_code = subprocess.call(list(map(str, args)), stderr=fnull) return_code = subprocess.call(list(map(str, args)), stderr=fnull)
self.assertEquals(return_code, 0, 'cannot execute pdf2htmlEX') self.assertEqual(return_code, 0, 'cannot execute pdf2htmlEX')
files = os.listdir(self.TMPDIR) files = os.listdir(self.TMPDIR)
for file in files: for file in files:

View File

@ -8,33 +8,39 @@ from selenium import webdriver
from selenium.webdriver.common.by import By from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions from selenium.webdriver.support import expected_conditions
from selenium.common.exceptions import WebDriverException
from browser_tests import BrowserTests from browser_tests import BrowserTests
# class test_local_browser(BrowserTests, unittest.TestCase): class test_local_browser(BrowserTests, unittest.TestCase):
# @classmethod @classmethod
# def setUpClass(cls): def setUpClass(cls):
# super(test_local_browser, cls).setUpClass() super(test_local_browser, cls).setUpClass()
# if not cls.GENERATING_MODE: if not cls.GENERATING_MODE:
# cls.browser = webdriver.Firefox() cls.browser = webdriver.Firefox()
# cls.browser.maximize_window() cls.browser.set_window_size(810, 1210)
# size = cls.browser.get_window_size() #cls.browser.maximize_window() # (does not seem to maximize to size of the frame buffer)
# assert ((size['width'] >= cls.BROWSER_WIDTH) and (size['height'] >= cls.BROWSER_HEIGHT)), 'Screen is not large enough' size = cls.browser.get_window_size()
# cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT) print(" browser size width: ", size['width'])
# print(" browser size height: ", size['height'])
# @classmethod assert ((size['width'] >= cls.BROWSER_WIDTH) and (size['height'] >= cls.BROWSER_HEIGHT)), 'Screen is not large enough'
# def tearDownClass(cls): cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT)
# if not cls.GENERATING_MODE:
# cls.browser.quit() @classmethod
# super(test_local_browser, cls).tearDownClass() def tearDownClass(cls):
# if not cls.GENERATING_MODE:
# def generate_image(self, html_file, png_file, page_must_load=True): cls.browser.quit()
# self.browser.get('file://' + html_file) super(test_local_browser, cls).tearDownClass()
# try:
# WebDriverWait(self.browser, 5).until(expected_conditions.presence_of_element_located((By.ID, 'page-container'))) def generate_image(self, html_file, png_file, page_must_load=True):
# except: try:
# if page_must_load: self.browser.get('file://' + html_file)
# raise WebDriverWait(self.browser, 5) \
# self.browser.save_screenshot(png_file) .until(expected_conditions.presence_of_element_located((By.ID, 'page-container')))
# except WebDriverException as e:
# if __name__ == '__main__': if page_must_load:
# unittest.main() raise e
finally:
self.browser.save_screenshot(png_file)
if __name__ == '__main__':
unittest.main()

View File

@ -16,7 +16,11 @@ class test_output(Common, unittest.TestCase):
result = self.run_pdf2htmlEX(args) result = self.run_pdf2htmlEX(args)
self.maxDiff = None self.maxDiff = None
if expected_output_files: if expected_output_files:
self.assertItemsEqual(result['output_files'], expected_output_files) # assertItemsEqual has change in python 3.2 to assertCountEqual
# "Test that sequence first contains the same elements as second, regardless of their order."
# see: https://docs.python.org/3/library/unittest.html#assert-methods
#
self.assertCountEqual(result['output_files'], expected_output_files)
print("test_output ", input_file, ": matched ", expected_output_files) print("test_output ", input_file, ": matched ", expected_output_files)
else: else:
print("test_output ", input_file, ": passed") print("test_output ", input_file, ": passed")