1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

improve test runner

This commit is contained in:
Lu Wang 2014-11-16 13:40:02 +08:00
parent 22d19329c0
commit 527e7b6216
13 changed files with 197 additions and 16 deletions

View File

@ -31,9 +31,7 @@
@base.min.css @base.min.css
# fancy CSS styles - Optional # fancy CSS styles - Optional
#TEST_IGNORE_BEGIN
@fancy.min.css @fancy.min.css
#TEST_IGNORE_END
# PDF specific CSS styles - Do not modify # PDF specific CSS styles - Do not modify
$css $css

26
test/README.md Normal file
View File

@ -0,0 +1,26 @@
### Dependencies
- wkhtmltoimage
- ttfautohint
- python2
- Python Imaging Library
### Usage
- Run all tests:
- `./test.py`
- Run selected tests:
- `./test.py test_A test_B ...`
- Environment variables:
- set `P2H_TEST_SAVE_TMP=1` to keep the temporary files
- set `P2H_TEST_GEN=1` to generate new reference images instead of comparing with old ones
### Guidelines for test cases
- Make sure you have the proper copyrights.
- Use meaningful file names: either a description of the file's content or `issueXXX.pdf`.
- Make each test case minimal:
- One page only, unless the test case is about multiple pages.
- Grayscale only, unless the test case is about colors.
- Remove unnecessary elements.
- Set proper parameters for cropping in `wkhtml2image_args`.
- [Optional] Include the source files that the PDF file is generated from.

4
test/fancy.min.css vendored Normal file
View File

@ -0,0 +1,4 @@
/* CSS for test cases */
/* NOTE(review): presumably clips the page container so that no scrollbars
   show up in the rendered screenshots - confirm with the test author. */
#page-container {
overflow:hidden;
}

View File

@ -13,8 +13,17 @@ class Common(object):
DATA_DIR = os.path.join(SRC_DIR, 'share') DATA_DIR = os.path.join(SRC_DIR, 'share')
PDF2HTMLEX_PATH = os.path.join(SRC_DIR, 'pdf2htmlEX') PDF2HTMLEX_PATH = os.path.join(SRC_DIR, 'pdf2htmlEX')
SAVE_TMP = os.environ.get('P2H_TEST_SAVE_TMP')
CANONICAL_TEMPDIR = '/tmp/pdf2htmlEX_test'
def setUp(self): def setUp(self):
if not self.SAVE_TMP:
self.cur_temp_dir = tempfile.mkdtemp(prefix='pdf2htmlEX_test') self.cur_temp_dir = tempfile.mkdtemp(prefix='pdf2htmlEX_test')
else:
shutil.rmtree(self.CANONICAL_TEMPDIR, True)
os.mkdir(self.CANONICAL_TEMPDIR)
self.cur_temp_dir = self.CANONICAL_TEMPDIR
self.cur_data_dir = os.path.join(self.cur_temp_dir, 'share') self.cur_data_dir = os.path.join(self.cur_temp_dir, 'share')
self.cur_output_dir = os.path.join(self.cur_temp_dir, 'out') self.cur_output_dir = os.path.join(self.cur_temp_dir, 'out')
os.mkdir(self.cur_data_dir) os.mkdir(self.cur_data_dir)
@ -36,8 +45,11 @@ class Common(object):
# copy files # copy files
shutil.copy(os.path.join(self.DATA_DIR, 'base.min.css'), shutil.copy(os.path.join(self.DATA_DIR, 'base.min.css'),
os.path.join(self.cur_data_dir, 'base.min.css')) os.path.join(self.cur_data_dir, 'base.min.css'))
shutil.copy(os.path.join(self.TEST_DIR, 'fancy.min.css'),
os.path.join(self.cur_data_dir, 'fancy.min.css'))
def tearDown(self): def tearDown(self):
if not self.SAVE_TMP:
shutil.rmtree(self.cur_temp_dir, True) shutil.rmtree(self.cur_temp_dir, True)
def run_pdf2htmlEX(self, args): def run_pdf2htmlEX(self, args):
@ -49,19 +61,15 @@ class Common(object):
:return: an object of relevant info :return: an object of relevant info
""" """
cmd = [self.PDF2HTMLEX_PATH, args = [self.PDF2HTMLEX_PATH,
'--data-dir', self.cur_data_dir, '--data-dir', self.cur_data_dir,
'--dest-dir', self.cur_output_dir '--dest-dir', self.cur_output_dir
] ] + args
for val in args: return_code = subprocess.call(list(map(str, args)))
cmd.append(str(val)) self.assertEquals(return_code, 0, 'cannot execute pdf2htmlEX')
return_code = subprocess.call(cmd)
self.assertEquals(return_code, 0)
files = os.listdir(self.cur_output_dir) files = os.listdir(self.cur_output_dir)
files.sort()
return { return {
'return_code' : return_code, 'return_code' : return_code,
@ -75,10 +83,21 @@ if __name__ == '__main__':
exit(1) exit(1)
suites = [] suites = []
loader = unittest.TestLoader() loader = unittest.TestLoader()
for module_name in ['test_naming']: test_names = list(map(lambda x: 'T.'+x, sys.argv[1:]))
for module_name in ['test_naming', 'test_conversion']:
__import__(module_name) __import__(module_name)
if len(test_names) > 0:
try:
suites.append(loader.loadTestsFromNames(test_names, sys.modules[module_name]))
except:
pass
else:
suites.append(loader.loadTestsFromModule(sys.modules[module_name])) suites.append(loader.loadTestsFromModule(sys.modules[module_name]))
if len(suites) == 0:
print >>sys.stderr, 'No test found'
exit(1)
failure_count = 0 failure_count = 0
runner = unittest.TextTestRunner(verbosity=2) runner = unittest.TextTestRunner(verbosity=2)
for suite in suites: for suite in suites:

98
test/test_conversion.py Executable file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env python
import os
import shutil
import subprocess
import unittest

from PIL import Image, ImageChops

from test import Common
class T(Common, unittest.TestCase):
    """Rendering regression tests for pdf2htmlEX.

    Each test converts a PDF from ``TEST_DATA_DIR`` to HTML with pdf2htmlEX,
    renders that HTML to a PNG with wkhtmltoimage and compares the PNG with
    the reference image stored next to the PDF.  When the environment
    variable ``P2H_TEST_GEN`` is set, the freshly rendered PNG overwrites the
    reference image instead of being compared with it.
    """

    # Truthy when reference images should be (re)generated, not compared.
    GENERATING_MODE = os.environ.get('P2H_TEST_GEN')

    # External executables, looked up through PATH.
    WKHTML2IMAGE = 'wkhtmltoimage'
    TTFAUTOHINT = 'ttfautohint'

    # Directory holding the input PDFs and their reference PNGs.
    TEST_DATA_DIR = os.path.join(Common.TEST_DIR, 'test_conversion')

    # Arguments always passed to pdf2htmlEX, before the per-test ones.
    DEFAULT_PDF2HTMLEX_ARGS = [
        '--external-hint-tool', TTFAUTOHINT,
        '--fit-width', 800,
        '--last-page', 1,
        '--correct-text-visibility', 1,
    ]

    # Arguments always passed to wkhtmltoimage, before the per-test ones.
    DEFAULT_WKHTML2IMAGE_ARGS = [
        '-f', 'png',
        '--height', 600,
        '--width', 800,
        '--quality', 0,
        '--quiet',
    ]

    @classmethod
    def setUpClass(cls):
        """Check once that the external tools can be executed.

        ``subprocess.check_call`` raises if a tool is missing or exits with a
        non-zero status, which aborts the whole test class early.
        """
        subprocess.check_call([cls.WKHTML2IMAGE, '--version'])
        subprocess.check_call([cls.TTFAUTOHINT, '--version'])

    def run_test_case(self, filename, pdf2htmlEX_args=(), wkhtml2image_args=()):
        """Convert ``filename``, render it and check it against the reference.

        :param filename: name of a PDF file inside ``TEST_DATA_DIR``
        :param pdf2htmlEX_args: extra arguments appended to
            ``DEFAULT_PDF2HTMLEX_ARGS``
        :param wkhtml2image_args: extra arguments appended to
            ``DEFAULT_WKHTML2IMAGE_ARGS`` (e.g. cropping parameters)
        """
        basefilename, extension = os.path.splitext(filename)
        htmlfilename = basefilename + '.html'
        pngfilename = basefilename + '.png'

        self.assertEqual(extension.lower(), '.pdf', 'Input file is not PDF')

        # Step 1: PDF -> HTML.
        pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS + list(pdf2htmlEX_args) + [
            os.path.join(self.TEST_DATA_DIR, filename),
            htmlfilename,
        ]
        result = self.run_pdf2htmlEX(pdf2htmlEX_args)
        self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')

        # Step 2: HTML -> PNG, written to a scratch directory.
        png_out_dir = os.path.join(self.cur_temp_dir, 'png_out')
        os.mkdir(png_out_dir)

        pngfilename_out_fullpath = os.path.join(png_out_dir, pngfilename)
        pngfilename_raw_fullpath = os.path.join(self.TEST_DATA_DIR, pngfilename)

        wkhtml2image_args = (
            [self.WKHTML2IMAGE]
            + self.DEFAULT_WKHTML2IMAGE_ARGS
            + list(wkhtml2image_args)
            + [
                os.path.join(self.cur_output_dir, htmlfilename),
                pngfilename_out_fullpath,
            ]
        )
        # Arguments may be ints; subprocess needs strings.
        return_code = subprocess.call([str(arg) for arg in wkhtml2image_args])
        self.assertEqual(return_code, 0, 'cannot execute ' + self.WKHTML2IMAGE)

        # Step 3: either store the new reference image or compare with it.
        if self.GENERATING_MODE:
            shutil.copy(pngfilename_out_fullpath, pngfilename_raw_fullpath)
            return

        original_img = Image.open(pngfilename_raw_fullpath)
        new_img = Image.open(pngfilename_out_fullpath)

        # A size mismatch is already a difference; check it explicitly so the
        # result does not depend on how the pixel diff treats unequal sizes.
        self.assertEqual(original_img.size, new_img.size, 'PNG files differ in size')

        diff_img = ImageChops.difference(original_img, new_img)
        # getbbox() is None only when the difference image is entirely zero.
        if diff_img.getbbox() is not None:
            if self.SAVE_TMP:
                # save the diff image
                # http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png
                diff_img.convert('RGB').save(os.path.join(png_out_dir, basefilename + '.diff.png'))
            self.fail('PNG files differ')

    # NOTE: the test methods deliberately use comments rather than docstrings,
    # because unittest's verbose output shows a test's docstring.

    # Cropped to the region selected by the --crop-* arguments.
    def test_basic_text(self):
        self.run_test_case('basic_text.pdf',
                           wkhtml2image_args=[
                               '--crop-x', 180,
                               '--crop-y', 150,
                               '--crop-w', 220,
                               '--crop-h', 260,
                           ])

    def test_geneve_1564(self):
        self.run_test_case('geneve_1564.pdf', wkhtml2image_args=['--height', 1100])

    def test_text_visibility(self):
        self.run_test_case('text_visibility.pdf', wkhtml2image_args=['--height', 1200])

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

View File

@ -0,0 +1,36 @@
% Source of a text-rendering test PDF.  \pdfliteral (pdfTeX) injects raw PDF
% operators into the page content stream, so each labelled word below is
% typeset while one PDF text-state parameter has a non-default value.
\documentclass{article}
\begin{document}
% Plain text mixing the normal and \tiny font sizes.
Normal\hspace{10pt}{\tiny tiny}\hspace{10pt}Text
% Ts: text rise (5 units), reset to 0 afterwards.
\pdfliteral{5 Ts}
Rise \\
\pdfliteral{0 Ts}
% Tc: character spacing (5 units), reset to 0 afterwards.
\pdfliteral{5 Tc}
CharSpace \\
\pdfliteral{0 Tc}
% Tz: horizontal scaling in percent (200%), reset to 100% afterwards.
\pdfliteral{200 Tz}
Horizontal\hspace{10pt}Scale \\
\pdfliteral{100 Tz}
\vspace{3cm}
% q ... Q: save / restore the graphics state around the rotated section.
\pdfliteral{q}
% cm: concatenate a matrix; [0.71 0.71 -0.71 0.71 0 0] is approximately a
% 45-degree rotation (cos 45 = sin 45 = 0.707).
\pdfliteral{0.71 0.71 -0.71 0.71 0 0 cm}
Rotated
% Same three text-state variations as above, now under the rotation.
\pdfliteral{5 Ts}
Rise \\
\pdfliteral{0 Ts}
\pdfliteral{5 Tc}
CharSpace \\
\pdfliteral{0 Tc}
\pdfliteral{200 Tz}
Horizontal\hspace{10pt}Scale \\
\pdfliteral{100 Tz}
\pdfliteral{Q}
\end{document}

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 MiB

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 127 KiB

View File

@ -7,11 +7,11 @@ import os
from test import Common from test import Common
class OutputNamingTests(Common, unittest.TestCase): class T(Common, unittest.TestCase):
def run_test_case(self, input_file, expected_output_files, args=[]): def run_test_case(self, input_file, expected_output_files, args=[]):
args = list(args) args = list(args)
args.insert(0, os.path.join(self.TEST_DIR, 'test_naming', input_file)) args.insert(0, os.path.join(self.TEST_DIR, 'test_naming', input_file))
self.assertEquals(self.run_pdf2htmlEX(args)['output_files'], sorted(expected_output_files)) self.assertItemsEquals(self.run_pdf2htmlEX(args)['output_files'], expected_output_files)
def test_generate_single_html_default_name_single_page_pdf(self): def test_generate_single_html_default_name_single_page_pdf(self):
self.run_test_case('1-page.pdf', ['1-page.html']) self.run_test_case('1-page.pdf', ['1-page.html'])