diff --git a/share/manifest b/share/manifest index 751bdb2..b03560f 100644 --- a/share/manifest +++ b/share/manifest @@ -31,9 +31,7 @@ @base.min.css # fancy CSS styles - Optional -#TEST_IGNORE_BEGIN @fancy.min.css -#TEST_IGNORE_END # PDF specific CSS styles - Do not modify $css diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..9b7bc7b --- /dev/null +++ b/test/README.md @@ -0,0 +1,27 @@ +### Dependencies + +- wkhtmltoimage +- ttfautohint +- python2 +- Python Imaging Library + +### Usage +- Run all tests: + - `./test.py` +- Run selected tests: + - `./test.py test_A test_B ...` +- Environment variables: + - set `P2H_TEST_SAVE_TMP=1` to keep the temporary files + - set `P2H_TEST_GEN=1` to generate new reference images instead of comparing with old ones + +### Guidelines for test cases + +- Make sure you hold the proper copyright or license for every file you add. +- Use meaningful file names: a short description of the file, or issueXXX.pdf. +- Make each test case minimal: + - One page only, unless the test case is about multiple pages. + - Grayscale only, unless the test case is about colors. + - Remove unnecessary elements. + - Set proper parameters for cropping in `wkhtml2image_args`. +- [Optional] Include the source files that the PDF file was generated from. 
+ diff --git a/test/fancy.min.css b/test/fancy.min.css new file mode 100644 index 0000000..f6bfaf6 --- /dev/null +++ b/test/fancy.min.css @@ -0,0 +1,4 @@ +/* CSS for test cases */ +#page-container { + overflow:hidden; +} diff --git a/test/test.py b/test/test.py index ae3e41c..85db25e 100755 --- a/test/test.py +++ b/test/test.py @@ -12,9 +12,18 @@ class Common(object): TEST_DIR = os.path.join(SRC_DIR, 'test') DATA_DIR = os.path.join(SRC_DIR, 'share') PDF2HTMLEX_PATH = os.path.join(SRC_DIR, 'pdf2htmlEX') + + SAVE_TMP = os.environ.get('P2H_TEST_SAVE_TMP') + CANONICAL_TEMPDIR = '/tmp/pdf2htmlEX_test' def setUp(self): - self.cur_temp_dir = tempfile.mkdtemp(prefix='pdf2htmlEX_test') + if not self.SAVE_TMP: + self.cur_temp_dir = tempfile.mkdtemp(prefix='pdf2htmlEX_test') + else: + shutil.rmtree(self.CANONICAL_TEMPDIR, True) + os.mkdir(self.CANONICAL_TEMPDIR) + self.cur_temp_dir = self.CANONICAL_TEMPDIR + self.cur_data_dir = os.path.join(self.cur_temp_dir, 'share') self.cur_output_dir = os.path.join(self.cur_temp_dir, 'out') os.mkdir(self.cur_data_dir) @@ -36,9 +45,12 @@ class Common(object): # copy files shutil.copy(os.path.join(self.DATA_DIR, 'base.min.css'), os.path.join(self.cur_data_dir, 'base.min.css')) + shutil.copy(os.path.join(self.TEST_DIR, 'fancy.min.css'), + os.path.join(self.cur_data_dir, 'fancy.min.css')) def tearDown(self): - shutil.rmtree(self.cur_temp_dir, True) + if not self.SAVE_TMP: + shutil.rmtree(self.cur_temp_dir, True) def run_pdf2htmlEX(self, args): """ @@ -49,19 +61,15 @@ class Common(object): :return: an object of relevant info """ - cmd = [self.PDF2HTMLEX_PATH, + args = [self.PDF2HTMLEX_PATH, '--data-dir', self.cur_data_dir, '--dest-dir', self.cur_output_dir - ] + ] + args - for val in args: - cmd.append(str(val)) - - return_code = subprocess.call(cmd) - self.assertEquals(return_code, 0) + return_code = subprocess.call(list(map(str, args))) + self.assertEquals(return_code, 0, 'cannot execute pdf2htmlEX') files = os.listdir(self.cur_output_dir) - 
files.sort() return { 'return_code' : return_code, @@ -75,9 +83,22 @@ if __name__ == '__main__': exit(1) suites = [] loader = unittest.TestLoader() - for module_name in ['test_naming']: + test_names = ['T.' + name for name in sys.argv[1:]] + for module_name in ['test_naming', 'test_conversion']: __import__(module_name) - suites.append(loader.loadTestsFromModule(sys.modules[module_name])) + if test_names: + # Load each name on its own: a name defined only in another module must not discard the names this module does provide + for test_name in test_names: + try: + suites.append(loader.loadTestsFromName(test_name, sys.modules[module_name])) + except AttributeError: + pass + else: + suites.append(loader.loadTestsFromModule(sys.modules[module_name])) + + if not suites: + print >>sys.stderr, 'No test found' + exit(1) failure_count = 0 runner = unittest.TextTestRunner(verbosity=2) diff --git a/test/test_conversion.py b/test/test_conversion.py new file mode 100755 index 0000000..ab2b70c --- /dev/null +++ b/test/test_conversion.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +import unittest +import os +import shutil +import subprocess + +from PIL import Image, ImageChops +from test import Common + +class T(Common, unittest.TestCase): + GENERATING_MODE = os.environ.get('P2H_TEST_GEN') + + WKHTML2IMAGE = 'wkhtmltoimage' + TTFAUTOHINT = 'ttfautohint' + TEST_DATA_DIR = os.path.join(Common.TEST_DIR, 'test_conversion') + + DEFAULT_PDF2HTMLEX_ARGS = [ + '--external-hint-tool', TTFAUTOHINT, + '--fit-width', 800, + '--last-page', 1, + '--correct-text-visibility', 1, + ] + + DEFAULT_WKHTML2IMAGE_ARGS = [ + '-f', 'png', + '--height', 600, + '--width', 800, + '--quality', 0, + '--quiet' + ] + + @classmethod + def setUpClass(cls): + subprocess.check_call([cls.WKHTML2IMAGE, '--version']) + subprocess.check_call([cls.TTFAUTOHINT, '--version']) + + def run_test_case(self, filename, pdf2htmlEX_args=[], wkhtml2image_args=[]): + basefilename, extension = os.path.splitext(filename) + htmlfilename = basefilename + '.html' + pngfilename = basefilename + '.png' + + self.assertEquals(extension.lower(), '.pdf', 'Input file is not PDF') + + pdf2htmlEX_args = 
self.DEFAULT_PDF2HTMLEX_ARGS \ + + list(pdf2htmlEX_args) + [ + os.path.join(self.TEST_DATA_DIR, filename), + htmlfilename + ] + + result = self.run_pdf2htmlEX(pdf2htmlEX_args) + self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated') + + png_out_dir = os.path.join(self.cur_temp_dir, 'png_out') + os.mkdir(png_out_dir) + + pngfilename_out_fullpath = os.path.join(png_out_dir, pngfilename) + pngfilename_raw_fullpath = os.path.join(self.TEST_DATA_DIR, pngfilename) + + wkhtml2image_args = [self.WKHTML2IMAGE] \ + + self.DEFAULT_WKHTML2IMAGE_ARGS \ + + list(wkhtml2image_args) + [ + os.path.join(self.cur_output_dir, htmlfilename), + pngfilename_out_fullpath + ] + + return_code = subprocess.call(list(map(str, wkhtml2image_args))) + self.assertEquals(return_code, 0, 'cannot execute ' + self.WKHTML2IMAGE) + + if self.GENERATING_MODE: + shutil.copy(pngfilename_out_fullpath, pngfilename_raw_fullpath) + else: + original_img = Image.open(pngfilename_raw_fullpath) + new_img = Image.open(pngfilename_out_fullpath) + + diff_img = ImageChops.difference(original_img, new_img) + + if diff_img.getbbox() is not None: + if self.SAVE_TMP: + # save the diff image + # http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png + diff_img.convert('RGB').save(os.path.join(png_out_dir, basefilename + '.diff.png')) + self.fail('PNG files differ') + + def test_basic_text(self): + self.run_test_case('basic_text.pdf', + wkhtml2image_args=[ + '--crop-x', 180, + '--crop-y', 150, + '--crop-w', 220, + '--crop-h', 260 + ]) + + def test_geneve_1564(self): + self.run_test_case('geneve_1564.pdf', wkhtml2image_args=['--height', 1100]) + + def test_text_visibility(self): + self.run_test_case('text_visibility.pdf', wkhtml2image_args=['--height', 1200]) + diff --git a/test/test_conversion/basic_text.pdf b/test/test_conversion/basic_text.pdf new file mode 100644 index 0000000..a9a0c5c Binary files /dev/null and 
b/test/test_conversion/basic_text.pdf differ diff --git a/test/test_conversion/basic_text.png b/test/test_conversion/basic_text.png new file mode 100644 index 0000000..9451e11 Binary files /dev/null and b/test/test_conversion/basic_text.png differ diff --git a/test/test_conversion/basic_text.tex b/test/test_conversion/basic_text.tex new file mode 100644 index 0000000..43ba1d2 --- /dev/null +++ b/test/test_conversion/basic_text.tex @@ -0,0 +1,36 @@ +\documentclass{article} +\begin{document} +Normal\hspace{10pt}{\tiny tiny}\hspace{10pt}Text +\pdfliteral{5 Ts} +Rise \\ +\pdfliteral{0 Ts} + +\pdfliteral{5 Tc} +CharSpace \\ +\pdfliteral{0 Tc} + +\pdfliteral{200 Tz} +Horizontal\hspace{10pt}Scale \\ +\pdfliteral{100 Tz} + +\vspace{3cm} + +\pdfliteral{q} +\pdfliteral{0.71 0.71 -0.71 0.71 0 0 cm} +Rotated +\pdfliteral{5 Ts} +Rise \\ +\pdfliteral{0 Ts} + +\pdfliteral{5 Tc} +CharSpace \\ +\pdfliteral{0 Tc} + +\pdfliteral{200 Tz} +Horizontal\hspace{10pt}Scale \\ +\pdfliteral{100 Tz} + +\pdfliteral{Q} + + +\end{document} diff --git a/test/test_conversion/geneve_1564.pdf b/test/test_conversion/geneve_1564.pdf new file mode 100644 index 0000000..ea60837 Binary files /dev/null and b/test/test_conversion/geneve_1564.pdf differ diff --git a/test/test_conversion/geneve_1564.png b/test/test_conversion/geneve_1564.png new file mode 100644 index 0000000..c768327 Binary files /dev/null and b/test/test_conversion/geneve_1564.png differ diff --git a/test/test_conversion/text_visibility.pdf b/test/test_conversion/text_visibility.pdf new file mode 100644 index 0000000..9cdcc90 Binary files /dev/null and b/test/test_conversion/text_visibility.pdf differ diff --git a/test/test_conversion/text_visibility.png b/test/test_conversion/text_visibility.png new file mode 100644 index 0000000..1456678 Binary files /dev/null and b/test/test_conversion/text_visibility.png differ diff --git a/test/test_naming.py b/test/test_naming.py index fd9f051..0811080 100644 --- a/test/test_naming.py +++ 
b/test/test_naming.py @@ -7,11 +7,11 @@ import os from test import Common -class OutputNamingTests(Common, unittest.TestCase): +class T(Common, unittest.TestCase): def run_test_case(self, input_file, expected_output_files, args=[]): args = list(args) args.insert(0, os.path.join(self.TEST_DIR, 'test_naming', input_file)) - self.assertEquals(self.run_pdf2htmlEX(args)['output_files'], sorted(expected_output_files)) + self.assertItemsEqual(self.run_pdf2htmlEX(args)['output_files'], expected_output_files) def test_generate_single_html_default_name_single_page_pdf(self): self.run_test_case('1-page.pdf', ['1-page.html'])