#!/usr/bin/env python import unittest import os import sys import tempfile import shutil import subprocess # We assume that this file is put inside SRC_DIR/test TEST_DIR = os.path.dirname(__file__) # The location where our test PDFs are stored TEST_DATA_DIR = os.path.join(TEST_DIR, 'test_data') # The location where the base css file, etc is stored in the build folder DATA_DIR = os.path.join(TEST_DIR, '../share') # The script should be run in the directory containing the binary # The location where the executable is generated by the build PDF2HTMLEX_PATH = './pdf2htmlEX' def execute_pdf2htmlex_with_args(args): """ Execute the pdf2htmlEX with the specified arguments. :type args: list of values :param args: list of arguments to pass to executable. First part of each tuple is the argument, second part is the value. :rtype: int :return: The exit code of the command """ executable = os.path.abspath(PDF2HTMLEX_PATH) cmd = [executable, '--data-dir', os.path.abspath(DATA_DIR)] for val in args: cmd.append(str(val)) return_code = subprocess.call(cmd) if return_code != 0: print >> sys.stderr, "Command return code %d: %s" % (return_code, ' '.join(cmd)) return return_code def execute_pdf2htmlex_and_get_files(args): """ Execute the pdf2htmlEX with the specified arguments, and get the names of the output files. Will automatically create a temporary directory for the output, pass that as the output dir to pdf2htmlEX, determine the files generated, and clean up the temporary directory afterwards. :type args: list of values :param args: list of arguments to pass to executable. First part of each tuple is the argument, second part is the value. :rtype: list of str :return: List of the file names that were generated as output in alphabetical order. None if the command does not execute successfully. """ temp_dir = tempfile.mkdtemp() try: if execute_pdf2htmlex_with_args(['--dest-dir', temp_dir] + args) != 0: return None files = os.listdir(temp_dir) files.sort() return files finally: shutil.rmtree(path=temp_dir, ignore_errors=True) def path_to_test_file(filename): """ Retrieve an absolute path to the specified test file. :type filename: :param filename: the name of the test file to get the path to :rtype: str :returns: the full path to the test file """ return os.path.abspath(os.path.join(TEST_DATA_DIR, filename)) class OutputNamingTests(unittest.TestCase): def test_generate_single_html_default_name_single_page_pdf(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('1-page.pdf') ]) self.assertEquals(files, ['1-page.html']) def test_generate_single_html_default_name_multiple_page_pdf(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf') ]) self.assertEquals(files, ['2-pages.html']) def test_generate_single_html_specify_name_single_page_pdf(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('1-page.pdf'), 'foo.html' ]) self.assertEquals(files, ['foo.html']) def test_generate_single_html_specify_name_multiple_page_pdf(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf'), 'foo.html' ]) self.assertEquals(files, ['foo.html']) def test_generate_split_pages_default_name_single_page(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, path_to_test_file('1-page.pdf') ]) self.assertEquals(files, sorted(['1-page.html', '1-page1.page'])) def test_generate_split_pages_default_name_multiple_pages(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', '3-pages1.page', '3-pages2.page', '3-pages3.page'])) def test_generate_split_pages_specify_name_single_page(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'foo.xyz', path_to_test_file('1-page.pdf'), ]) self.assertEquals(files, sorted(['1-page.html', 'foo1.xyz'])) def test_generate_split_pages_specify_name_multiple_pages(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'foo.xyz', path_to_test_file('3-pages.pdf'), ]) self.assertEquals(files, sorted(['3-pages.html', 'foo1.xyz', 'foo2.xyz', 'foo3.xyz'])) def test_generate_split_pages_specify_name_formatter_multiple_pages(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'fo%do.xyz', path_to_test_file('3-pages.pdf'), ]) self.assertEquals(files, sorted(['3-pages.html', 'fo1o.xyz', 'fo2o.xyz', 'fo3o.xyz'])) def test_generate_split_pages_specify_name_formatter_with_padded_zeros_multiple_pages(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'fo%03do.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'fo001o.xyz', 'fo002o.xyz', 'fo003o.xyz'])) def test_generate_split_pages_specify_name_only_first_formatter_gets_taken(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%do%do.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'f1o%do.xyz', 'f2o%do.xyz', 'f3o%do.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_s(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%soo.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'f%soo1.xyz', 'f%soo2.xyz', 'f%soo3.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_p(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%poo.xyz', path_to_test_file('3-pages.pdf'), ]) self.assertEquals(files, sorted(['3-pages.html', 'f%poo1.xyz', 'f%poo2.xyz', 'f%poo3.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_n(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%noo.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'f%noo1.xyz', 'f%noo2.xyz', 'f%noo3.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%%oo.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'f%%oo1.xyz', 'f%%oo2.xyz', 'f%%oo3.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%%o%do.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'f%%o1o.xyz', 'f%%o2o.xyz', 'f%%o3o.xyz'])) def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'fo%do%%.xyz', path_to_test_file('3-pages.pdf') ]) self.assertEquals(files, sorted(['3-pages.html', 'fo1o%%.xyz', 'fo2o%%.xyz', 'fo3o%%.xyz'])) def test_generate_split_pages_specify_name_only_formatter_starts_part_way_through_invalid_formatter(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'f%02%doo.xyz', path_to_test_file('3-pages.pdf'), ]) self.assertEquals(files, sorted(['3-pages.html', 'f%021oo.xyz', 'f%022oo.xyz', 'f%023oo.xyz'])) def test_generate_split_pages_specify_output_filename_no_formatter_no_extension(self): files = execute_pdf2htmlex_and_get_files([ '--split-pages', 1, '--page-filename', 'foo', path_to_test_file('1-page.pdf'), ]) self.assertEquals(files, sorted(['1-page.html', 'foo1'])) def test_generate_single_html_name_specified_format_characters_percent_d(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf'), 'foo%d.html' ]) self.assertEquals(files, ['foo%d.html']) def test_generate_single_html_name_specified_format_characters_percent_p(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf'), 'foo%p.html' ]) self.assertEquals(files, ['foo%p.html']) def test_generate_single_html_name_specified_format_characters_percent_n(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf'), 'foo%n.html' ]) self.assertEquals(files, ['foo%n.html']) def test_generate_single_html_name_specified_format_characters_percent_percent(self): files = execute_pdf2htmlex_and_get_files([ path_to_test_file('2-pages.pdf'), 'foo%%.html' ]) self.assertEquals(files, ['foo%%.html']) if __name__=="__main__": executable = os.path.abspath(PDF2HTMLEX_PATH) if not os.path.isfile(executable) or not os.access(executable, os.X_OK): print >> sys.stderr, "Cannot locate pdf2htmlEX executable. Make sure source was built before running this test." exit(1) unittest.main()