mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
263 lines
10 KiB
Python
263 lines
10 KiB
Python
#!/usr/bin/env python
|
|
|
|
import unittest
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
import shutil
|
|
import subprocess
|
|
|
|
# This file is expected to live in SRC_DIR/test
TEST_DIR = os.path.dirname(__file__)

# Directory holding the PDFs used as test input
TEST_DATA_DIR = os.path.join(TEST_DIR, 'test_data')

# Directory in the build folder holding the base css file, etc.
DATA_DIR = os.path.join(TEST_DIR, '../share')

# Run this script from the directory containing the built binary:
# the path below is relative to the current working directory.
PDF2HTMLEX_PATH = './pdf2htmlEX'
|
|
|
|
|
|
def execute_pdf2htmlex_with_args(args):
    """
    Execute pdf2htmlEX with the specified arguments.

    The command line is the absolute path of PDF2HTMLEX_PATH, followed by
    ``--data-dir <absolute DATA_DIR>``, followed by every entry of ``args``
    converted with ``str()``.

    :type args: list of values
    :param args: flat list of command line arguments, in order. Option names
        and their values are separate entries (e.g. ``['--split-pages', 1]``).

    :rtype: int
    :return: The exit code of the command
    """
    executable = os.path.abspath(PDF2HTMLEX_PATH)

    cmd = [executable, '--data-dir', os.path.abspath(DATA_DIR)]
    # Arguments may be given as non-strings (e.g. the int 1); subprocess
    # needs strings.
    cmd.extend(str(val) for val in args)

    return_code = subprocess.call(cmd)

    if return_code != 0:
        # sys.stderr.write works on both Python 2 and Python 3, unlike the
        # Python 2-only ``print >> sys.stderr`` statement.
        sys.stderr.write("Command return code %d: %s\n" % (return_code, ' '.join(cmd)))

    return return_code
|
|
|
|
def execute_pdf2htmlex_and_get_files(args):
    """
    Execute pdf2htmlEX with the specified arguments and get the names of the
    output files.

    A temporary directory is created for the output and passed to pdf2htmlEX
    via ``--dest-dir`` (placed before the caller's arguments). The files
    generated in it are listed, and the temporary directory is removed
    afterwards, whether or not the command succeeded.

    :type args: iterable of values
    :param args: flat sequence of command line arguments, in order. Option
        names and their values are separate entries
        (e.g. ``['--split-pages', 1, 'input.pdf']``).

    :rtype: list of str
    :return: List of the file names that were generated as output in
        alphabetical order. None if the command does not execute successfully.
    """
    temp_dir = tempfile.mkdtemp()

    try:
        # list(args) lets callers pass a tuple or any other iterable, not
        # only a list.
        full_args = ['--dest-dir', temp_dir] + list(args)
        if execute_pdf2htmlex_with_args(full_args) != 0:
            return None

        return sorted(os.listdir(temp_dir))
    finally:
        # Best-effort cleanup: errors while deleting are deliberately ignored.
        shutil.rmtree(path=temp_dir, ignore_errors=True)
|
|
|
|
def path_to_test_file(filename):
    """
    Build the absolute path of a file inside the test data directory.

    :type filename: str
    :param filename: the name of the test file to get the path to

    :rtype: str
    :returns: the full path to the test file
    """
    relative_path = os.path.join(TEST_DATA_DIR, filename)
    return os.path.abspath(relative_path)
|
|
|
|
class OutputNamingTests(unittest.TestCase):
|
|
def test_generate_single_html_default_name_single_page_pdf(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('1-page.pdf')
|
|
])
|
|
self.assertEquals(files, ['1-page.html'])
|
|
|
|
def test_generate_single_html_default_name_multiple_page_pdf(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf')
|
|
])
|
|
self.assertEquals(files, ['2-pages.html'])
|
|
|
|
def test_generate_single_html_specify_name_single_page_pdf(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('1-page.pdf'),
|
|
'foo.html'
|
|
])
|
|
self.assertEquals(files, ['foo.html'])
|
|
|
|
def test_generate_single_html_specify_name_multiple_page_pdf(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf'),
|
|
'foo.html'
|
|
])
|
|
self.assertEquals(files, ['foo.html'])
|
|
|
|
def test_generate_split_pages_default_name_single_page(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
path_to_test_file('1-page.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['1-page.html', '1-page1.page']))
|
|
|
|
def test_generate_split_pages_default_name_multiple_pages(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', '3-pages1.page', '3-pages2.page', '3-pages3.page']))
|
|
|
|
def test_generate_split_pages_specify_name_single_page(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'foo.xyz',
|
|
path_to_test_file('1-page.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['1-page.html', 'foo1.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_multiple_pages(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'foo.xyz',
|
|
path_to_test_file('3-pages.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'foo1.xyz', 'foo2.xyz', 'foo3.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_formatter_multiple_pages(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'fo%do.xyz',
|
|
path_to_test_file('3-pages.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'fo1o.xyz', 'fo2o.xyz', 'fo3o.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_formatter_with_padded_zeros_multiple_pages(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'fo%03do.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'fo001o.xyz', 'fo002o.xyz', 'fo003o.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_first_formatter_gets_taken(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%do%do.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f1o%do.xyz', 'f2o%do.xyz', 'f3o%do.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_s(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%soo.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%soo1.xyz', 'f%soo2.xyz', 'f%soo3.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_p(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%poo.xyz',
|
|
path_to_test_file('3-pages.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%poo1.xyz', 'f%poo2.xyz', 'f%poo3.xyz']))
|
|
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_n(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%noo.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%noo1.xyz', 'f%noo2.xyz', 'f%noo3.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%%oo.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%%oo1.xyz', 'f%%oo2.xyz', 'f%%oo3.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%%o%do.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%%o1o.xyz', 'f%%o2o.xyz', 'f%%o3o.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'fo%do%%.xyz',
|
|
path_to_test_file('3-pages.pdf')
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'fo1o%%.xyz', 'fo2o%%.xyz', 'fo3o%%.xyz']))
|
|
|
|
def test_generate_split_pages_specify_name_only_formatter_starts_part_way_through_invalid_formatter(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'f%02%doo.xyz',
|
|
path_to_test_file('3-pages.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['3-pages.html', 'f%021oo.xyz', 'f%022oo.xyz', 'f%023oo.xyz']))
|
|
|
|
def test_generate_split_pages_specify_output_filename_no_formatter_no_extension(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
'--split-pages', 1,
|
|
'--page-filename', 'foo',
|
|
path_to_test_file('1-page.pdf'),
|
|
])
|
|
self.assertEquals(files, sorted(['1-page.html', 'foo1']))
|
|
|
|
def test_generate_single_html_name_specified_format_characters_percent_d(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf'),
|
|
'foo%d.html'
|
|
])
|
|
self.assertEquals(files, ['foo%d.html'])
|
|
|
|
def test_generate_single_html_name_specified_format_characters_percent_p(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf'),
|
|
'foo%p.html'
|
|
])
|
|
self.assertEquals(files, ['foo%p.html'])
|
|
|
|
def test_generate_single_html_name_specified_format_characters_percent_n(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf'),
|
|
'foo%n.html'
|
|
])
|
|
self.assertEquals(files, ['foo%n.html'])
|
|
|
|
def test_generate_single_html_name_specified_format_characters_percent_percent(self):
|
|
files = execute_pdf2htmlex_and_get_files([
|
|
path_to_test_file('2-pages.pdf'),
|
|
'foo%%.html'
|
|
])
|
|
self.assertEquals(files, ['foo%%.html'])
|
|
|
|
if __name__ == "__main__":
    # Fail early with a clear message when the binary has not been built or
    # the script is run from the wrong directory (PDF2HTMLEX_PATH is relative
    # to the current working directory).
    executable = os.path.abspath(PDF2HTMLEX_PATH)
    if not os.path.isfile(executable) or not os.access(executable, os.X_OK):
        # sys.stderr.write works on both Python 2 and Python 3, unlike the
        # Python 2-only ``print >> sys.stderr`` statement.
        sys.stderr.write("Cannot locate pdf2htmlEX executable. Make sure source was built before running this test.\n")
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module.
        sys.exit(1)

    unittest.main()
|