pdf2htmlEX/test/test_naming.py

263 lines
10 KiB
Python

#!/usr/bin/env python
import unittest
import os
import sys
import tempfile
import shutil
import subprocess
# Directory containing this test module; the file is assumed to live in SRC_DIR/test.
TEST_DIR = os.path.dirname(__file__)
# Directory where the test PDFs are stored.
TEST_DATA_DIR = os.path.join(TEST_DIR, 'test_data')
# Directory in the build folder holding the base css file, etc.; handed to pdf2htmlEX via --data-dir.
DATA_DIR = os.path.join(TEST_DIR, '../share')
# Location of the executable generated by the build. The path is relative to the current working
# directory, so the script should be run from the directory containing the binary.
PDF2HTMLEX_PATH = './pdf2htmlEX'
def execute_pdf2htmlex_with_args(args):
    """
    Execute the pdf2htmlEX with the specified arguments.

    The executable located at PDF2HTMLEX_PATH is invoked with ``--data-dir`` set to the absolute form of DATA_DIR,
    followed by every entry of ``args`` converted with ``str()``. If the command exits with a non-zero code, the
    exit code and the full command line are reported on stderr.

    :type args: list of values
    :param args: flat list of command line arguments to pass to the executable. An option and its value are two
        separate entries, e.g. ``['--split-pages', 1, 'input.pdf']``.
    :rtype: int
    :return: The exit code of the command
    """
    executable = os.path.abspath(PDF2HTMLEX_PATH)
    cmd = [executable, '--data-dir', os.path.abspath(DATA_DIR)]
    # Values such as the integer given to --split-pages must be strings before they reach subprocess.
    cmd.extend(str(val) for val in args)

    return_code = subprocess.call(cmd)
    if return_code != 0:
        # sys.stderr.write works on both Python 2 and Python 3; the "print >> stream" statement form
        # is Python 2 only and raises TypeError on Python 3, which would hide the real failure.
        sys.stderr.write("Command return code %d: %s\n" % (return_code, ' '.join(cmd)))
    return return_code
def execute_pdf2htmlex_and_get_files(args):
    """
    Run pdf2htmlEX into a throwaway output directory and report which files it produced.

    A temporary directory is created and passed to pdf2htmlEX as ``--dest-dir`` ahead of the caller's arguments.
    The directory is removed again before this function returns, whether or not the command succeeded.

    :type args: list of values
    :param args: flat list of command line arguments to pass to the executable (options and their values are
        separate entries).
    :rtype: list of str
    :return: Names of the generated files in sorted order, or None if the command exits with a non-zero code.
    """
    output_dir = tempfile.mkdtemp()
    try:
        exit_code = execute_pdf2htmlex_with_args(['--dest-dir', output_dir] + args)
        if exit_code == 0:
            return sorted(os.listdir(output_dir))
        return None
    finally:
        # Best-effort cleanup: a failure to delete the directory must not mask the test result.
        shutil.rmtree(path=output_dir, ignore_errors=True)
def path_to_test_file(filename):
    """
    Build the absolute path of a file inside the test data directory.
    :type filename: str
    :param filename: name of the test file, relative to TEST_DATA_DIR
    :rtype: str
    :returns: the absolute path to the test file
    """
    location = os.path.join(TEST_DATA_DIR, filename)
    return os.path.abspath(location)
class OutputNamingTests(unittest.TestCase):
    """
    Checks the names of the files pdf2htmlEX writes for various combinations of input PDF, explicit output
    name, --split-pages and --page-filename.

    Every test runs the converter through execute_pdf2htmlex_and_get_files and compares the sorted listing of
    the destination directory with the expected file names.
    """

    # --- Single HTML output -------------------------------------------------------------------------------

    def test_generate_single_html_default_name_single_page_pdf(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('1-page.pdf')
        ])
        self.assertEqual(files, ['1-page.html'])

    def test_generate_single_html_default_name_multiple_page_pdf(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf')
        ])
        self.assertEqual(files, ['2-pages.html'])

    def test_generate_single_html_specify_name_single_page_pdf(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('1-page.pdf'),
            'foo.html'
        ])
        self.assertEqual(files, ['foo.html'])

    def test_generate_single_html_specify_name_multiple_page_pdf(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf'),
            'foo.html'
        ])
        self.assertEqual(files, ['foo.html'])

    # --- Split pages, default and explicit page file names ------------------------------------------------

    def test_generate_split_pages_default_name_single_page(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            path_to_test_file('1-page.pdf')
        ])
        self.assertEqual(files, sorted(['1-page.html', '1-page1.page']))

    def test_generate_split_pages_default_name_multiple_pages(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', '3-pages1.page', '3-pages2.page', '3-pages3.page']))

    def test_generate_split_pages_specify_name_single_page(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'foo.xyz',
            path_to_test_file('1-page.pdf'),
        ])
        self.assertEqual(files, sorted(['1-page.html', 'foo1.xyz']))

    def test_generate_split_pages_specify_name_multiple_pages(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'foo.xyz',
            path_to_test_file('3-pages.pdf'),
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'foo1.xyz', 'foo2.xyz', 'foo3.xyz']))

    # --- Split pages, page file names containing format specifiers ----------------------------------------

    def test_generate_split_pages_specify_name_formatter_multiple_pages(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'fo%do.xyz',
            path_to_test_file('3-pages.pdf'),
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'fo1o.xyz', 'fo2o.xyz', 'fo3o.xyz']))

    def test_generate_split_pages_specify_name_formatter_with_padded_zeros_multiple_pages(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'fo%03do.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'fo001o.xyz', 'fo002o.xyz', 'fo003o.xyz']))

    def test_generate_split_pages_specify_name_only_first_formatter_gets_taken(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%do%do.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f1o%do.xyz', 'f2o%do.xyz', 'f3o%do.xyz']))

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_s(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%soo.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%soo1.xyz', 'f%soo2.xyz', 'f%soo3.xyz']))

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_p(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%poo.xyz',
            path_to_test_file('3-pages.pdf'),
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%poo1.xyz', 'f%poo2.xyz', 'f%poo3.xyz']))

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_n(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%noo.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%noo1.xyz', 'f%noo2.xyz', 'f%noo3.xyz']))

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%%oo.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%%oo1.xyz', 'f%%oo2.xyz', 'f%%oo3.xyz']))

    # This case ("%%" ahead of the real "%d" placeholder) used to share its method name with the test
    # below. The later definition replaced this one in the class body, so it was never executed. It now
    # has a distinct name; the test below keeps the original name.
    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_before_actual_placeholder(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%%o%do.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%%o1o.xyz', 'f%%o2o.xyz', 'f%%o3o.xyz']))

    # "%%" after the real "%d" placeholder.
    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'fo%do%%.xyz',
            path_to_test_file('3-pages.pdf')
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'fo1o%%.xyz', 'fo2o%%.xyz', 'fo3o%%.xyz']))

    def test_generate_split_pages_specify_name_only_formatter_starts_part_way_through_invalid_formatter(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'f%02%doo.xyz',
            path_to_test_file('3-pages.pdf'),
        ])
        self.assertEqual(files, sorted(['3-pages.html', 'f%021oo.xyz', 'f%022oo.xyz', 'f%023oo.xyz']))

    def test_generate_split_pages_specify_output_filename_no_formatter_no_extension(self):
        files = execute_pdf2htmlex_and_get_files([
            '--split-pages', 1,
            '--page-filename', 'foo',
            path_to_test_file('1-page.pdf'),
        ])
        self.assertEqual(files, sorted(['1-page.html', 'foo1']))

    # --- Single HTML output, explicit name containing format characters -----------------------------------

    def test_generate_single_html_name_specified_format_characters_percent_d(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf'),
            'foo%d.html'
        ])
        self.assertEqual(files, ['foo%d.html'])

    def test_generate_single_html_name_specified_format_characters_percent_p(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf'),
            'foo%p.html'
        ])
        self.assertEqual(files, ['foo%p.html'])

    def test_generate_single_html_name_specified_format_characters_percent_n(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf'),
            'foo%n.html'
        ])
        self.assertEqual(files, ['foo%n.html'])

    def test_generate_single_html_name_specified_format_characters_percent_percent(self):
        files = execute_pdf2htmlex_and_get_files([
            path_to_test_file('2-pages.pdf'),
            'foo%%.html'
        ])
        self.assertEqual(files, ['foo%%.html'])
if __name__ == "__main__":
    # Check for the binary up front so a missing build produces one clear message instead of a failure
    # in every test. PDF2HTMLEX_PATH is relative, so this depends on the current working directory.
    executable = os.path.abspath(PDF2HTMLEX_PATH)
    if not (os.path.isfile(executable) and os.access(executable, os.X_OK)):
        # sys.stderr.write works on both Python 2 and Python 3, unlike the "print >> stream" statement.
        sys.stderr.write(
            "Cannot locate pdf2htmlEX executable. Make sure source was built before running this test.\n")
        # sys.exit is always available; the bare exit() helper is only injected by the site module.
        sys.exit(1)
    unittest.main()