diff --git a/pdf2htmlEX/test/browser_tests.py b/pdf2htmlEX/test/browser_tests.py
index 93906ad..b839692 100644
--- a/pdf2htmlEX/test/browser_tests.py
+++ b/pdf2htmlEX/test/browser_tests.py
@@ -37,13 +37,23 @@ class BrowserTests(Common):
         ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
         ref_htmlfilename = os.path.join(ref_htmlfolder, htmlfilename)
         out_htmlfilename = os.path.join(self.OUTDIR, htmlfilename)
-
-        pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS + args + [
+        pre_htmlfilename = os.path.join(self.PREDIR, htmlfilename)
+
+        try:
+            # Reuse the HTML pre-compiled into PREDIR (if any) by
+            # copying it into OUTDIR instead of running pdf2htmlEX.
+            #
+            shutil.copy(pre_htmlfilename, out_htmlfilename)
+        except (IOError, OSError):
+            # The copy failed (no pre-compiled HTML is available),
+            # so create it using pdf2htmlEX.
+            #
+            pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS + args + [
             os.path.join(self.TEST_DATA_DIR, filename),
             htmlfilename ]
-        result = self.run_pdf2htmlEX(pdf2htmlEX_args)
-
-        self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')
+            result = self.run_pdf2htmlEX(pdf2htmlEX_args)
+            #
+            self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')
 
         if self.GENERATING_MODE:
             # copy generated html files
diff --git a/pdf2htmlEX/test/produceHtmlForBrowserTests b/pdf2htmlEX/test/produceHtmlForBrowserTests
new file mode 100755
index 0000000..3ef8f0f
--- /dev/null
+++ b/pdf2htmlEX/test/produceHtmlForBrowserTests
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+# This bash script runs pdf2htmlEX on a fixed list of *.pdf files from the
+# browser_tests directory and copies the results into PDF2HTMLEX_PREDIR.
+
+# Run pdf2htmlEX on one file: $1 = pdf file name, $2 = extra arguments.
+#
+function runPdf2htmlEX {
+  local pdfFileName="$1"
+  local htmlFileName="${pdfFileName%.*}.html"
+  local arguments="$2"
+  # $arguments stays unquoted below so it may be empty or hold several options
+  echo ""
+  echo "---"
+  echo " pdfFileName: [$pdfFileName]"
+  echo "htmlFileName: [$htmlFileName]"
+  echo " arguments: [$arguments]"
+  #
+  # now run pdf2htmlEX to produce the output files
+  # (the command line is echoed first, then executed)
+  echo "$PDF2HTMLEX_PATH" \
+    --data-dir="$PDF2HTMLEX_DATDIR" \
+    --dest-dir "$PDF2HTMLEX_TMPDIR" \
+    --fit-width=800 --last-page=1 \
+    $arguments \
+    "browser_tests/$pdfFileName" \
+    "$htmlFileName"
+  #
+  "$PDF2HTMLEX_PATH" \
+    --data-dir="$PDF2HTMLEX_DATDIR" \
+    --dest-dir "$PDF2HTMLEX_TMPDIR" \
+    --fit-width=800 --last-page=1 \
+    $arguments \
+    "browser_tests/$pdfFileName" \
+    "$htmlFileName"
+}
+
+if test -z "$PDF2HTMLEX_PATH" ; then
+  echo "PANIC: we do not know where to find the pdf2htmlEX executable"
+  exit 1
+fi
+
+if test -z "$PDF2HTMLEX_DATDIR" ; then
+  export PDF2HTMLEX_DATDIR=/tmp/pdf2htmlEX/dat
+fi
+
+if test -z "$PDF2HTMLEX_TMPDIR" ; then
+  export PDF2HTMLEX_TMPDIR=/tmp/pdf2htmlEX/tmp
+fi
+
+if test -z "$PDF2HTMLEX_PREDIR" ; then
+  export PDF2HTMLEX_PREDIR=/tmp/pdf2htmlEX/pre
+fi
+
+# clear out the TMPDIR
+#
+rm -rf "$PDF2HTMLEX_TMPDIR"
+mkdir -p "$PDF2HTMLEX_TMPDIR"
+#
+
+runPdf2htmlEX 'test_fail.pdf'
+
+runPdf2htmlEX 'basic_text.pdf'
+
+runPdf2htmlEX 'geneve_1564.pdf'
+
+runPdf2htmlEX 'text_visibility.pdf' '--correct-text-visibility=1'
+
+runPdf2htmlEX 'with_form.pdf' '--process-form=1'
+
+runPdf2htmlEX 'invalid_unicode_issue477.pdf'
+
+runPdf2htmlEX 'svg_background_with_page_rotation_issue402.pdf' '--bg-format=svg'
+
+runPdf2htmlEX 'fontfile3_opentype.pdf'
+
+# clear out the PREDIR
+#
+rm -rf "$PDF2HTMLEX_PREDIR"
+mkdir -p "$PDF2HTMLEX_PREDIR"
+# copy everything produced in TMPDIR into PREDIR
+cp "$PDF2HTMLEX_TMPDIR"/* "$PDF2HTMLEX_PREDIR"
diff --git a/pdf2htmlEX/test/test.py.in b/pdf2htmlEX/test/test.py.in
index 63f9983..8748d64 100755
--- a/pdf2htmlEX/test/test.py.in
+++ b/pdf2htmlEX/test/test.py.in
@@ -28,6 +28,7 @@ class Common(object):
     PNGDIR = "@PDF2HTMLEX_PNGDIR@"
     DATDIR = "@PDF2HTMLEX_DATDIR@"
     OUTDIR = "@PDF2HTMLEX_OUTDIR@"
+    PREDIR = "@PDF2HTMLEX_PREDIR@"
     HTMDIR = "@PDF2HTMLEX_HTMDIR@"
 
     def setUp(self):