1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

browser_tests now use pre computed html

This commit is contained in:
Stephen Gaito 2020-06-15 14:27:28 +01:00
parent df62ee7f21
commit 16c88c49c4
3 changed files with 98 additions and 5 deletions

View File

@ -37,12 +37,22 @@ class BrowserTests(Common):
ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename) ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
ref_htmlfilename = os.path.join(ref_htmlfolder, htmlfilename) ref_htmlfilename = os.path.join(ref_htmlfolder, htmlfilename)
out_htmlfilename = os.path.join(self.OUTDIR, htmlfilename) out_htmlfilename = os.path.join(self.OUTDIR, htmlfilename)
pre_htmlfilename = os.path.join(self.PREDIR, htmlfilename)
try:
# see if we have pre-compiled the html file...
# if so simply copy it into place
#
shutil.copy(pre_htmlfilename, out_htmlfilename)
except:
# we have not pre-compiled the html file
# so create it using pdf2htmlEX
#
pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS + args + [ pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS + args + [
os.path.join(self.TEST_DATA_DIR, filename), os.path.join(self.TEST_DATA_DIR, filename),
htmlfilename ] htmlfilename ]
result = self.run_pdf2htmlEX(pdf2htmlEX_args) result = self.run_pdf2htmlEX(pdf2htmlEX_args)
#
self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated') self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')
if self.GENERATING_MODE: if self.GENERATING_MODE:

View File

@ -0,0 +1,82 @@
#!/bin/bash
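# This bash script runs pdf2htmlEX on a fixed list of *.pdf files from the
# browser_tests directory and then copies the generated output into
# $PDF2HTMLEX_PREDIR so the browser tests can reuse the pre-computed html.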
# Run pdf2htmlEX on a single PDF from the browser_tests directory.
#
# Arguments:
#   $1 - name of the PDF file, relative to browser_tests/
#   $2 - (optional) extra pdf2htmlEX command line options as ONE string;
#        it is word-split on whitespace before being passed on
#
# Reads PDF2HTMLEX_PATH, PDF2HTMLEX_DATDIR and PDF2HTMLEX_TMPDIR from the
# environment. The command line is echoed before it is executed.
#
function runPdf2htmlEX {
  local pdfFileName=$1
  # Strip only the final extension so names containing several dots
  # (e.g. "report.v2.pdf") map to "report.v2.html" rather than "report.html".
  local htmlFileName="${pdfFileName%.*}.html"
  local arguments=$2
  #
  echo ""
  echo "---"
  echo " pdfFileName: [$pdfFileName]"
  echo "htmlFileName: [$htmlFileName]"
  echo " arguments: [$arguments]"
  #
  # Build the command line once so that what is echoed is exactly what is run.
  # $arguments is intentionally left unquoted: it may hold several options
  # separated by whitespace and must be split into separate words.
  #
  local -a command=(
    "$PDF2HTMLEX_PATH"
    "--data-dir=$PDF2HTMLEX_DATDIR"
    --dest-dir "$PDF2HTMLEX_TMPDIR"
    --fit-width=800 --last-page=1
    $arguments
    "browser_tests/$pdfFileName"
    "$htmlFileName"
  )
  #
  # now run pdf2htmlEX to produce the output files
  #
  echo "${command[@]}"
  #
  "${command[@]}"
}
# The location of the pdf2htmlEX executable is mandatory; everything else
# falls back to a default underneath /tmp/pdf2htmlEX.
#
if [ -z "$PDF2HTMLEX_PATH" ]; then
  # diagnostics belong on stderr so they are not lost in captured stdout
  echo "PANIC: we do not know where to find the pdf2htmlEX executable" >&2
  exit 1
fi
export PDF2HTMLEX_DATDIR="${PDF2HTMLEX_DATDIR:-/tmp/pdf2htmlEX/dat}"
export PDF2HTMLEX_TMPDIR="${PDF2HTMLEX_TMPDIR:-/tmp/pdf2htmlEX/tmp}"
export PDF2HTMLEX_PREDIR="${PDF2HTMLEX_PREDIR:-/tmp/pdf2htmlEX/pre}"
# clear out the TMPDIR
#
# The paths are quoted (and guarded with :? and --) so that a value with
# spaces, glob characters or a leading dash can never make rm -rf act on
# anything other than the intended directory.
#
rm -rf -- "${PDF2HTMLEX_TMPDIR:?}"
mkdir -p -- "$PDF2HTMLEX_TMPDIR"
#
# Convert each PDF; the optional second argument carries the extra
# pdf2htmlEX options used for that file.
#
runPdf2htmlEX 'test_fail.pdf'
runPdf2htmlEX 'basic_text.pdf'
runPdf2htmlEX 'geneve_1564.pdf'
runPdf2htmlEX 'text_visibility.pdf' '--correct-text-visibility=1'
runPdf2htmlEX 'with_form.pdf' '--process-form=1'
runPdf2htmlEX 'invalid_unicode_issue477.pdf'
runPdf2htmlEX 'svg_background_with_page_rotation_issue402.pdf' '--bg-format=svg'
runPdf2htmlEX 'fontfile3_opentype.pdf'
# clear out the PREDIR
#
rm -rf -- "${PDF2HTMLEX_PREDIR:?}"
mkdir -p -- "$PDF2HTMLEX_PREDIR"
#
# publish the freshly generated files as the pre-computed html
#
cp -- "$PDF2HTMLEX_TMPDIR"/* "$PDF2HTMLEX_PREDIR"

View File

@ -28,6 +28,7 @@ class Common(object):
PNGDIR = "@PDF2HTMLEX_PNGDIR@" PNGDIR = "@PDF2HTMLEX_PNGDIR@"
DATDIR = "@PDF2HTMLEX_DATDIR@" DATDIR = "@PDF2HTMLEX_DATDIR@"
OUTDIR = "@PDF2HTMLEX_OUTDIR@" OUTDIR = "@PDF2HTMLEX_OUTDIR@"
PREDIR = "@PDF2HTMLEX_PREDIR@"
HTMDIR = "@PDF2HTMLEX_HTMDIR@" HTMDIR = "@PDF2HTMLEX_HTMDIR@"
def setUp(self): def setUp(self):