diff --git a/lib/base.css b/lib/base.css new file mode 100644 index 0000000..5317c33 --- /dev/null +++ b/lib/base.css @@ -0,0 +1,36 @@ +#pdf-main { + font-family: sans-serif; + position:absolute; + top:0; + left:0; + bottom:0; + right:0; + overflow:auto; + background-color:grey; + /* for Chrome & Safari */ + -webkit-text-stroke-width:0.2px; +} +#pdf-main > .p { + position:relative; + margin:13px auto; + background-color:white; + overflow:hidden; + display:none; +} +.p > .l { + position:absolute; + white-space:pre; +} +.l > .w { + display:inline-block; + font-family: monospace; +} +::selection{ + background: rgba(168,209,255,0.5); +} +::-moz-selection{ + background: rgba(168,209,255,0.5); +} +.p > .i { + position:absolute; +} diff --git a/lib/head.html b/lib/head.html index c96ff9c..5fbf4d1 100644 --- a/lib/head.html +++ b/lib/head.html @@ -5,62 +5,3 @@ - - - - - -
diff --git a/lib/neck.html b/lib/neck.html new file mode 100644 index 0000000..7fffe8e --- /dev/null +++ b/lib/neck.html @@ -0,0 +1,20 @@ + + + +
diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index 98b39ce..8fa5cd3 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -56,8 +56,11 @@ class HTMLRenderer : public OutputDev // Does this device need non-text content? virtual GBool needNonText() { return gFalse; } - virtual void write_html_head(); - virtual void write_html_tail(); + virtual void pre_process(); + virtual void post_process(); + virtual void process_single_html(); + + virtual boost::filesystem::path working_dir() const { return (param->single_html ? tmp_dir : dest_dir); } // Start a page. virtual void startPage(int pageNum, GfxState *state); diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index d0b9a96..0102686 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -9,10 +9,7 @@ * 2012.08.14 */ -#include - -#include -#include +#include #include @@ -21,15 +18,14 @@ #include "config.h" #include "namespace.h" +using std::flush; + HTMLRenderer::HTMLRenderer(const Param * param) :line_opened(false) ,image_count(0) ,param(param) ,dest_dir(param->dest_dir) ,tmp_dir(TMP_DIR) - ,html_fout(dest_dir / param->output_filename, ofstream::binary) // we may output utf8 characters, so use binary - ,allcss_fout(dest_dir / "all.css", ofstream::binary) - ,fontscript_fout(tmp_dir / "convert.pe", ofstream::binary) { // install default font & size install_font(nullptr); @@ -47,55 +43,80 @@ HTMLRenderer::~HTMLRenderer() void HTMLRenderer::process(PDFDoc *doc) { - cerr << "Processing Text: "; - write_html_head(); - xref = doc->getXRef(); - for(int i = param->first_page; i <= param->last_page ; ++i) - { - doc->displayPage(this, i, param->h_dpi, param->v_dpi, - 0, true, false, false, - nullptr, nullptr, nullptr, nullptr); + cerr << "Working: "; - cerr << "."; - cerr.flush(); - } - write_html_tail(); - cerr << endl; + xref = doc->getXRef(); + + BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { // Render non-text objects as image - cerr << "Processing Others: "; // copied from poppler SplashColor color; color[0] = color[1] = color[2] = 255; - auto bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); + bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); bg_renderer->startDoc(doc); + } - for(int i = param->first_page; i <= param->last_page ; ++i) + pre_process(); + for(int i = param->first_page; i <= param->last_page ; ++i) + { + if(param->process_nontext) { doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2, 0, true, false, false, nullptr, nullptr, nullptr, nullptr); - bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(dest_dir / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2); - - cerr << "."; - cerr.flush(); + bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(working_dir() / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2); } - delete bg_renderer; - cerr << endl; + + + doc->displayPage(this, i, param->h_dpi, param->v_dpi, + 0, true, false, false, + nullptr, nullptr, nullptr, nullptr); + + cerr << "." << flush; } + post_process(); + + if(bg_renderer) + delete bg_renderer; + + cerr << endl; } -void HTMLRenderer::write_html_head() +void HTMLRenderer::pre_process() { - html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf(); + html_fout.open(working_dir() / param->output_filename, ofstream::binary); // we may output utf8 characters, so use binary + allcss_fout.open(working_dir() / "all.css", ofstream::binary); + fontscript_fout.open(tmp_dir / "pdf2htmlEX.pe", ofstream::binary); + + if(!param->single_html) + { + html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf(); + html_fout << "" << endl; + html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf(); + } + + allcss_fout << ifstream(PDF2HTMLEX_LIB_PATH / "base.css", ifstream::binary).rdbuf(); } -void HTMLRenderer::write_html_tail() +void HTMLRenderer::post_process() { - html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf(); + if(!param->single_html) + { + html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf(); + } + + html_fout.close(); + allcss_fout.close(); + fontscript_fout.close(); + + if(param->single_html) + { + process_single_html(); + } } void HTMLRenderer::startPage(int pageNum, GfxState *state) @@ -133,7 +154,22 @@ void HTMLRenderer::endPage() { html_fout << "
" << endl; } - +void HTMLRenderer::process_single_html() +{ + ofstream out (dest_dir / param->output_filename, ofstream::binary); + + out << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf(); + + out << "" << endl; + + out << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf(); + + out << ifstream(tmp_dir / param->output_filename, ifstream::binary).rdbuf(); + + out << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf(); +} diff --git a/src/Param.h b/src/Param.h index b304da1..9e7dafc 100644 --- a/src/Param.h +++ b/src/Param.h @@ -27,6 +27,8 @@ struct Param int process_nontext; int debug; + + int single_html; }; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 35b8079..a25dc34 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -132,6 +132,7 @@ po::variables_map parse_options (int argc, char **argv) ("veps", po::value(¶m.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)") ("process-nontext", po::value(¶m.process_nontext)->default_value(1), "process nontext objects") ("debug", po::value(¶m.debug)->default_value(0), "output debug information") + ("single-html", po::value(¶m.single_html)->default_value(0), "combine everything into one single HTML file") ; opt_hidden.add_options()