From ceea8193d851c76fbb53ef997844e532550b2806 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sat, 7 Jun 2014 00:51:19 +0800 Subject: [PATCH 01/10] Fix XHR failure for file: protocol. --- share/pdf2htmlEX.js.in | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/share/pdf2htmlEX.js.in b/share/pdf2htmlEX.js.in index 25352fe..f21d3df 100644 --- a/share/pdf2htmlEX.js.in +++ b/share/pdf2htmlEX.js.in @@ -372,9 +372,8 @@ Viewer.prototype = { var _idx = idx; var xhr = new XMLHttpRequest(); xhr.open('GET', url, true); - xhr.onreadystatechange = function(){ - if (xhr.readyState != 4) return; - if (xhr.status === 200) { + xhr.onload = function(){ + if (xhr.status === 200 || xhr.status === 0) { // find the page element in the data var div = document.createElement('div'); div.innerHTML = xhr.responseText; From 4865d3aa5c7ad2b67d9c2da0e55121316962320c Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sat, 7 Jun 2014 16:20:20 +0800 Subject: [PATCH 02/10] Add Duan Yao to AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 95f0be3..ed7cc51 100644 --- a/AUTHORS +++ b/AUTHORS @@ -6,6 +6,7 @@ Chris Cinelli Daniel Bonniot de Ruisselet Deepak Denis Sablic +Duan Yao filodej hasufell Herbert Jones From 82b0569c06d1d0dd9fe8e6ab2801cff1dde4bd8c Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sat, 7 Jun 2014 02:01:10 +0800 Subject: [PATCH 03/10] Add svg-nodes-limit option to avoid overly complicated svg background. 
--- .../CairoBackgroundRenderer.cc | 57 +++++++++++++++++-- .../CairoBackgroundRenderer.h | 11 ++-- .../SplashBackgroundRenderer.cc | 47 +++++++++------ .../SplashBackgroundRenderer.h | 7 +-- src/Param.h | 1 + src/pdf2htmlEX.cc | 3 +- 6 files changed, 92 insertions(+), 34 deletions(-) diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 074e4c1..c92703b 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -15,12 +15,28 @@ #if ENABLE_SVG #include "CairoBackgroundRenderer.h" +#include "SplashBackgroundRenderer.h" namespace pdf2htmlEX { using std::string; using std::ifstream; +CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) + : CairoOutputDev() + , html_renderer(html_renderer) + , param(param) + , surface(nullptr) + , use_bitmap(false) +{ + if (param.svg_nodes_limit > 0) + { + this->bitmap_renderer = new SplashBackgroundRenderer(html_renderer, param); + } + else + this->bitmap_renderer = nullptr; +} + void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, @@ -46,6 +62,8 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, void CairoBackgroundRenderer::init(PDFDoc * doc) { startDoc(doc); + if (this->bitmap_renderer != nullptr) + this->bitmap_renderer->init(doc); } static GBool annot_cb(Annot *, void * pflag) { @@ -67,13 +85,11 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) page_height = doc->getPageMediaHeight(pageno); } - { - auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); - if(param.embed_image) - html_renderer->tmp_files.add((char*)fn); + auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno); + if(param.embed_image) + html_renderer->tmp_files.add((char*)fn); - surface = cairo_svg_surface_create((char*)fn, page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); - } + surface = cairo_svg_surface_create((char*)fn, page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); @@ -105,10 +121,39 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if(status) throw string("Error in cairo: ") + cairo_status_to_string(status); } + + //check node count in the svg file, fall back to bitmap_renderer if necessary. + if (param.svg_nodes_limit > 0) + { + int n = 0; + char c; + ifstream svgfile((char*)fn); + //count of '<' in the file should be an approximation of node count. + while(svgfile >> c) + { + if (c == '<') + ++n; + } + svgfile.close(); + if (n > param.svg_nodes_limit) + { + html_renderer->tmp_files.add((char*)fn); + use_bitmap = true; + bitmap_renderer->render_page(doc, pageno); + } + else + use_bitmap = false; + } } void CairoBackgroundRenderer::embed_image(int pageno) { + if (use_bitmap) + { + bitmap_renderer->embed_image(pageno); + return; + } + auto & f_page = *(html_renderer->f_curpage); f_page << "str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, param.bg_format.c_str()); + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str()); if(param.embed_image) html_renderer->tmp_files.add((char*)fn); @@ -118,21 +142,21 @@ void SplashBackgroundRenderer::embed_image(int pageno) if(param.embed_image) { - auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, param.bg_format.c_str()); + auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, format.c_str()); ifstream fin((char*)path, ifstream::binary); if(!fin) throw string("Cannot read background image ") + (char*)path; - auto iter = FORMAT_MIME_TYPE_MAP.find(param.bg_format); + auto iter = FORMAT_MIME_TYPE_MAP.find(format); if(iter == FORMAT_MIME_TYPE_MAP.end()) - throw string("Image format not supported: ") + param.bg_format; + throw string("Image format not supported: ") + format; string mime_type = iter->second; f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); } else { - f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, param.bg_format.c_str()); + f_page << (char*)html_renderer->str_fmt("bg%x.%s", pageno, format.c_str()); } f_page << "\"/>"; } @@ -153,23 +177,14 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, // use unique_ptr to auto delete the object upon exception unique_ptr writer; - if(false) { } -#ifdef ENABLE_LIBPNG - else if(param.bg_format == "png") + if(format == "png") { writer = unique_ptr(new PNGWriter); } -#endif -#ifdef ENABLE_LIBJPEG - else if(param.bg_format == "jpg") + else if(format == "jpg") { writer = unique_ptr(new JpegWriter); } -#endif - else - { - throw string("Image format not supported: ") + param.bg_format; - } if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) throw "Cannot initialize PNGWriter"; diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.h b/src/BackgroundRenderer/SplashBackgroundRenderer.h index e999a10..55b9a97 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.h +++ 
b/src/BackgroundRenderer/SplashBackgroundRenderer.h @@ -27,11 +27,7 @@ class SplashBackgroundRenderer : public BackgroundRenderer, SplashOutputDev public: static const SplashColor white; - SplashBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) - : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white), gTrue, gTrue) - , html_renderer(html_renderer) - , param(param) - { } + SplashBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param); virtual ~SplashBackgroundRenderer() { } @@ -68,6 +64,7 @@ protected: void dump_image(const char * filename, int x1, int y1, int x2, int y2); HTMLRenderer * html_renderer; const Param & param; + std::string format; }; } // namespace pdf2htmlEX diff --git a/src/Param.h b/src/Param.h index 9d42620..806f837 100644 --- a/src/Param.h +++ b/src/Param.h @@ -63,6 +63,7 @@ struct Param // background image std::string bg_format; + int svg_nodes_limit = 0; // encryption std::string owner_password, user_password; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 23e8d73..0df3d30 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -190,7 +190,8 @@ void parse_options (int argc, char **argv) // background image .add("bg-format", ¶m.bg_format, "png", "specify background image format") - + .add("svg-nodes-limit", ¶m.svg_nodes_limit, 0, "if node count in a svg background image exceeds this limit," + " fall back to bitmap background. 
0 or negative means no limit.") // encryption .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", true) .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", true) From 1764c1c33439ed02b8740705b81ab074b55bcd51 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sat, 7 Jun 2014 09:02:51 +0800 Subject: [PATCH 04/10] fix travis build --- src/Param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Param.h b/src/Param.h index 806f837..8d19361 100644 --- a/src/Param.h +++ b/src/Param.h @@ -63,7 +63,7 @@ struct Param // background image std::string bg_format; - int svg_nodes_limit = 0; + int svg_nodes_limit; // encryption std::string owner_password, user_password; From 4add9da6e4f06c793c4177b2f8a48bd28b53e1ec Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sun, 8 Jun 2014 12:43:20 +0800 Subject: [PATCH 05/10] Move BackgroundRenderer fallback logic to HTMLRenderer. --- src/BackgroundRenderer/BackgroundRenderer.cc | 22 ++++---- src/BackgroundRenderer/BackgroundRenderer.h | 6 ++- .../CairoBackgroundRenderer.cc | 52 ++++++------------- .../CairoBackgroundRenderer.h | 6 +-- .../SplashBackgroundRenderer.cc | 22 ++++---- .../SplashBackgroundRenderer.h | 6 +-- src/HTMLRenderer/HTMLRenderer.h | 3 +- src/HTMLRenderer/general.cc | 19 ++++++- 8 files changed, 66 insertions(+), 70 deletions(-) diff --git a/src/BackgroundRenderer/BackgroundRenderer.cc b/src/BackgroundRenderer/BackgroundRenderer.cc index e6cf59c..164e766 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.cc +++ b/src/BackgroundRenderer/BackgroundRenderer.cc @@ -20,18 +20,9 @@ namespace pdf2htmlEX { BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param) { -#ifdef ENABLE_LIBPNG - if(format == "png") - { - return new SplashBackgroundRenderer(html_renderer, param); - } -#endif -#ifdef ENABLE_LIBJPEG - if(format == "jpg") - { - return new 
SplashBackgroundRenderer(html_renderer, param); - } -#endif + if (format == "png" || format == "jpg") + return new SplashBackgroundRenderer(format, html_renderer, param); + #if ENABLE_SVG if (format == "svg") { @@ -42,4 +33,11 @@ BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string return nullptr; } +BackgroundRenderer * BackgroundRenderer::getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) +{ + if (param.bg_format == "svg" && param.svg_nodes_limit > 0) + return new SplashBackgroundRenderer("", html_renderer, param); + return nullptr; +} + } // namespace pdf2htmlEX diff --git a/src/BackgroundRenderer/BackgroundRenderer.h b/src/BackgroundRenderer/BackgroundRenderer.h index f6d898e..29e03b6 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.h +++ b/src/BackgroundRenderer/BackgroundRenderer.h @@ -22,12 +22,16 @@ class BackgroundRenderer public: // return nullptr upon failure static BackgroundRenderer * getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param); + // Return a fallback bg renderer according to param.bg_format. + // Currently only svg bg format might need a bitmap fallback. + static BackgroundRenderer * getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param); BackgroundRenderer() {} virtual ~BackgroundRenderer() {} virtual void init(PDFDoc * doc) = 0; - virtual void render_page(PDFDoc * doc, int pageno) = 0; + //return true on success, false otherwise (e.g. 
need a fallback) + virtual bool render_page(PDFDoc * doc, int pageno) = 0; virtual void embed_image(int pageno) = 0; }; diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index c92703b..4f30305 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -23,19 +23,11 @@ using std::string; using std::ifstream; CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) - : CairoOutputDev() - , html_renderer(html_renderer) - , param(param) - , surface(nullptr) - , use_bitmap(false) -{ - if (param.svg_nodes_limit > 0) - { - this->bitmap_renderer = new SplashBackgroundRenderer(html_renderer, param); - } - else - this->bitmap_renderer = nullptr; -} + : CairoOutputDev() + , html_renderer(html_renderer) + , param(param) + , surface(nullptr) +{ } void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, @@ -62,15 +54,13 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, void CairoBackgroundRenderer::init(PDFDoc * doc) { startDoc(doc); - if (this->bitmap_renderer != nullptr) - this->bitmap_renderer->init(doc); } static GBool annot_cb(Annot *, void * pflag) { return (*((bool*)pflag)) ? gTrue : gFalse; }; -void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) +bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { double page_width; double page_height; @@ -85,11 +75,11 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) page_height = doc->getPageMediaHeight(pageno); } - auto fn = html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); + string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), pageno); if(param.embed_image) - html_renderer->tmp_files.add((char*)fn); + html_renderer->tmp_files.add(fn); - surface = cairo_svg_surface_create((char*)fn, page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); + surface = cairo_svg_surface_create(fn.c_str(), page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); @@ -127,33 +117,25 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { int n = 0; char c; - ifstream svgfile((char*)fn); + ifstream svgfile(fn); //count of '<' in the file should be an approximation of node count. while(svgfile >> c) { if (c == '<') ++n; + if (n > param.svg_nodes_limit) + { + html_renderer->tmp_files.add(fn); + return false; + } } - svgfile.close(); - if (n > param.svg_nodes_limit) - { - html_renderer->tmp_files.add((char*)fn); - use_bitmap = true; - bitmap_renderer->render_page(doc, pageno); - } - else - use_bitmap = false; } + + return true; } void CairoBackgroundRenderer::embed_image(int pageno) { - if (use_bitmap) - { - bitmap_renderer->embed_image(pageno); - return; - } - auto & f_page = *(html_renderer->f_curpage); f_page << "displayPage(this, pageno, param.h_dpi, param.v_dpi, @@ -108,6 +107,7 @@ void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno) (!(param.use_cropbox)), false, false, nullptr, nullptr, &annot_cb, &process_annotation); + return true; } void SplashBackgroundRenderer::embed_image(int pageno) diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.h b/src/BackgroundRenderer/SplashBackgroundRenderer.h index 55b9a97..9ec8de9 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.h +++ b/src/BackgroundRenderer/SplashBackgroundRenderer.h @@ -26,13 +26,13 @@ class SplashBackgroundRenderer : public BackgroundRenderer, SplashOutputDev { 
public: static const SplashColor white; - - SplashBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param); + //format: "png" or "jpg", or "" for a default format + SplashBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param); virtual ~SplashBackgroundRenderer() { } virtual void init(PDFDoc * doc); - virtual void render_page(PDFDoc * doc, int pageno); + virtual bool render_page(PDFDoc * doc, int pageno); virtual void embed_image(int pageno); // Does this device use beginType3Char/endType3Char? Otherwise, diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 7d67f70..73929ab 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -327,7 +327,8 @@ protected: friend class CairoBackgroundRenderer; // ugly! #endif BackgroundRenderer * bg_renderer; - + BackgroundRenderer * fallback_bg_renderer; + bool fallback_bg_required; struct { std::ofstream fs; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 9c85a97..803bc2d 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -98,12 +98,17 @@ void HTMLRenderer::process(PDFDoc *doc) // Process pages bg_renderer = nullptr; + fallback_bg_renderer = nullptr; if(param.process_nontext) { bg_renderer = BackgroundRenderer::getBackgroundRenderer(param.bg_format, this, param); if(!bg_renderer) throw "Cannot initialize background renderer, unsupported format"; bg_renderer->init(doc); + + fallback_bg_renderer = BackgroundRenderer::getFallbackBackgroundRenderer(this, param); + if (fallback_bg_renderer) + fallback_bg_renderer->init(doc); } int page_count = (param.last_page - param.first_page + 1); @@ -130,7 +135,9 @@ void HTMLRenderer::process(PDFDoc *doc) if(param.process_nontext) { - bg_renderer->render_page(doc, i); + fallback_bg_required = !bg_renderer->render_page(doc, i); + if (fallback_bg_required && fallback_bg_renderer != nullptr) + 
fallback_bg_renderer->render_page(doc, i); } doc->displayPage(this, i, @@ -163,6 +170,11 @@ void HTMLRenderer::process(PDFDoc *doc) delete bg_renderer; bg_renderer = nullptr; } + if(fallback_bg_renderer) + { + delete fallback_bg_renderer; + fallback_bg_renderer = nullptr; + } cerr << endl; } @@ -219,7 +231,10 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) if(param.process_nontext) { - bg_renderer->embed_image(pageNum); + if (!fallback_bg_required) + bg_renderer->embed_image(pageNum); + else if (fallback_bg_renderer != nullptr) + fallback_bg_renderer->embed_image(pageNum); } reset_state(); From 0586c88d26444fc88ece3295f9354778e3573e2a Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sun, 8 Jun 2014 20:53:26 +0800 Subject: [PATCH 06/10] Implement --svg-embed-bitmap: saving bitmaps in svg as external files. --- 3rdparty/poppler/git/CairoOutputDev.h | 2 +- .../CairoBackgroundRenderer.cc | 82 ++++++++++++++++++- .../CairoBackgroundRenderer.h | 17 +++- src/Param.h | 1 + src/pdf2htmlEX.cc | 3 + 5 files changed, 100 insertions(+), 5 deletions(-) diff --git a/3rdparty/poppler/git/CairoOutputDev.h b/3rdparty/poppler/git/CairoOutputDev.h index c9ae33d..727c687 100644 --- a/3rdparty/poppler/git/CairoOutputDev.h +++ b/3rdparty/poppler/git/CairoOutputDev.h @@ -272,7 +272,7 @@ protected: cairo_filter_t getFilterForSurface(cairo_surface_t *image, GBool interpolate); GBool getStreamData (Stream *str, char **buffer, int *length); - void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); + virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); void fillToStrokePathClip(GfxState *state); void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y); diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 4f30305..795336a 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -19,8 +19,7 
@@ namespace pdf2htmlEX { -using std::string; -using std::ifstream; +using namespace std; CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) : CairoOutputDev() @@ -29,6 +28,15 @@ CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, c , surface(nullptr) { } +CairoBackgroundRenderer::~CairoBackgroundRenderer() +{ + for(auto i = bitmaps_ref_count.begin(); i != bitmaps_ref_count.end(); ++i) + { + if (i->second == 0) + html_renderer->tmp_files.add(this->get_bitmap_path(i->first)); + } +} + void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, @@ -86,6 +94,8 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) cairo_t * cr = cairo_create(surface); setCairo(cr); + bitmaps_in_current_page.resize(0); + bool process_annotation = param.process_annotation; doc->displayPage(this, pageno, param.h_dpi, param.v_dpi, 0, @@ -131,6 +141,10 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) } } + // the svg file is actually used, so add its bitmaps' ref count. + for (auto i = bitmaps_in_current_page.begin(); i != bitmaps_in_current_page.end(); i++) + ++bitmaps_ref_count[*i]; + return true; } @@ -138,7 +152,19 @@ void CairoBackgroundRenderer::embed_image(int pageno) { auto & f_page = *(html_renderer->f_curpage); - f_page << " or background-image can't have external resources; + // SVGs introduced by and can, but they are more expensive for browsers. + // So we use if the SVG contains no external bitmaps, and use otherwise. + // See also: + // https://developer.mozilla.org/en-US/docs/Web/SVG/SVG_as_an_Image + // http://stackoverflow.com/questions/4476526/do-i-use-img-object-or-embed-for-svg-files + + if (param.svg_embed_bitmap || bitmaps_in_current_page.empty()) + f_page << ""; } +// use object number as bitmap file name, without pageno prefix, +// because a bitmap may be shared by multiple pages. 
+const char* CairoBackgroundRenderer::get_bitmap_path(int id) +{ + return html_renderer->str_fmt("%s/%d.jpg", param.dest_dir.c_str(), id); +} +// Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. +void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) +{ + if (param.svg_embed_bitmap) + { + CairoOutputDev::setMimeData(str, ref, image); + return; + } + + // TODO dump bitmaps in other formats. + if (str->getKind() != strDCT) + return; + + // TODO inline image? + if (ref == nullptr || !ref->isRef()) + return; + + int imgId = ref->getRef().num; + auto uri = strdup((char*) html_renderer->str_fmt("%d.jpg", imgId)); + auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, + (unsigned char*) uri, strlen(uri), gfree, uri); + if (st) + { + gfree(uri); + return; + } + bitmaps_in_current_page.push_back(imgId); + + if(bitmaps_ref_count.find(imgId) != bitmaps_ref_count.end()) + return; + + bitmaps_ref_count[imgId] = 0; + + char *strBuffer; + int len; + if (getStreamData(str->getNextStream(), &strBuffer, &len)) + { + string path = get_bitmap_path(imgId); + ofstream imgfile(path, ofstream::binary); + imgfile.write(strBuffer, len); + gfree(strBuffer); + } +} + } // namespace pdf2htmlEX #endif // ENABLE_SVG diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index b24b05b..25d366a 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "pdf2htmlEX-config.h" @@ -26,7 +28,7 @@ class CairoBackgroundRenderer : public BackgroundRenderer, CairoOutputDev public: CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param); - virtual ~CairoBackgroundRenderer() { } + virtual ~CairoBackgroundRenderer(); virtual void init(PDFDoc * doc); virtual bool render_page(PDFDoc * doc, int pageno); @@ -41,10 +43,23 @@ 
public: double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); +protected: + virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); + protected: HTMLRenderer * html_renderer; const Param & param; cairo_surface_t * surface; + +private: + // convert bitmap stream id to bitmap file name. No pageno prefix, + // because a bitmap may be shared by multiple pages. + const char* get_bitmap_path(int id); + // map + // note: if a svg bg fallbacks to bitmap bg, its bitmaps are not taken into account. + std::map bitmaps_ref_count; + // id of bitmaps' stream used by current page + std::vector bitmaps_in_current_page; }; } diff --git a/src/Param.h b/src/Param.h index 8d19361..3f20eca 100644 --- a/src/Param.h +++ b/src/Param.h @@ -64,6 +64,7 @@ struct Param // background image std::string bg_format; int svg_nodes_limit; + int svg_embed_bitmap; // encryption std::string owner_password, user_password; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 0df3d30..ccb426d 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -192,6 +192,9 @@ void parse_options (int argc, char **argv) .add("bg-format", ¶m.bg_format, "png", "specify background image format") .add("svg-nodes-limit", ¶m.svg_nodes_limit, 0, "if node count in a svg background image exceeds this limit," " fall back to bitmap background. 
0 or negative means no limit.") + .add("svg-embed-bitmap", ¶m.svg_embed_bitmap, 1, "embed bitmaps in svg files or save them as external files" + " (even if 0 is specified, some bitmaps may still be embedded)") + // encryption .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", true) .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", true) From edb0ddacea9a05ea17508260edde0be5b5650bc7 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Wed, 11 Jun 2014 21:59:50 +0800 Subject: [PATCH 07/10] Some coding style improvements --- src/BackgroundRenderer/BackgroundRenderer.cc | 13 ++++++++++-- .../CairoBackgroundRenderer.cc | 20 ++++++++++--------- .../CairoBackgroundRenderer.h | 4 ++-- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/BackgroundRenderer/BackgroundRenderer.cc b/src/BackgroundRenderer/BackgroundRenderer.cc index 164e766..5c37fde 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.cc +++ b/src/BackgroundRenderer/BackgroundRenderer.cc @@ -20,9 +20,18 @@ namespace pdf2htmlEX { BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param) { - if (format == "png" || format == "jpg") +#ifdef ENABLE_LIBPNG + if(format == "png") + { return new SplashBackgroundRenderer(format, html_renderer, param); - + } +#endif +#ifdef ENABLE_LIBJPEG + if(format == "jpg") + { + return new SplashBackgroundRenderer(format, html_renderer, param); + } +#endif #if ENABLE_SVG if (format == "svg") { diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 795336a..c418933 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -19,7 +19,11 @@ namespace pdf2htmlEX { -using namespace std; +using std::string; +using std::ifstream; +using std::ofstream; +using std::vector; +using std::unordered_map; 
CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) : CairoOutputDev() @@ -30,10 +34,10 @@ CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, c CairoBackgroundRenderer::~CairoBackgroundRenderer() { - for(auto i = bitmaps_ref_count.begin(); i != bitmaps_ref_count.end(); ++i) + for(auto itr = bitmaps_ref_count.begin(); itr != bitmaps_ref_count.end(); ++itr) { - if (i->second == 0) - html_renderer->tmp_files.add(this->get_bitmap_path(i->first)); + if (itr->second == 0) + html_renderer->tmp_files.add(this->get_bitmap_path(itr->first)); } } @@ -182,8 +186,6 @@ void CairoBackgroundRenderer::embed_image(int pageno) f_page << "\"/>"; } -// use object number as bitmap file name, without pageno prefix, -// because a bitmap may be shared by multiple pages. const char* CairoBackgroundRenderer::get_bitmap_path(int id) { return html_renderer->str_fmt("%s/%d.jpg", param.dest_dir.c_str(), id); @@ -208,10 +210,10 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac int imgId = ref->getRef().num; auto uri = strdup((char*) html_renderer->str_fmt("%d.jpg", imgId)); auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, - (unsigned char*) uri, strlen(uri), gfree, uri); + (unsigned char*) uri, strlen(uri), free, uri); if (st) { - gfree(uri); + free(uri); return; } bitmaps_in_current_page.push_back(imgId); @@ -228,7 +230,7 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac string path = get_bitmap_path(imgId); ofstream imgfile(path, ofstream::binary); imgfile.write(strBuffer, len); - gfree(strBuffer); + free(strBuffer); } } diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index 25d366a..fc587a3 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include 
#include #include "pdf2htmlEX-config.h" @@ -57,7 +57,7 @@ private: const char* get_bitmap_path(int id); // map // note: if a svg bg fallbacks to bitmap bg, its bitmaps are not taken into account. - std::map bitmaps_ref_count; + std::unordered_map bitmaps_ref_count; // id of bitmaps' stream used by current page std::vector bitmaps_in_current_page; }; From 5681fe11c0367e959485ab2d06f864fd62efab0d Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Wed, 11 Jun 2014 22:19:17 +0800 Subject: [PATCH 08/10] Fix bad usage of html_renderer->str_fmt() in get_bitmap_path(); add prefix to bitmap file name. --- .../CairoBackgroundRenderer.cc | 16 ++++++++++------ src/BackgroundRenderer/CairoBackgroundRenderer.h | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index c418933..56c8596 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -37,7 +37,10 @@ CairoBackgroundRenderer::~CairoBackgroundRenderer() for(auto itr = bitmaps_ref_count.begin(); itr != bitmaps_ref_count.end(); ++itr) { if (itr->second == 0) - html_renderer->tmp_files.add(this->get_bitmap_path(itr->first)); + { + string path; + html_renderer->tmp_files.add(this->build_bitmap_path(itr->first, path)); + } } } @@ -186,9 +189,10 @@ void CairoBackgroundRenderer::embed_image(int pageno) f_page << "\"/>"; } -const char* CairoBackgroundRenderer::get_bitmap_path(int id) +string & CairoBackgroundRenderer::build_bitmap_path(int id, string & path) { - return html_renderer->str_fmt("%s/%d.jpg", param.dest_dir.c_str(), id); + // "po" for "PDF Object" + return path = html_renderer->str_fmt("%s/po-%d.jpg", param.dest_dir.c_str(), id); } // Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. 
void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) @@ -208,7 +212,7 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac return; int imgId = ref->getRef().num; - auto uri = strdup((char*) html_renderer->str_fmt("%d.jpg", imgId)); + auto uri = strdup((char*) html_renderer->str_fmt("po-%d.jpg", imgId)); auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, (unsigned char*) uri, strlen(uri), free, uri); if (st) @@ -227,8 +231,8 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac int len; if (getStreamData(str->getNextStream(), &strBuffer, &len)) { - string path = get_bitmap_path(imgId); - ofstream imgfile(path, ofstream::binary); + string path; + ofstream imgfile(build_bitmap_path(imgId, path), ofstream::binary); imgfile.write(strBuffer, len); free(strBuffer); } diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index fc587a3..65e4515 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "pdf2htmlEX-config.h" @@ -54,7 +55,7 @@ protected: private: // convert bitmap stream id to bitmap file name. No pageno prefix, // because a bitmap may be shared by multiple pages. - const char* get_bitmap_path(int id); + std::string & build_bitmap_path(int id, std::string & path); // map // note: if a svg bg fallbacks to bitmap bg, its bitmaps are not taken into account. std::unordered_map bitmaps_ref_count; From fdbc70401f3d6ba4c21088256badae5ad4bc9b65 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Wed, 11 Jun 2014 23:20:09 +0800 Subject: [PATCH 09/10] Improve help texts for --svg-node-count-limit and --svg-embed-bitmap; rename --svg-nodes-limit to --svg-node-count-limit. 
--- pdf2htmlEX.1.in | 10 ++++++++++ src/BackgroundRenderer/BackgroundRenderer.cc | 2 +- src/BackgroundRenderer/CairoBackgroundRenderer.cc | 8 ++++---- src/Param.h | 2 +- src/pdf2htmlEX.cc | 7 +++---- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 8b01c34..b63b1d9 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -248,6 +248,16 @@ If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for .B --bg-format (Default: png) Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats. +.TP +.B --svg-node-count-limit (Default: -1) +If the node count in an svg background image exceeds this limit, fall back to a bitmap background for this page; a negative value means no limit. +This option is only useful when '--bg-format svg' is specified. Note that the node count of an svg is only calculated approximately. + +.TP +.B --svg-embed-bitmap <0|1> (Default: 1) +Whether to embed bitmaps in the svg background image. 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible. +JPEG images in a PDF are the most likely to be dumped. This option is only useful when '--bg-format svg' is specified. 
+ .SS PDF Protection .TP diff --git a/src/BackgroundRenderer/BackgroundRenderer.cc b/src/BackgroundRenderer/BackgroundRenderer.cc index 5c37fde..1ae298c 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.cc +++ b/src/BackgroundRenderer/BackgroundRenderer.cc @@ -44,7 +44,7 @@ BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string BackgroundRenderer * BackgroundRenderer::getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) { - if (param.bg_format == "svg" && param.svg_nodes_limit > 0) + if (param.bg_format == "svg" && param.svg_node_count_limit >= 0) return new SplashBackgroundRenderer("", html_renderer, param); return nullptr; } diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 56c8596..4279d3a 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -130,7 +130,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) } //check node count in the svg file, fall back to bitmap_renderer if necessary. - if (param.svg_nodes_limit > 0) + if (param.svg_node_count_limit >= 0) { int n = 0; char c; @@ -140,7 +140,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { if (c == '<') ++n; - if (n > param.svg_nodes_limit) + if (n > param.svg_node_count_limit) { html_renderer->tmp_files.add(fn); return false; @@ -149,8 +149,8 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) } // the svg file is actually used, so add its bitmaps' ref count. 
- for (auto i = bitmaps_in_current_page.begin(); i != bitmaps_in_current_page.end(); i++) - ++bitmaps_ref_count[*i]; + for (auto itr = bitmaps_in_current_page.begin(); itr != bitmaps_in_current_page.end(); itr++) + ++bitmaps_ref_count[*itr]; return true; } diff --git a/src/Param.h b/src/Param.h index 3f20eca..8c16802 100644 --- a/src/Param.h +++ b/src/Param.h @@ -63,7 +63,7 @@ struct Param // background image std::string bg_format; - int svg_nodes_limit; + int svg_node_count_limit; int svg_embed_bitmap; // encryption diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index ccb426d..f20dc2b 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -190,10 +190,9 @@ void parse_options (int argc, char **argv) // background image .add("bg-format", &param.bg_format, "png", "specify background image format") - .add("svg-nodes-limit", &param.svg_nodes_limit, 0, "if node count in a svg background image exceeds this limit," - " fall back to bitmap background. 0 or negative means no limit.") - .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "embed bitmaps in svg files or save them as external files" - " (even if 0 is specified, some bitmaps may still be embedded)") + .add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit," + " fall back this page to bitmap background; negative value means no limit.") + .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.") // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) From ac8208a6938b83dc259b026f72404dfbddf7a269 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Tue, 17 Jun 2014 11:59:45 +0800 Subject: [PATCH 10/10] Code cleaning. 
--- .../CairoBackgroundRenderer.cc | 16 +++++++--------- src/BackgroundRenderer/CairoBackgroundRenderer.h | 2 +- .../SplashBackgroundRenderer.cc | 11 ++++++++++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 4279d3a..63a6a81 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -38,8 +38,7 @@ CairoBackgroundRenderer::~CairoBackgroundRenderer() { if (itr->second == 0) { - string path; - html_renderer->tmp_files.add(this->build_bitmap_path(itr->first, path)); + html_renderer->tmp_files.add(this->build_bitmap_path(itr->first)); } } } @@ -101,7 +100,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) cairo_t * cr = cairo_create(surface); setCairo(cr); - bitmaps_in_current_page.resize(0); + bitmaps_in_current_page.clear(); bool process_annotation = param.process_annotation; doc->displayPage(this, pageno, param.h_dpi, param.v_dpi, @@ -189,10 +188,10 @@ void CairoBackgroundRenderer::embed_image(int pageno) f_page << "\"/>"; } -string & CairoBackgroundRenderer::build_bitmap_path(int id, string & path) +string CairoBackgroundRenderer::build_bitmap_path(int id) { - // "po" for "PDF Object" - return path = html_renderer->str_fmt("%s/po-%d.jpg", param.dest_dir.c_str(), id); + // "o" for "PDF Object" + return string(html_renderer->str_fmt("%s/o%d.jpg", param.dest_dir.c_str(), id)); } // Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. 
void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) @@ -212,7 +211,7 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac return; int imgId = ref->getRef().num; - auto uri = strdup((char*) html_renderer->str_fmt("po-%d.jpg", imgId)); + auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId)); auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, (unsigned char*) uri, strlen(uri), free, uri); if (st) @@ -231,8 +230,7 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac int len; if (getStreamData(str->getNextStream(), &strBuffer, &len)) { - string path; - ofstream imgfile(build_bitmap_path(imgId, path), ofstream::binary); + ofstream imgfile(build_bitmap_path(imgId), ofstream::binary); imgfile.write(strBuffer, len); free(strBuffer); } diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index 65e4515..8abe5f3 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -55,7 +55,7 @@ protected: private: // convert bitmap stream id to bitmap file name. No pageno prefix, // because a bitmap may be shared by multiple pages. - std::string & build_bitmap_path(int id, std::string & path); + std::string build_bitmap_path(int id); // map // note: if a svg bg fallbacks to bitmap bg, its bitmaps are not taken into account. 
std::unordered_map bitmaps_ref_count; diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/src/BackgroundRenderer/SplashBackgroundRenderer.cc index 970d59e..c596508 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -177,14 +177,23 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, // use unique_ptr to auto delete the object upon exception unique_ptr writer; - if(format == "png") + if(false) { } +#ifdef ENABLE_LIBPNG + else if(format == "png") { writer = unique_ptr(new PNGWriter); } +#endif +#ifdef ENABLE_LIBJPEG else if(format == "jpg") { writer = unique_ptr(new JpegWriter); } +#endif + else + { + throw string("Image format not supported: ") + format; + } if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) throw "Cannot initialize PNGWriter";