diff --git a/3rdparty/poppler/git/CairoOutputDev.h b/3rdparty/poppler/git/CairoOutputDev.h index c9ae33d..727c687 100644 --- a/3rdparty/poppler/git/CairoOutputDev.h +++ b/3rdparty/poppler/git/CairoOutputDev.h @@ -272,7 +272,7 @@ protected: cairo_filter_t getFilterForSurface(cairo_surface_t *image, GBool interpolate); GBool getStreamData (Stream *str, char **buffer, int *length); - void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); + virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); void fillToStrokePathClip(GfxState *state); void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y); diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 4f30305..795336a 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -19,8 +19,7 @@ namespace pdf2htmlEX { -using std::string; -using std::ifstream; +using namespace std; CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) : CairoOutputDev() @@ -29,6 +28,15 @@ CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, c , surface(nullptr) { } +CairoBackgroundRenderer::~CairoBackgroundRenderer() +{ + for(auto i = bitmaps_ref_count.begin(); i != bitmaps_ref_count.end(); ++i) + { + if (i->second == 0) + html_renderer->tmp_files.add(this->get_bitmap_path(i->first)); + } +} + void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, @@ -86,6 +94,8 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) cairo_t * cr = cairo_create(surface); setCairo(cr); + bitmaps_in_current_page.resize(0); + bool process_annotation = param.process_annotation; doc->displayPage(this, pageno, param.h_dpi, param.v_dpi, 0, @@ -131,6 +141,10 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) } } + 
// the svg file is actually used, so add its bitmaps' ref count. + for (auto i = bitmaps_in_current_page.begin(); i != bitmaps_in_current_page.end(); i++) + ++bitmaps_ref_count[*i]; + return true; } @@ -138,7 +152,19 @@ void CairoBackgroundRenderer::embed_image(int pageno) { auto & f_page = *(html_renderer->f_curpage); - f_page << " or background-image can't have external resources; + // SVGs introduced by and can, but they are more expensive for browsers. + // So we use if the SVG contains no external bitmaps, and use otherwise. + // See also: + // https://developer.mozilla.org/en-US/docs/Web/SVG/SVG_as_an_Image + // http://stackoverflow.com/questions/4476526/do-i-use-img-object-or-embed-for-svg-files + + if (param.svg_embed_bitmap || bitmaps_in_current_page.empty()) + f_page << ""; } +// use object number as bitmap file name, without pageno prefix, +// because a bitmap may be shared by multiple pages. +const char* CairoBackgroundRenderer::get_bitmap_path(int id) +{ + return html_renderer->str_fmt("%s/%d.jpg", param.dest_dir.c_str(), id); +} +// Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. +void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) +{ + if (param.svg_embed_bitmap) + { + CairoOutputDev::setMimeData(str, ref, image); + return; + } + + // TODO dump bitmaps in other formats. + if (str->getKind() != strDCT) + return; + + // TODO inline image? 
+ if (ref == nullptr || !ref->isRef()) + return; + + int imgId = ref->getRef().num; + auto uri = strdup((char*) html_renderer->str_fmt("%d.jpg", imgId)); + auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, + (unsigned char*) uri, strlen(uri), gfree, uri); + if (st) + { + gfree(uri); + return; + } + bitmaps_in_current_page.push_back(imgId); + + if(bitmaps_ref_count.find(imgId) != bitmaps_ref_count.end()) + return; + + bitmaps_ref_count[imgId] = 0; + + char *strBuffer; + int len; + if (getStreamData(str->getNextStream(), &strBuffer, &len)) + { + string path = get_bitmap_path(imgId); + ofstream imgfile(path, ofstream::binary); + imgfile.write(strBuffer, len); + gfree(strBuffer); + } +} + } // namespace pdf2htmlEX #endif // ENABLE_SVG diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index b24b05b..25d366a 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "pdf2htmlEX-config.h" @@ -26,7 +28,7 @@ class CairoBackgroundRenderer : public BackgroundRenderer, CairoOutputDev public: CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param); - virtual ~CairoBackgroundRenderer() { } + virtual ~CairoBackgroundRenderer(); virtual void init(PDFDoc * doc); virtual bool render_page(PDFDoc * doc, int pageno); @@ -41,10 +43,23 @@ public: double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); +protected: + virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); + protected: HTMLRenderer * html_renderer; const Param & param; cairo_surface_t * surface; + +private: + // convert bitmap stream id to bitmap file name. No pageno prefix, + // because a bitmap may be shared by multiple pages. 
+ const char* get_bitmap_path(int id); + // map<id_of_bitmap_stream, usage_count_in_all_svgs> + // note: if a svg bg falls back to bitmap bg, its bitmaps are not taken into account. + std::map<int, int> bitmaps_ref_count; + // id of bitmaps' stream used by current page + std::vector<int> bitmaps_in_current_page; }; } diff --git a/src/Param.h b/src/Param.h index 8d19361..3f20eca 100644 --- a/src/Param.h +++ b/src/Param.h @@ -64,6 +64,7 @@ struct Param // background image std::string bg_format; int svg_nodes_limit; + int svg_embed_bitmap; // encryption std::string owner_password, user_password; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 0df3d30..ccb426d 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -192,6 +192,9 @@ void parse_options (int argc, char **argv) .add("bg-format", &param.bg_format, "png", "specify background image format") .add("svg-nodes-limit", &param.svg_nodes_limit, 0, "if node count in a svg background image exceeds this limit," " fall back to bitmap background. 0 or negative means no limit.") + .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "embed bitmaps in svg files or save them as external files" + " (even if 0 is specified, some bitmaps may still be embedded)") + // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", true)