/* * CairoBackgroundRenderer.cc * * Copyright (C) 2012,2013 Lu Wang */ #include #include #include "pdf2htmlEX-config.h" #include "Base64Stream.h" #if ENABLE_SVG #include "CairoBackgroundRenderer.h" #include "SplashBackgroundRenderer.h" namespace pdf2htmlEX { using std::string; using std::ifstream; using std::ofstream; using std::vector; using std::unordered_map; CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) : CairoOutputDev() , html_renderer(html_renderer) , param(param) , surface(nullptr) { } CairoBackgroundRenderer::~CairoBackgroundRenderer() { for(auto const& p : bitmaps_ref_count) { if (p.second == 0) { html_renderer->tmp_files.add(this->build_bitmap_path(p.first)); } } } void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen) { // draw characters as image when // - in fallback mode // - OR there is special filling method // - OR using a writing mode font // - OR using a Type 3 font while param.process_type3 is not enabled // - OR the text is used as path if((param.fallback || param.proof) || ( (state->getFont()) && ( (state->getFont()->getWMode()) || ((state->getFont()->getType() == fontType3) && (!param.process_type3)) || (state->getRender() >= 4) ) ) ) { CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen); } // If a char is treated as image, it is not subject to cover test // (see HTMLRenderer::drawString), so don't increase drawn_char_count. else if (param.correct_text_visibility) { if (html_renderer->is_char_covered(drawn_char_count)) CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen); drawn_char_count++; } } void CairoBackgroundRenderer::beginTextObject(GfxState *state) { if (param.proof == 2) proof_begin_text_object(state, this); CairoOutputDev::beginTextObject(state); } void CairoBackgroundRenderer::beginString(GfxState *state, GooString * str) { if (param.proof == 2) proof_begin_string(state, this); CairoOutputDev::beginString(state, str); } void CairoBackgroundRenderer::endTextObject(GfxState *state) { if (param.proof == 2) proof_end_text_object(state, this); CairoOutputDev::endTextObject(state); } void CairoBackgroundRenderer::updateRender(GfxState *state) { if (param.proof == 2) proof_update_render(state, this); CairoOutputDev::updateRender(state); } void CairoBackgroundRenderer::init(PDFDoc * doc) { startDoc(doc); } static GBool annot_cb(Annot *, void * pflag) { return (*((bool*)pflag)) ? gTrue : gFalse; }; bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) { drawn_char_count = 0; double page_width; double page_height; if(param.use_cropbox) { page_width = doc->getPageCropWidth(pageno); page_height = doc->getPageCropHeight(pageno); } else { page_width = doc->getPageMediaWidth(pageno); page_height = doc->getPageMediaHeight(pageno); } string fn = (char*)html_renderer->str_fmt("%s/bg%x.svg", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); if(param.embed_image) html_renderer->tmp_files.add(fn); surface = cairo_svg_surface_create(fn.c_str(), page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); cairo_t * cr = cairo_create(surface); setCairo(cr); bitmaps_in_current_page.clear(); bool process_annotation = param.process_annotation; doc->displayPage(this, pageno, param.h_dpi, param.v_dpi, 0, (!(param.use_cropbox)), false, false, nullptr, nullptr, &annot_cb, &process_annotation); setCairo(nullptr); { auto status = cairo_status(cr); cairo_destroy(cr); if(status) throw string("Cairo error: ") + cairo_status_to_string(status); } cairo_surface_finish(surface); { auto status = cairo_surface_status(surface); cairo_surface_destroy(surface); surface = nullptr; if(status) throw string("Error in cairo: ") + cairo_status_to_string(status); } //check node count in the svg file, fall back to bitmap_renderer if necessary. if (param.svg_node_count_limit >= 0) { int n = 0; char c; ifstream svgfile(fn); //count of '<' in the file should be an approximation of node count. while(svgfile >> c) { if (c == '<') ++n; if (n > param.svg_node_count_limit) { html_renderer->tmp_files.add(fn); return false; } } } // the svg file is actually used, so add its bitmaps' ref count. for (auto id : bitmaps_in_current_page) ++bitmaps_ref_count[id]; return true; } void CairoBackgroundRenderer::embed_image(int pageno) { auto & f_page = *(html_renderer->f_curpage); // SVGs introduced by or background-image can't have external resources; // SVGs introduced by and can, but they are more expensive for browsers. // So we use if the SVG contains no external bitmaps, and use otherwise. // See also: // https://developer.mozilla.org/en-US/docs/Web/SVG/SVG_as_an_Image // http://stackoverflow.com/questions/4476526/do-i-use-img-object-or-embed-for-svg-files if (param.svg_embed_bitmap || bitmaps_in_current_page.empty()) f_page << "str_fmt("%s/bg%x.svg", param.tmp_dir.c_str(), pageno); ifstream fin((char*)path, ifstream::binary); if(!fin) throw string("Cannot read background image ") + (char*)path; f_page << "data:image/svg+xml;base64," << Base64Stream(fin); } else { f_page << (char*)html_renderer->str_fmt("bg%x.svg", pageno); } f_page << "\"/>"; } string CairoBackgroundRenderer::build_bitmap_path(int id) { // "o" for "PDF Object" return string(html_renderer->str_fmt("%s/o%d.jpg", param.dest_dir.c_str(), id)); } // Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) { if (param.svg_embed_bitmap) { CairoOutputDev::setMimeData(str, ref, image); return; } // TODO dump bitmaps in other formats. if (str->getKind() != strDCT) return; // TODO inline image? if (ref == nullptr || !ref->isRef()) return; // We only dump rgb or gray jpeg without /Decode array. // // Although jpeg support CMYK, PDF readers do color conversion incompatibly with most other // programs (including browsers): other programs invert CMYK color if 'Adobe' marker (app14) presents // in a jpeg file; while PDF readers don't, they solely rely on /Decode array to invert color. // It's a bit complicated to decide whether a CMYK jpeg is safe to dump, so we don't dump at all. // See also: // JPEG file embedded in PDF (CMYK) https://forums.adobe.com/thread/975777 // http://stackoverflow.com/questions/3123574/how-to-convert-from-cmyk-to-rgb-in-java-correctly // // In PDF, jpeg stream objects can also specify other color spaces like DeviceN and Separation, // It is also not safe to dump them directly. Object obj; str->getDict()->lookup("ColorSpace", &obj); if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) ) { obj.free(); return; } obj.free(); str->getDict()->lookup("Decode", &obj); if (obj.isArray()) { obj.free(); return; } obj.free(); int imgId = ref->getRef().num; auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId)); auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, (unsigned char*) uri, strlen(uri), free, uri); if (st) { free(uri); return; } bitmaps_in_current_page.push_back(imgId); if(bitmaps_ref_count.find(imgId) != bitmaps_ref_count.end()) return; bitmaps_ref_count[imgId] = 0; char *strBuffer; int len; if (getStreamData(str->getNextStream(), &strBuffer, &len)) { ofstream imgfile(build_bitmap_path(imgId), ofstream::binary); imgfile.write(strBuffer, len); free(strBuffer); } } } // namespace pdf2htmlEX #endif // ENABLE_SVG