From f04941bed002be4f9b1a34e8940c1f35b07dc00e Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 18 Sep 2013 20:24:48 +0800 Subject: [PATCH] support jpeg --- README.md | 2 +- pdf2htmlEX.1.in | 12 +++- src/BackgroundRenderer/BackgroundRenderer.cc | 22 +++++--- .../CairoBackgroundRenderer.cc | 26 ++++++++- .../SplashBackgroundRenderer.cc | 42 +++++++++++--- src/HTMLRenderer/font.cc | 55 ++++++++++--------- src/Param.h | 2 +- src/pdf2htmlEX.cc | 48 +++++++++++----- src/util/const.cc | 31 +++++++---- src/util/const.h | 2 + 10 files changed, 168 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 7c38135..f01ee12 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,7 @@ Read `LICENSE` for more detail. ### Contacts -* Lu Wang or [coolwanglu@twitter](https://twitter.com/coolwanglu) +* 王璐 (Lu Wang) or [coolwanglu@twitter](https://twitter.com/coolwanglu) - For general and person questions - If you want to report an issue, please refer to [How to report](https://github.com/coolwanglu/pdf2htmlEX/wiki/How-to-report) - Accepting messages in **中文**, **English** or **日本語** diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index b91bff9..c1e8de7 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -151,8 +151,8 @@ Specify whether the local matched fonts, for fonts not embedded in PDF, should b If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics. .TP -.B --font-suffix (Default: .ttf) -Specify the suffix of fonts extracted from the PDF file. +.B --font-format (Default: ttf) +Specify the format of fonts extracted from the PDF file. .TP .B --decompose-ligature <0|1> (Default: 0) @@ -227,7 +227,13 @@ If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for .TP .B --bg-format (Default: "png") -Specify the format for background images, currently "png" and "svg" are supported. 
+Specify the format for background images, run `pdf2htmlEX -v` to check all supported formats. + +.SS Background Image + +.TP +.B --bg-format (Default: png) +Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats. .SS PDF Protection diff --git a/src/BackgroundRenderer/BackgroundRenderer.cc b/src/BackgroundRenderer/BackgroundRenderer.cc index e3a2b39..e6cf59c 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.cc +++ b/src/BackgroundRenderer/BackgroundRenderer.cc @@ -5,6 +5,8 @@ * Copyright (C) 2013 Lu Wang */ +#include + #include "HTMLRenderer/HTMLRenderer.h" #include "Param.h" @@ -18,22 +20,26 @@ namespace pdf2htmlEX { BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param) { +#ifdef ENABLE_LIBPNG if(format == "png") { return new SplashBackgroundRenderer(html_renderer, param); } - else if (format == "svg") +#endif +#ifdef ENABLE_LIBJPEG + if(format == "jpg") { + return new SplashBackgroundRenderer(html_renderer, param); + } +#endif #if ENABLE_SVG - return new CairoBackgroundRenderer(html_renderer, param); -#else - return nullptr; -#endif - } - else + if (format == "svg") { - return nullptr; + return new CairoBackgroundRenderer(html_renderer, param); } +#endif + + return nullptr; } } // namespace pdf2htmlEX diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index c6b0b4e..0368868 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -26,7 +26,21 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen) { - // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); + // draw characters as image when + // - in fallback mode + // - OR there is special filling method + // - OR using a writing 
mode font + // - OR using a Type 3 font + if((param.fallback) + || ( (state->getFont()) + && ( (state->getFont()->getWMode()) + || (state->getFont()->getType() == fontType3) + ) + ) + ) + { + CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen); + } } void CairoBackgroundRenderer::init(PDFDoc * doc) @@ -58,7 +72,7 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) if(param.embed_image) html_renderer->tmp_files.add((char*)fn); - surface = cairo_svg_surface_create((char*)fn, page_width, page_height); + surface = cairo_svg_surface_create((char*)fn, page_width * DEFAULT_DPI, page_height * DEFAULT_DPI); } cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); @@ -68,7 +82,13 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno) setPrinting(false); // TODO, check the parameter cairo_save(cr); - // TODO apply crop box + // zoom the image to prevent CairoOutputDev from rounding/increasing thin borders + { + cairo_matrix_t matrix; + cairo_matrix_init_identity(&matrix); + cairo_matrix_scale(&matrix, DEFAULT_DPI, DEFAULT_DPI); + cairo_transform(cr, &matrix); + } doc->displayPage(this, pageno, DEFAULT_DPI, DEFAULT_DPI, 0, diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/src/BackgroundRenderer/SplashBackgroundRenderer.cc index e50a53e..8e6a412 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -8,10 +8,13 @@ #include #include +#include #include #include +#include #include "Base64Stream.h" +#include "util/const.h" #include "SplashBackgroundRenderer.h" @@ -31,13 +34,11 @@ const SplashColor SplashBackgroundRenderer::white = {255,255,255}; */ #if POPPLER_OLDER_THAN_0_23_0 void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state) -#else -void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA) -#endif { 
-#if POPPLER_OLDER_THAN_0_23_0 SplashOutputDev::startPage(pageNum, state); #else +void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA) +{ SplashOutputDev::startPage(pageNum, state, xrefA); #endif clearModRegion(); @@ -93,7 +94,7 @@ void SplashBackgroundRenderer::embed_image(int pageno) if((xmin <= xmax) && (ymin <= ymax)) { { - auto fn = html_renderer->str_fmt("%s/bg%x.png", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); + auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, param.bg_format.c_str()); if(param.embed_image) html_renderer->tmp_files.add((char*)fn); @@ -115,11 +116,17 @@ void SplashBackgroundRenderer::embed_image(int pageno) if(param.embed_image) { - auto path = html_renderer->str_fmt("%s/bg%x.png", param.tmp_dir.c_str(), pageno); + auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, param.bg_format.c_str()); ifstream fin((char*)path, ifstream::binary); if(!fin) throw string("Cannot read background image ") + (char*)path; - f_page << "data:image/png;base64," << Base64Stream(fin); + + auto iter = FORMAT_MIME_TYPE_MAP.find(param.bg_format); + if(iter == FORMAT_MIME_TYPE_MAP.end()) + throw string("Image format not supported: ") + param.bg_format; + + string mime_type = iter->second; + f_page << "data:" << mime_type << ";base64," << Base64Stream(fin); } else { @@ -142,7 +149,26 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, throw string("Cannot open file for background image " ) + filename; // use unique_ptr to auto delete the object upon exception - auto writer = unique_ptr(new PNGWriter); + unique_ptr writer; + + if(false) { } +#ifdef ENABLE_LIBPNG + else if(param.bg_format == "png") + { + writer = unique_ptr(new PNGWriter); + } +#endif +#ifdef ENABLE_LIBJPEG + else if(param.bg_format == "jpg") + { + writer = unique_ptr(new JpegWriter); + } +#endif + else + { + throw 
string("Image format not supported: ") + param.bg_format; + } + if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) throw "Cannot initialize PNGWriter"; diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 44892b1..e8efe73 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -178,7 +178,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo if(param.debug) { - auto fn = str_fmt("%s/__raw_font_%llx", param.tmp_dir.c_str(), info.id, param.font_suffix.c_str()); + auto fn = str_fmt("%s/__raw_font_%llx%s", param.tmp_dir.c_str(), info.id, get_suffix(filepath).c_str()); tmp_files.add((char*)fn); ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf(); } @@ -528,9 +528,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo */ ffw_reencode_unicode_full(); - string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param.tmp_dir.c_str(), param.font_suffix.c_str()); + string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1.%s", param.tmp_dir.c_str(), param.font_format.c_str()); tmp_files.add(cur_tmp_fn); - string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param.tmp_dir.c_str(), param.font_suffix.c_str()); + string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2.%s", param.tmp_dir.c_str(), param.font_format.c_str()); tmp_files.add(other_tmp_fn); ffw_save(cur_tmp_fn.c_str()); @@ -571,9 +571,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them) * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved. */ - string fn = (char*)str_fmt("%s/f%llx%s", + string fn = (char*)str_fmt("%s/f%llx.%s", (param.embed_font ? 
param.tmp_dir : param.dest_dir).c_str(), - info.id, param.font_suffix.c_str()); + info.id, param.font_format.c_str()); if(param.embed_font) tmp_files.add(fn); @@ -685,7 +685,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info) if(path != "") { embed_font(path, font, info); - export_remote_font(info, param.font_suffix, font); + export_remote_font(info, param.font_format, font); } else { @@ -712,7 +712,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info) if(localfontloc != nullptr) { embed_font(string(localfontloc->path->getCString()), font, info); - export_remote_font(info, param.font_suffix, font); + export_remote_font(info, param.font_format, font); delete localfontloc; return; } @@ -739,45 +739,46 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info) export_local_font(info, font, fontname, ""); } -void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font) +void HTMLRenderer::export_remote_font(const FontInfo & info, const string & format, GfxFont * font) { - string mime_type, format; - if(suffix == ".ttf") + string css_font_format; + if(format == "ttf") { - format = "truetype"; - mime_type = "application/x-font-ttf"; + css_font_format = "truetype"; } - else if(suffix == ".otf") + else if(format == "otf") { - format = "opentype"; - mime_type = "application/x-font-otf"; + css_font_format = "opentype"; } - else if(suffix == ".woff") + else if(format == "woff") { - format = "woff"; - mime_type = "application/font-woff"; + css_font_format = "woff"; } - else if(suffix == ".eot") + else if(format == "eot") { - format = "embedded-opentype"; - mime_type = "application/vnd.ms-fontobject"; + css_font_format = "embedded-opentype"; } - else if(suffix == ".svg") + else if(format == "svg") { - format = "svg"; - mime_type = "image/svg+xml"; + css_font_format = "svg"; } else { - cerr << "Warning: unknown font suffix: " << suffix << endl; + throw string("Warning: 
unknown font format: ") + format; } + auto iter = FORMAT_MIME_TYPE_MAP.find(format); + if(iter == FORMAT_MIME_TYPE_MAP.end()) + { + throw string("Warning: unknown font format: ") + format; + } + string mime_type = iter->second; f_css.fs << "@font-face{" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";" << "src:url("; { - auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); + auto fn = str_fmt("f%llx.%s", info.id, format.c_str()); if(param.embed_font) { auto path = param.tmp_dir + "/" + (char*)fn; @@ -793,7 +794,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff } f_css.fs << ")" - << "format(\"" << format << "\");" + << "format(\"" << css_font_format << "\");" << "}" // end of @font-face << "." << CSS::FONT_FAMILY_CN << info.id << "{" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";" diff --git a/src/Param.h b/src/Param.h index d054aca..2662736 100644 --- a/src/Param.h +++ b/src/Param.h @@ -42,7 +42,7 @@ struct Param // fonts int embed_external_font; - std::string font_suffix; + std::string font_format; int decompose_ligature; int auto_hint; std::string external_hint_tool; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index e1f58be..4d69293 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -23,6 +23,11 @@ #include #include "pdf2htmlEX-config.h" + +#if ENABLE_SVG +#include <cairo.h> +#endif + #include "ArgParser.h" #include "Param.h" #include "HTMLRenderer/HTMLRenderer.h" @@ -46,11 +51,28 @@ void show_usage_and_exit(const char * dummy = nullptr) void show_version_and_exit(const char * dummy = nullptr) { cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl; - cerr << "Copyright 2012,2013 Lu Wang " << endl; + cerr << "Copyright 2012,2013 Lu Wang and other contributors" << endl; + cerr << endl; cerr << "Libraries: "; cerr << "poppler " << POPPLER_VERSION << ", "; cerr << "libfontforge " << ffw_get_version() << endl; +#if ENABLE_SVG + cerr << "cairo " << cairo_version_string() << endl; +#endif + cerr << 
endl; cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; + cerr << "Supported image format:"; +#ifdef ENABLE_LIBPNG + cerr << " png"; +#endif +#ifdef ENABLE_LIBJPEG + cerr << " jpg"; +#endif +#if ENABLE_SVG + cerr << " svg"; +#endif + cerr << endl; + cerr << endl; exit(EXIT_SUCCESS); } @@ -113,7 +135,7 @@ void parse_options (int argc, char **argv) // fonts .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") - .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") + .add("font-format", &param.font_format, "ttf", "suffix for embedded font files (ttf,otf,woff,svg)") .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") @@ -253,20 +275,20 @@ void check_param() param.outline_filename = s + ".outline"; } } - if(param.bg_format == "svg") - { -#if not ENABLE_SVG - cerr << "SVG support is not built" << endl; - exit(EXIT_FAILURE); + + if(false) { } +#ifdef ENABLE_LIBPNG + else if (param.bg_format == "png") { } +#endif +#ifdef ENABLE_LIBJPEG + else if (param.bg_format == "jpg") { } +#endif +#if ENABLE_SVG + else if(param.bg_format == "svg") { } #endif - } - else if (param.bg_format == "png") - { - // pass - } else { - cerr << "Unknown format for background: " << param.bg_format << endl; + cerr << "Image format not supported: " << param.bg_format << endl; exit(EXIT_FAILURE); } } diff --git a/src/util/const.cc b/src/util/const.cc index 1303495..4ce95a2 100644 --- a/src/util/const.cc +++ b/src/util/const.cc @@ -23,15 +23,26 @@ const map GB_ENCODED_FONT_NAME_MAP({ }); const std::map EMBED_STRING_MAP({ - {".css", {&Param::embed_css, - "", - "" }}, - {".js", {&Param::embed_javascript, - "", - "" }} + {".css", {&Param::embed_css, + "", + "" }}, + 
{".js", {&Param::embed_javascript, + "", + "" }} }); + +const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP({ + {"eot", "application/vnd.ms-fontobject"}, + {"jpg", "image/jpeg"}, + {"otf", "application/x-font-otf"}, + {"png", "image/png"}, + {"svg", "image/svg+xml"}, + {"ttf", "application/x-font-ttf"}, + {"woff", "application/font-woff"}, +}); + } //namespace pdf2htmlEX diff --git a/src/util/const.h b/src/util/const.h index e6cabdc..7944395 100644 --- a/src/util/const.h +++ b/src/util/const.h @@ -38,6 +38,8 @@ struct EmbedStringEntry }; extern const std::map EMBED_STRING_MAP; +extern const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP; + } // namespace pdf2htmlEX #endif //CONST_H__