diff --git a/CMakeLists.txt b/CMakeLists.txt index 78229bb..76317da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -181,6 +181,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC} src/HTMLRenderer/state.cc src/HTMLRenderer/text.cc src/BackgroundRenderer/BackgroundRenderer.h + src/BackgroundRenderer/BackgroundRenderer.cc src/BackgroundRenderer/SplashBackgroundRenderer.h src/BackgroundRenderer/SplashBackgroundRenderer.cc src/BackgroundRenderer/CairoBackgroundRenderer.h diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 59e1fa8..b91bff9 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -223,6 +223,12 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above. .B --optimize-text <0|1> (Deafult: 0) If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong. +.SS Background Image + +.TP +.B --bg-format (Default: "png") +Specify the format for background images, currently "png" and "svg" are supported. + .SS PDF Protection .TP diff --git a/src/BackgroundRenderer/BackgroundRenderer.cc b/src/BackgroundRenderer/BackgroundRenderer.cc new file mode 100644 index 0000000..e3a2b39 --- /dev/null +++ b/src/BackgroundRenderer/BackgroundRenderer.cc @@ -0,0 +1,39 @@ +/* + * Background renderer + * Render all those things not supported as Image + * + * Copyright (C) 2013 Lu Wang + */ + +#include "HTMLRenderer/HTMLRenderer.h" +#include "Param.h" + +#include "BackgroundRenderer.h" +#include "SplashBackgroundRenderer.h" +#if ENABLE_SVG +#include "CairoBackgroundRenderer.h" +#endif + +namespace pdf2htmlEX { + +BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param) +{ + if(format == "png") + { + return new SplashBackgroundRenderer(html_renderer, param); + } + else if (format == "svg") + { +#if ENABLE_SVG + return new CairoBackgroundRenderer(html_renderer, param); +#else + return nullptr; +#endif + } + else + { + return 
nullptr; + } +} + +} // namespace pdf2htmlEX diff --git a/src/BackgroundRenderer/BackgroundRenderer.h b/src/BackgroundRenderer/BackgroundRenderer.h index ab3d9aa..f6d898e 100644 --- a/src/BackgroundRenderer/BackgroundRenderer.h +++ b/src/BackgroundRenderer/BackgroundRenderer.h @@ -2,31 +2,36 @@ * Background renderer * Render all those things not supported as Image * - * Copyright (C) 2012 Lu Wang + * Copyright (C) 2012,2013 Lu Wang */ #ifndef BACKGROUND_RENDERER_H__ #define BACKGROUND_RENDERER_H__ -#include "pdf2htmlEX-config.h" +#include -#if ENABLE_SVG - -#include "CairoBackgroundRenderer.h" +class PDFDoc; namespace pdf2htmlEX { - typedef CairoBackgroundRenderer BackgroundRenderer; -} -#else +class Param; +class HTMLRenderer; +class BackgroundRenderer +{ +public: + // return nullptr upon failure + static BackgroundRenderer * getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param); -#include "SplashBackgroundRenderer.h" + BackgroundRenderer() {} + virtual ~BackgroundRenderer() {} -namespace pdf2htmlEX { - typedef SplashBackgroundRenderer BackgroundRenderer; -} + virtual void init(PDFDoc * doc) = 0; + virtual void render_page(PDFDoc * doc, int pageno) = 0; + virtual void embed_image(int pageno) = 0; -#endif // ENABLE_SVG +}; + +} // namespace pdf2htmlEX #endif //BACKGROUND_RENDERER_H__ diff --git a/src/BackgroundRenderer/BackgroundRenderer_forward.h b/src/BackgroundRenderer/BackgroundRenderer_forward.h deleted file mode 100644 index 15f1ccb..0000000 --- a/src/BackgroundRenderer/BackgroundRenderer_forward.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Background renderer - * Render all those things not supported as Image - * - * Copyright (C) 2013 Lu Wang - */ - - -/* - * This file includes forward declarations since HTMLRenderer and BackgroundRendere have cross references - */ - -#ifndef BACKGROUND_RENDERER_FORWARD_H__ -#define BACKGROUND_RENDERER_FORWARD_H__ - -#include "pdf2htmlEX-config.h" - -namespace pdf2htmlEX { -#if 
ENABLE_SVG - class CairoBackgroundRenderer; - typedef CairoBackgroundRenderer BackgroundRenderer; -#else - class SplashBackgroundRenderer; - typedef SplashBackgroundRenderer BackgroundRenderer; -#endif // ENABLE_SVG -} - - -#endif //BACKGROUND_RENDERER_FORWARD_H__ diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index 12e3be0..c6b0b4e 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -1,7 +1,7 @@ /* * CairoBackgroundRenderer.cc * - * Copyright (C) 2012 Lu Wang + * Copyright (C) 2012,2013 Lu Wang */ #include @@ -29,6 +29,11 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } +void CairoBackgroundRenderer::init(PDFDoc * doc) +{ + startDoc(doc); +} + static GBool annot_cb(Annot *, void *) { return false; }; diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.h b/src/BackgroundRenderer/CairoBackgroundRenderer.h index e63c0b9..5c0b465 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.h +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.h @@ -2,7 +2,7 @@ * Cairo Background renderer * Render all those things not supported as Image, with Cairo * - * Copyright (C) 2012 Lu Wang + * Copyright (C) 2012,2013 Lu Wang */ @@ -21,7 +21,7 @@ namespace pdf2htmlEX { // Based on BackgroundRenderer from poppler -class CairoBackgroundRenderer : public CairoOutputDev +class CairoBackgroundRenderer : public BackgroundRenderer, CairoOutputDev { public: CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param) @@ -33,14 +33,15 @@ public: virtual ~CairoBackgroundRenderer() { } + virtual void init(PDFDoc * doc); + virtual void render_page(PDFDoc * doc, int pageno); + virtual void embed_image(int pageno); + virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double 
originY, CharCode code, int nBytes, Unicode *u, int uLen); - void render_page(PDFDoc * doc, int pageno); - void embed_image(int pageno); - protected: HTMLRenderer * html_renderer; const Param & param; diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.cc b/src/BackgroundRenderer/SplashBackgroundRenderer.cc index 5385f4d..e50a53e 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.cc +++ b/src/BackgroundRenderer/SplashBackgroundRenderer.cc @@ -1,11 +1,12 @@ /* * SplashBackgroundRenderer.cc * - * Copyright (C) 2012 Lu Wang + * Copyright (C) 2012,2013 Lu Wang */ #include #include +#include #include #include @@ -19,6 +20,7 @@ namespace pdf2htmlEX { using std::string; using std::ifstream; using std::vector; +using std::unique_ptr; const SplashColor SplashBackgroundRenderer::white = {255,255,255}; @@ -63,6 +65,11 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y, } } +void SplashBackgroundRenderer::init(PDFDoc * doc) +{ + startDoc(doc); +} + static GBool annot_cb(Annot *, void *) { return false; }; @@ -134,7 +141,8 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, if(!f) throw string("Cannot open file for background image " ) + filename; - ImgWriter * writer = new PNGWriter(); + // use unique_ptr to auto delete the object upon exception + auto writer = unique_ptr(new PNGWriter); if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) throw "Cannot initialize PNGWriter"; @@ -157,7 +165,6 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, throw "Cannot write background image"; } - delete writer; fclose(f); } diff --git a/src/BackgroundRenderer/SplashBackgroundRenderer.h b/src/BackgroundRenderer/SplashBackgroundRenderer.h index 1301354..dea2bf0 100644 --- a/src/BackgroundRenderer/SplashBackgroundRenderer.h +++ b/src/BackgroundRenderer/SplashBackgroundRenderer.h @@ -2,8 +2,7 @@ * Splash Background renderer * Render all those things not supported as Image, 
with Splash * - * by WangLu - * 2012.08.06 + * Copyright (C) 2012,2013 Lu Wang */ @@ -23,7 +22,7 @@ namespace pdf2htmlEX { // Based on BackgroundRenderer from poppler -class SplashBackgroundRenderer : public SplashOutputDev +class SplashBackgroundRenderer : public BackgroundRenderer, SplashOutputDev { public: static const SplashColor white; @@ -36,6 +35,10 @@ public: virtual ~SplashBackgroundRenderer() { } + virtual void init(PDFDoc * doc); + virtual void render_page(PDFDoc * doc, int pageno); + virtual void embed_image(int pageno); + #if POPPLER_OLDER_THAN_0_23_0 virtual void startPage(int pageNum, GfxState *state); #else @@ -57,11 +60,8 @@ public: SplashOutputDev::fill(state); } - void render_page(PDFDoc * doc, int pageno); - void embed_image(int pageno); - void dump_image(const char * filename, int x1, int y1, int x2, int y2); - protected: + void dump_image(const char * filename, int x1, int y1, int x2, int y2); HTMLRenderer * html_renderer; const Param & param; }; diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index fce9719..98d641e 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -30,7 +30,7 @@ #include "StateManager.h" #include "HTMLTextPage.h" -#include "BackgroundRenderer/BackgroundRenderer_forward.h" +#include "BackgroundRenderer/BackgroundRenderer.h" #include "util/const.h" #include "util/misc.h" @@ -321,8 +321,10 @@ protected: StringFormatter str_fmt; // render background image - friend class SplashBackgroundRenderer; - friend class CairoBackgroundRenderer; + friend class SplashBackgroundRenderer; // ugly! +#if ENABLE_SVG + friend class CairoBackgroundRenderer; // ugly! 
+#endif BackgroundRenderer * bg_renderer; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 6799e86..2380e4f 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -100,8 +100,10 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer = nullptr; if(param.process_nontext) { - bg_renderer = new BackgroundRenderer(this, param); - bg_renderer->startDoc(doc); + bg_renderer = BackgroundRenderer::getBackgroundRenderer(param.bg_format, this, param); + if(!bg_renderer) + throw "Cannot initialize background renderer, unsupported format"; + bg_renderer->init(doc); } int page_count = (param.last_page - param.first_page + 1); diff --git a/src/Param.h b/src/Param.h index 6af32df..d054aca 100644 --- a/src/Param.h +++ b/src/Param.h @@ -57,6 +57,9 @@ struct Param int space_as_offset; int tounicode; int optimize_text; + + // background image + std::string bg_format; // encryption std::string owner_password, user_password; @@ -67,9 +70,7 @@ struct Param std::string data_dir; int css_draw; int debug; - int wa_unicode; - // non-optional std::string input_filename, output_filename; // not a paramater diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index e17e531..e1f58be 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -36,18 +36,6 @@ using namespace pdf2htmlEX; Param param; ArgParser argparser; -void deprecated_single_html(const char * dummy = nullptr) -{ - cerr << "--single_html is deprecated. Use `--embed CFIJO` instead." << endl; - exit(EXIT_FAILURE); -} - -void removed_remove_unsed_glyph(const char * dummy = nullptr) -{ - cerr << "--remove-unsed-glyph is removed. Use a PDF optimization tool instead." 
<< endl; - exit(EXIT_FAILURE); -} - void show_usage_and_exit(const char * dummy = nullptr) { cerr << "Usage: pdf2htmlEX [options] []" << endl; @@ -141,6 +129,9 @@ void parse_options (int argc, char **argv) .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") .add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text") + + // background image + .add("bg-format", &param.bg_format, "png", "specify background image format") // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) @@ -158,10 +149,6 @@ void parse_options (int argc, char **argv) .add("version,v", "print copyright and version info", &show_version_and_exit) .add("help,h", "print usage information", &show_usage_and_exit) - // deprecated - .add("single-html", "", &deprecated_single_html) - .add("remove-unused-glyph", "", &removed_remove_unsed_glyph) - .add("", &param.input_filename, "", "") .add("", &param.output_filename, "", "") ; @@ -192,14 +179,103 @@ void parse_options (int argc, char **argv) } } -int main(int argc, char **argv) +void check_param() { - parse_options(argc, argv); if (param.input_filename == "") { show_usage_and_exit(); } + if(param.output_filename.empty()) + { + const string s = get_filename(param.input_filename); + if(get_suffix(param.input_filename) == ".pdf") + { + param.output_filename = s.substr(0, s.size() - 4) + ".html"; + + } + else + { + param.output_filename = s + ".html"; + } + } + + if(param.page_filename.empty()) + { + const string s = get_filename(param.input_filename); + if(get_suffix(param.input_filename) == ".pdf") + { + param.page_filename = s.substr(0, s.size() - 4) + "%d.page"; + } + else + { + param.page_filename = s + "%d.page"; + } + sanitize_filename(param.page_filename); + } + + else + { + // Need to make sure we have a page number placeholder in the filename 
+ if(!sanitize_filename(param.page_filename)) + { + // Inject the placeholder just before the file extension + const string suffix = get_suffix(param.page_filename); + param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix; + sanitize_filename(param.page_filename); + } + } + if(param.css_filename.empty()) + { + const string s = get_filename(param.input_filename); + + if(get_suffix(param.input_filename) == ".pdf") + { + param.css_filename = s.substr(0, s.size() - 4) + ".css"; + } + else + { + if(!param.split_pages) + param.css_filename = s + ".css"; + } + } + if(param.outline_filename.empty()) + { + const string s = get_filename(param.input_filename); + + if(get_suffix(param.input_filename) == ".pdf") + { + param.outline_filename = s.substr(0, s.size() - 4) + ".outline"; + } + else + { + if(!param.split_pages) + param.outline_filename = s + ".outline"; + } + } + if(param.bg_format == "svg") + { +#if not ENABLE_SVG + cerr << "SVG support is not built" << endl; + exit(EXIT_FAILURE); +#endif + } + else if (param.bg_format == "png") + { + // pass + } + else + { + cerr << "Unknown format for background: " << param.bg_format << endl; + exit(EXIT_FAILURE); + } +} + +int main(int argc, char **argv) +{ + parse_options(argc, argv); + check_param(); + //prepare the directories { char buf[] = "/tmp/pdf2htmlEX-XXXXXX"; @@ -243,89 +319,20 @@ int main(int argc, char **argv) delete ownerPW; } - if (!doc->isOk()) { + if (!doc->isOk()) throw "Cannot read the file"; - } // check for copy permission - if (!doc->okToCopy()) { - if (param.no_drm == 0) { + if (!doc->okToCopy()) + { + if (param.no_drm == 0) throw "Copying of text from this document is not allowed."; - } cerr << "Document has copy-protection bit set." 
<< endl; } param.first_page = min(max(param.first_page, 1), doc->getNumPages()); param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); - if(param.output_filename.empty()) - { - const string s = get_filename(param.input_filename); - if(get_suffix(param.input_filename) == ".pdf") - { - param.output_filename = s.substr(0, s.size() - 4) + ".html"; - - } - else - { - param.output_filename = s + ".html"; - } - } - - if(param.page_filename.empty()) - { - const string s = get_filename(param.input_filename); - if(get_suffix(param.input_filename) == ".pdf") - { - param.page_filename = s.substr(0, s.size() - 4) + "%d.page"; - } - else - { - param.page_filename = s + "%d.page"; - } - sanitize_filename(param.page_filename); - } - - else - { - // Need to make sure we have a page number placeholder in the filename - if(!sanitize_filename(param.page_filename)) - { - // Inject the placeholder just before the file extension - const string suffix = get_suffix(param.page_filename); - param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix; - sanitize_filename(param.page_filename); - } - } - if(param.css_filename.empty()) - { - const string s = get_filename(param.input_filename); - - if(get_suffix(param.input_filename) == ".pdf") - { - param.css_filename = s.substr(0, s.size() - 4) + ".css"; - } - else - { - if(!param.split_pages) - param.css_filename = s + ".css"; - } - } - if(param.outline_filename.empty()) - { - const string s = get_filename(param.input_filename); - - if(get_suffix(param.input_filename) == ".pdf") - { - param.outline_filename = s.substr(0, s.size() - 4) + ".outline"; - } - else - { - if(!param.split_pages) - param.outline_filename = s + ".outline"; - } - - } unique_ptr(new HTMLRenderer(param))->process(doc);