From 5a157def7ebfa36e0f235de99e6fe5c4f27a62a6 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 15 Aug 2012 02:28:19 +0800 Subject: [PATCH] background image embedding works now --- README.md | 4 +- src/HTMLRenderer.h | 25 ++++++++++ src/HTMLRenderer/export.cc | 19 -------- src/HTMLRenderer/general.cc | 24 ++++------ src/util.h | 94 ++++++++++++++++++++++--------------- 5 files changed, 92 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index d404e96..4464285 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ Dependency HOW TO COMPILE ---------------------------- - cmake . && make + cmake . && make && sudo make install HOW TO USE ---------------------------- - bin/pdf2htmlEX /path/to/sample.pdf + pdf2htmlEX /path/to/sample.pdf LICENSE diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index 8fa5cd3..f9ba245 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -30,6 +30,31 @@ #include "Param.h" #include "util.h" + +/* + * Naming Convention + * + * ID + * + * p - Page + * + * CSS classes + * + * p - Page + * l - Line + * w - White space + * i - Image + * + * Reusable CSS classes + * + * f - Font (also for font names) + * s - font Size + * w - White space + * t - Transform matrix + * c - Color + * + */ + class HTMLRenderer : public OutputDev { public: diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index 2e030b1..13c660c 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -15,25 +15,6 @@ using boost::algorithm::ifind_first; -/* - * CSS classes - * - * p - Page - * l - Line - * w - White space - * i - Image - * - * - * Reusable CSS classes - * - * f - Font (also for font names) - * s - font Size - * w - White space - * t - Transform matrix - * c - Color - * - */ - void HTMLRenderer::export_remote_font(long long fn_id, const string & suffix, const string & fontfileformat, GfxFont * font) { diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 291fb77..a746328 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -3,8 +3,6 @@ * * Hanlding general stuffs * - * TODO: better name for this file? - * * by WangLu * 2012.08.14 */ @@ -20,8 +18,6 @@ using std::flush; -using boost::filesystem::file_size; - HTMLRenderer::HTMLRenderer(const Param * param) :line_opened(false) ,image_count(0) @@ -73,7 +69,6 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(working_dir() / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2); } - doc->displayPage(this, i, param->h_dpi, param->v_dpi, 0, true, false, false, nullptr, nullptr, nullptr, nullptr); @@ -90,7 +85,8 @@ void HTMLRenderer::process(PDFDoc *doc) void HTMLRenderer::pre_process() { - html_fout.open(working_dir() / param->output_filename, ofstream::binary); // we may output utf8 characters, so use binary + // we may output utf8 characters, so use binary + html_fout.open(working_dir() / param->output_filename, ofstream::binary); allcss_fout.open(working_dir() / "all.css", ofstream::binary); fontscript_fout.open(tmp_dir / "pdf2htmlEX.pe", ofstream::binary); @@ -129,27 +125,25 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) assert(!line_opened); - html_fout << format("
") % pageNum << endl; - html_fout << "background-image:url("; + allcss_fout << format("#p%|1$x|{width:%2%px;height:%3%px;") % pageNum % pageWidth % pageHeight; + + allcss_fout << "background-image:url("; const std::string fn = (format("p%|1$x|.png") % pageNum).str(); if(param->single_html) { auto path = tmp_dir / fn; - html_fout << "'data:image/png;base64,"; - copy_base64(html_fout, ifstream(path), file_size(path)); - html_fout << "'"; + allcss_fout << "'data:image/png;base64," << base64_filter(ifstream(path, ifstream::binary)) << "'"; } else { - html_fout << fn; + allcss_fout << fn; } - html_fout << format(");background-position:0 0;background-size:%1%px %2%px;background-repeat:no-repeat;") % pageWidth % pageHeight; + allcss_fout << format(");background-position:0 0;background-size:%1%px %2%px;background-repeat:no-repeat;}") % pageWidth % pageHeight; - html_fout << "\">" << endl; - cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0; cur_tx = cur_ty = 0; cur_font_size = 0; diff --git a/src/util.h b/src/util.h index 6356522..e638efb 100644 --- a/src/util.h +++ b/src/util.h @@ -14,10 +14,10 @@ #include #include #include +#include -#include -#include - +#include +#include #include #include "Consts.h" @@ -25,11 +25,8 @@ using std::istream; using std::ostream; using std::istream_iterator; -using std::ostream_iterator; -using std::copy; - -using boost::archive::iterators::base64_from_binary; -using boost::archive::iterators::transform_width; +using std::endl; +using std::noskipws; // mute gcc warning of unused function namespace @@ -123,40 +120,61 @@ public: double _[6]; }; -static inline void copy_base64 (ostream & out, istream & in, size_t length) +class base64_filter { - typedef base64_from_binary < transform_width < istream_iterator, 6, 8 > > base64_iter; - copy(base64_iter(istream_iterator(in)), base64_iter(istream_iterator()), ostream_iterator(out)); - switch(length % 3) - { - case 1: - out << '='; - // fall through - case 2: - out << '='; - // fall through - case 0: - default: - break; - } -} +public: -static inline void copy_base64 (ostream & out, istream && in, size_t length) -{ - typedef base64_from_binary < transform_width < istream_iterator, 6, 8 > > base64_iter; - copy(base64_iter(istream_iterator(in)), base64_iter(istream_iterator()), ostream_iterator(out)); - switch(length % 3) + base64_filter(istream & in) + : in_iter(istream_iterator(in >> noskipws)) + { } + + base64_filter(istream && in) + : in_iter(istream_iterator(in >> noskipws)) + { } + + ostream & dumpto(ostream & out) { - case 1: + unsigned char buf[3]; + istream_iterator end_iter; + int cnt = 0; + while(in_iter != end_iter) + { + buf[cnt++] = *(in_iter++); + if(cnt == 3) + { + out << base64_encoding[(buf[0] & 0xfc)>>2]; + out << base64_encoding[((buf[0] & 0x03)<<4) | ((buf[1] & 0xf0)>>4)]; + out << base64_encoding[((buf[1] & 0x0f)<<2) | ((buf[2] & 0xc0)>>6)]; + out << base64_encoding[(buf[2] & 0x3f)]; + cnt = 0; + } + } + if(cnt > 0) + { + for(int i = cnt; i < 3; ++i) + buf[i] = 0; + out << base64_encoding[(buf[0] & 0xfc)>>2]; + out << base64_encoding[((buf[0] & 0x03)<<4) | ((buf[1] & 0xf0)>>4)]; + if(cnt > 1) + { + out << base64_encoding[(buf[1] & 0x0f)<<2]; + } + else + { + out << '='; + } out << '='; - // fall through - case 2: - out << '='; - // fall through - case 0: - default: - break; + } + + return out; } -} + +private: + static constexpr const char * base64_encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + istream_iterator in_iter; +}; + +static inline ostream & operator << (ostream & out, base64_filter & bf) { return bf.dumpto(out); } +static inline ostream & operator << (ostream & out, base64_filter && bf) { return bf.dumpto(out); } #endif //UTIL_H__