From 1bfe86187e649dc5af4ae80482900d9f7a995c28 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 01:59:04 +0800 Subject: [PATCH 01/17] working on CSS draw --- CMakeLists.txt | 1 + share/base.css | 8 +++ src/HTMLRenderer/LineBuffer.cc | 2 +- src/HTMLRenderer/draw.cc | 96 ++++++++++++++++++++++++++++++++++ src/HTMLRenderer/general.cc | 12 +++-- src/HTMLRenderer/link.cc | 83 +++++++---------------------- src/HTMLRenderer/state.cc | 44 ++++++++-------- src/HTMLRenderer/text.cc | 2 +- src/include/HTMLRenderer.h | 24 ++++++--- src/include/util.h | 12 +++++ src/util.cc | 46 ++++++++++++++++ 11 files changed, 229 insertions(+), 101 deletions(-) create mode 100644 src/HTMLRenderer/draw.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index e80d0c3..ffccdd9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,7 @@ add_executable(pdf2htmlEX src/HTMLRenderer/export.cc src/HTMLRenderer/text.cc src/HTMLRenderer/image.cc + src/HTMLRenderer/draw.cc src/HTMLRenderer/link.cc src/include/namespace.h src/HTMLRenderer/LineBuffer.cc diff --git a/share/base.css b/share/base.css index b1394de..c1dae94 100644 --- a/share/base.css +++ b/share/base.css @@ -81,4 +81,12 @@ span { } .a { } +.cr { + position:absolute; + transform-origin:0% 100%; + -ms-transform-origin:0% 100%; + -moz-transform-origin:0% 100%; + -webkit-transform-origin:0% 100%; + -o-transform-origin:0% 100%; +} /* Base CSS END */ diff --git a/src/HTMLRenderer/LineBuffer.cc b/src/HTMLRenderer/LineBuffer.cc index 6ad6eb3..32e557a 100644 --- a/src/HTMLRenderer/LineBuffer.cc +++ b/src/HTMLRenderer/LineBuffer.cc @@ -22,7 +22,7 @@ using std::ostream; void HTMLRenderer::LineBuffer::reset(GfxState * state) { state->transform(state->getCurX(), state->getCurY(), &x, &y); - tm_id = renderer->cur_tm_id; + tm_id = renderer->cur_ttm_id; } void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc new file mode 100644 index 0000000..6e1ddbe --- /dev/null +++ b/src/HTMLRenderer/draw.cc @@ -0,0 +1,96 @@ +/* + * Draw.cc + * + * Handling path drawing + * + * by WangLu + * 2012.10.01 + */ + +#include "HTMLRenderer.h" +#include "util.h" +#include "namespace.h" + +namespace pdf2htmlEX { + +using std::swap; + +static bool is_horizontal_line(GfxSubpath * path) +{ + return ((path->getNumPoints() == 2) + && (!path->getCurve(1)) + && (_equal(path->getY(0), path->getY(1)))); +} + +static bool is_rectangle(GfxSubpath * path) +{ + if (!(((path->getNumPoints() != 4) && (path->isClosed())) + || ((path->getNumPoints() == 5) + && _equal(path->getX(0), path->getX(4)) + && _equal(path->getY(0), path->getY(4))))) + return false; + + return (_equal(path->getY(0), path->getY(1)) + && _equal(path->getX(1), path->getX(2)) + && _equal(path->getY(2), path->getY(3)) + && _equal(path->getX(3), path->getX(0))) + || (_equal(path->getX(0), path->getX(1)) + && _equal(path->getY(1), path->getY(2)) + && _equal(path->getX(2), path->getX(3)) + && _equal(path->getY(3), path->getY(0))); +} + +//TODO track state +//TODO connection style +void HTMLRenderer::stroke(GfxState *state) +{ + GfxPath * path = state->getPath(); + for(int i = 0; i < path->getNumSubpaths(); ++i) + { + GfxSubpath * subpath = path->getSubpath(i); + + if(is_horizontal_line(subpath)) + { + close_text_line(); + double x1 = subpath->getX(0); + double x2 = subpath->getX(1); + double y = subpath->getY(0); + if(x1 > x2) swap(x1, x2); + + _transform(state->getCTM(), x1, y); + + html_fout << "
"; + } + else if (is_rectangle(subpath)) + { + close_text_line(); + double x1 = subpath->getX(0); + double x2 = subpath->getX(2); + double y1 = subpath->getY(0); + double y2 = subpath->getY(2); + + if(x1 > x2) swap(x1, x2); + if(y1 > y2) swap(y1, y2); + + double x,y,w,h,w1,w2; + css_fix_rectangle_border_width(x1, y1, x2, y2, state->getLineWidth(), + x,y,w,h,w1,w2); + + _transform(state->getCTM(), x, y); + + html_fout << "
getCTM()) + << "\" style=\"border:solid red;border-width:" + << _round(w1) << "px " + << _round(w2) << " px;left:" + << _round(x) << "px;bottom:" + << _round(y) << "px;width:" + << _round(w) << "px;height:" + << _round(h) << "px;\">
"; + } + } +} + +} // namespace pdf2htmlEX diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index ffd3602..fea717a 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -225,9 +225,9 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) cur_font_size = draw_font_size = 0; cur_fs_id = install_font_size(cur_font_size); - memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); - memcpy(draw_ctm, id_matrix, sizeof(draw_ctm)); - cur_tm_id = install_transform_matrix(draw_ctm); + memcpy(cur_text_tm, id_matrix, sizeof(cur_text_tm)); + memcpy(draw_text_tm, id_matrix, sizeof(draw_text_tm)); + cur_ttm_id = install_transform_matrix(draw_text_tm); cur_letter_space = cur_word_space = 0; cur_ls_id = install_letter_space(cur_letter_space); @@ -247,7 +247,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) } void HTMLRenderer::endPage() { - close_line(); + close_text_line(); // process links before the page is closed cur_doc->processLinks(this, pageNum); @@ -404,7 +404,9 @@ void HTMLRenderer::post_process() void HTMLRenderer::fix_stream (std::ostream & out) { - out << hex; + // we output all ID's in hex + // browsers are not happy with scientific notations + out << hex << fixed; } void HTMLRenderer::add_tmp_file(const string & fn) diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 9abb917..6b71bf5 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -22,62 +22,6 @@ using std::ostringstream; using std::min; using std::max; -static void _transform(const double * ctm, double & x, double & y) -{ - double xx = x, yy = y; - x = ctm[0] * xx + ctm[2] * yy + ctm[4]; - y = ctm[1] * xx + ctm[3] * yy + ctm[5]; -} - -static void _get_transformed_rect(AnnotLink * link, const double * ctm, double & x1, double & y1, double & x2, double & y2) -{ - double _x1, _x2, _y1, _y2; - link->getRect(&_x1, &_y1, &_x2, &_y2); - - _transform(ctm, _x1, _y1); - _transform(ctm, _x2, _y2); - - x1 = min(_x1, _x2); - x2 = max(_x1, _x2); - y1 = min(_y1, _y2); - y2 = max(_y1, _y2); -} - -/* - * In PDF, edges of the rectangle are in the middle of the borders - * In HTML, edges are completely outside the rectangle - */ -static void _fix_border_width(double & x1, double & y1, double & x2, double & y2, - double border_width, double & border_top_bottom_width, double & border_left_right_width) -{ - double w = x2 - x1; - if(w > border_width) - { - x1 += border_width / 2; - x2 -= border_width / 2; - border_left_right_width = border_width; - } - else - { - x1 += w / 2; - x2 -= w / 2; - border_left_right_width = border_width + w/2; - } - double h = y2 - y1; - if(h > border_width) - { - y1 += border_width / 2; - y2 -= border_width / 2; - border_top_bottom_width = border_width; - } - else - { - y1 += h / 2; - y2 -= h / 2; - border_top_bottom_width = border_width + h/2; - } -} - /* * The detailed rectangle area of the link destination * Will be parsed and performed by Javascript @@ -164,6 +108,7 @@ static string get_dest_detail_str(int pageno, LinkDest * dest) /* * Based on pdftohtml from poppler * TODO: CSS for link rectangles + * TODO: share rectangle draw with css-draw */ void HTMLRenderer::processLink(AnnotLink * al) { @@ -239,10 +184,17 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << ">"; } - html_fout << "
getRect(&x1, &y1, &x2, &y2); + x = min(x1, x2); + y = min(y1, y2); + w = max(x1, x2) - x; + h = max(y1, y2) - y; double border_width = 0; double border_top_bottom_width = 0; @@ -254,8 +206,10 @@ void HTMLRenderer::processLink(AnnotLink * al) if(border_width > 0) { { - _fix_border_width(x1, y1, x2, y1, - border_width, border_top_bottom_width, border_left_right_width); + css_fix_rectangle_border_width(x1, y2, x2, y2, border_width, + x, y, w, h, + border_top_bottom_width, border_left_right_width); + if(abs(border_top_bottom_width - border_left_right_width) < EPS) html_fout << "border-width:" << _round(border_top_bottom_width) << "px;"; else @@ -313,12 +267,13 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << "border-style:none;"; } + _transform(default_ctm, x, y); html_fout << "position:absolute;" - << "left:" << _round(x1- border_left_right_width) << "px;" - << "bottom:" << _round(y1 - border_top_bottom_width) << "px;" - << "width:" << _round(x2-x1) << "px;" - << "height:" << _round(y2-y1) << "px;"; + << "left:" << _round(x) << "px;" + << "bottom:" << _round(y) << "px;" + << "width:" << _round(w) << "px;" + << "height:" << _round(h) << "px;"; // fix for IE html_fout << "background-color:rgba(255,255,255,0.000001);"; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 6f7cd55..521ab12 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -112,7 +112,7 @@ void HTMLRenderer::check_state_change(GfxState * state) // backup the current ctm for need_recheck_position double old_ctm[6]; - memcpy(old_ctm, cur_ctm, sizeof(old_ctm)); + memcpy(old_ctm, cur_text_tm, sizeof(old_ctm)); // ctm & text ctm & hori scale if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed) @@ -131,29 +131,29 @@ void HTMLRenderer::check_state_change(GfxState * state) new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5]; //new_ctm[4] = new_ctm[5] = 0; - if(!_tm_equal(new_ctm, cur_ctm)) + if(!_tm_equal(new_ctm, cur_text_tm)) { need_recheck_position = true; need_rescale_font = true; - memcpy(cur_ctm, new_ctm, sizeof(cur_ctm)); + memcpy(cur_text_tm, new_ctm, sizeof(cur_text_tm)); } } - // draw_ctm, draw_scale + // draw_text_tm, draw_scale // depends: font size & ctm & text_ctm & hori scale if(need_rescale_font) { - double new_draw_ctm[6]; - memcpy(new_draw_ctm, cur_ctm, sizeof(new_draw_ctm)); + double new_draw_text_tm[6]; + memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); - double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_ctm[2] * new_draw_ctm[2] + new_draw_ctm[3] * new_draw_ctm[3]); + double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_text_tm[2] * new_draw_text_tm[2] + new_draw_text_tm[3] * new_draw_text_tm[3]); double new_draw_font_size = cur_font_size; if(_is_positive(new_draw_scale)) { new_draw_font_size *= new_draw_scale; for(int i = 0; i < 4; ++i) - new_draw_ctm[i] /= new_draw_scale; + new_draw_text_tm[i] /= new_draw_scale; } else { @@ -172,11 +172,11 @@ void HTMLRenderer::check_state_change(GfxState * state) draw_font_size = new_draw_font_size; cur_fs_id = install_font_size(draw_font_size); } - if(!(_tm_equal(new_draw_ctm, draw_ctm, 4))) + if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4))) { new_line_state = max(new_line_state, NLS_DIV); - memcpy(draw_ctm, new_draw_ctm, sizeof(draw_ctm)); - cur_tm_id = install_transform_matrix(draw_ctm); + memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm)); + cur_ttm_id = install_transform_matrix(draw_text_tm); } } @@ -198,23 +198,23 @@ void HTMLRenderer::check_state_change(GfxState * state) */ bool merged = false; - if(_tm_equal(old_ctm, cur_ctm, 4)) + if(_tm_equal(old_ctm, cur_text_tm, 4)) { double dy = cur_ty - draw_ty; - double tdx = old_ctm[4] - cur_ctm[4] - cur_ctm[2] * dy; - double tdy = old_ctm[5] - cur_ctm[5] - cur_ctm[3] * dy; + double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy; + double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy; - if(_equal(cur_ctm[0] * tdy, cur_ctm[1] * tdx)) + if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx)) { - if(abs(cur_ctm[0]) > EPS) + if(abs(cur_text_tm[0]) > EPS) { - draw_tx += tdx / cur_ctm[0]; + draw_tx += tdx / cur_text_tm[0]; draw_ty += dy; merged = true; } - else if (abs(cur_ctm[1]) > EPS) + else if (abs(cur_text_tm[1]) > EPS) { - draw_tx += tdy / cur_ctm[1]; + draw_tx += tdy / cur_text_tm[1]; draw_ty += dy; merged = true; } @@ -312,7 +312,7 @@ void HTMLRenderer::reset_state_change() color_changed = false; } -void HTMLRenderer::prepare_line(GfxState * state) +void HTMLRenderer::prepare_text_line(GfxState * state) { if(!line_opened) { @@ -321,7 +321,7 @@ void HTMLRenderer::prepare_line(GfxState * state) if(new_line_state == NLS_DIV) { - close_line(); + close_text_line(); line_buf.reset(state); @@ -353,7 +353,7 @@ void HTMLRenderer::prepare_line(GfxState * state) line_opened = true; } -void HTMLRenderer::close_line() +void HTMLRenderer::close_text_line() { if(line_opened) { diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 47a24ec..3b9b179 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -484,7 +484,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) // see if the line has to be closed due to state change check_state_change(state); - prepare_line(state); + prepare_text_line(state); // Now ready to output // get the unicodes diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index 5457296..c212b8e 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -42,6 +42,8 @@ * j - Js data * p - Page * + * cr - CSS draw Rectangle + * * Reusable CSS classes * * t - Transform matrix @@ -82,7 +84,7 @@ class HTMLRenderer : public OutputDev virtual GBool interpretType3Chars() { return gFalse; } // Does this device need non-text content? - virtual GBool needNonText() { return gFalse; } + virtual GBool needNonText() { return gTrue; } virtual void setDefaultCTM(double *ctm); @@ -121,6 +123,8 @@ class HTMLRenderer : public OutputDev virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); + virtual void stroke(GfxState *state); + virtual void processLink(AnnotLink * al); protected: @@ -190,8 +194,13 @@ class HTMLRenderer : public OutputDev void reset_state_change(); // prepare the line context, (close old tags, open new tags) // make sure the current HTML style consistent with PDF - void prepare_line(GfxState * state); - void close_line(); + void prepare_text_line(GfxState * state); + void close_text_line(); + + //////////////////////////////////////////////////// + // CSS drawing + //////////////////////////////////////////////////// + void css_draw_rectange(); //////////////////////////////////////////////////// @@ -246,16 +255,15 @@ class HTMLRenderer : public OutputDev bool font_changed; // transform matrix - long long cur_tm_id; + long long cur_ttm_id; bool ctm_changed; bool text_mat_changed; // horizontal scaling bool hori_scale_changed; - // this is CTM * TextMAT in PDF, not only CTM + // this is CTM * TextMAT in PDF // [4] and [5] are ignored, // as we'll calculate the position of the origin separately - // TODO: changed this for images - double cur_ctm[6]; // unscaled + double cur_text_tm[6]; // unscaled // letter spacing long long cur_ls_id; @@ -283,7 +291,7 @@ class HTMLRenderer : public OutputDev // draw_ctm is cur_ctm scaled by 1/draw_scale, // so everything redenered should be multiplied by draw_scale - double draw_ctm[6]; + double draw_text_tm[6]; double draw_font_size; double draw_scale; diff --git a/src/include/util.h b/src/include/util.h index 1284837..bc4e05e 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -47,6 +47,8 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } +void _transform(const double * ctm, double & x, double & y, bool is_delta = false); + static inline long long hash_ref(const Ref * id) { return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen); @@ -223,5 +225,15 @@ bool is_truetype_suffix(const std::string & suffix); std::string get_filename(const std::string & path); std::string get_suffix(const std::string & path); +/* + * In PDF, edges of the rectangle are in the middle of the borders + * In HTML, edges are completely outside the rectangle + */ +void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2, + double border_width, + double & x, double & y, double & w, double & h, + double & border_top_bottom_width, + double & border_left_right_width); + } // namespace util #endif //UTIL_H__ diff --git a/src/util.cc b/src/util.cc index 2c854f3..ddaa2a2 100644 --- a/src/util.cc +++ b/src/util.cc @@ -54,6 +54,18 @@ const std::map, std::pair {{".js", 1}, {""}} }); +void _transform(const double * ctm, double & x, double & y, bool is_delta) +{ + double xx = x, yy = y; + x = ctm[0] * xx + ctm[2] * yy; + y = ctm[1] * xx + ctm[3] * yy; + if(!is_delta) + { + x += ctm[4]; + y += ctm[5]; + } +} + bool isLegalUnicode(Unicode u) { /* @@ -249,4 +261,38 @@ string get_suffix(const string & path) } } +void css_fix_rectangle_border_width(double x1, double y1, + double x2, double y2, + double border_width, + double & x, double & y, double & w, double & h, + double & border_top_bottom_width, + double & border_left_right_width) +{ + w = x2 - x1; + if(w > border_width) + { + w -= border_width; + border_left_right_width = border_width; + } + else + { + border_left_right_width = border_width + w/2; + w = 0; + } + x = x1 - border_width / 2; + + h = y2 - y1; + if(h > border_width) + { + h -= border_width; + border_top_bottom_width = border_width; + } + else + { + border_top_bottom_width = border_width + h/2; + h = 0; + } + y = y1 - border_width / 2; +} + } // namespace pdf2htmlEX From a568383a21e6275ad14870f505dfa6f3eab8ecce Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 04:06:38 +0800 Subject: [PATCH 02/17] working on CSS draw --- share/base.css | 2 +- src/HTMLRenderer/draw.cc | 106 ++++++++++++++++++++++++++++++------- src/HTMLRenderer/export.cc | 4 +- src/HTMLRenderer/link.cc | 4 +- src/include/HTMLRenderer.h | 18 +++++-- src/include/util.h | 2 + src/util.cc | 11 ++++ 7 files changed, 119 insertions(+), 28 deletions(-) diff --git a/share/base.css b/share/base.css index c1dae94..acc5098 100644 --- a/share/base.css +++ b/share/base.css @@ -81,7 +81,7 @@ span { } .a { } -.cr { +.Cd { position:absolute; transform-origin:0% 100%; -ms-transform-origin:0% 100%; diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 6e1ddbe..a8b12d6 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -30,6 +30,10 @@ static bool is_rectangle(GfxSubpath * path) && _equal(path->getY(0), path->getY(4))))) return false; + for(int i = 1; i < path->getNumPoints(); ++i) + if(path->getCurve(i)) + return false; + return (_equal(path->getY(0), path->getY(1)) && _equal(path->getX(1), path->getX(2)) && _equal(path->getY(2), path->getY(3)) @@ -42,7 +46,7 @@ static bool is_rectangle(GfxSubpath * path) //TODO track state //TODO connection style -void HTMLRenderer::stroke(GfxState *state) +void HTMLRenderer::css_draw(GfxState *state, bool fill) { GfxPath * path = state->getPath(); for(int i = 0; i < path->getNumSubpaths(); ++i) @@ -51,18 +55,19 @@ void HTMLRenderer::stroke(GfxState *state) if(is_horizontal_line(subpath)) { - close_text_line(); double x1 = subpath->getX(0); double x2 = subpath->getX(1); double y = subpath->getY(0); if(x1 > x2) swap(x1, x2); - _transform(state->getCTM(), x1, y); + GfxRGB stroke_color; + state->getStrokeRGB(&stroke_color); - html_fout << "
"; + double lw = state->getLineWidth(); + + css_draw_rectangle(x1, y - lw/2, x2-x1, lw, + nullptr, 0, + nullptr, &stroke_color, state); } else if (is_rectangle(subpath)) { @@ -75,22 +80,85 @@ void HTMLRenderer::stroke(GfxState *state) if(x1 > x2) swap(x1, x2); if(y1 > y2) swap(y1, y2); - double x,y,w,h,w1,w2; + double x,y,w,h,lw[2]; css_fix_rectangle_border_width(x1, y1, x2, y2, state->getLineWidth(), - x,y,w,h,w1,w2); - - _transform(state->getCTM(), x, y); + x,y,w,h,lw[0],lw[1]); - html_fout << "
getCTM()) - << "\" style=\"border:solid red;border-width:" - << _round(w1) << "px " - << _round(w2) << " px;left:" - << _round(x) << "px;bottom:" - << _round(y) << "px;width:" - << _round(w) << "px;height:" - << _round(h) << "px;\">
"; + GfxRGB stroke_color; + state->getStrokeRGB(&stroke_color); + + GfxRGB fill_color; + if(fill) state->getFillRGB(&fill_color); + + int lw_count = 2; + + GfxRGB * ps = &stroke_color; + GfxRGB * pf = fill ? (&fill_color) : nullptr; + + if(_equal(h, 0) || _equal(w, 0)) + { + // orthogonal line + + // TODO: check length + ps = nullptr; + pf = &stroke_color; + h += lw[0]; + w += lw[1]; + } + + css_draw_rectangle(x, y, w, h, + lw, lw_count, + ps, pf, state); } } } +void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, + double * line_width_array, int line_width_count, + const GfxRGB * line_color, const GfxRGB * fill_color, + GfxState * state) +{ + close_text_line(); + + html_fout << "
getCTM()) << "\" style=\""; + + if(line_color) + { + html_fout << "border-color:" << *line_color << ";"; + + html_fout << "border-width:"; + for(int i = 0; i < line_width_count; ++i) + { + if(i > 0) html_fout << ' '; + + double lw = line_width_array[i]; + html_fout << _round(lw); + if(lw > EPS) html_fout << "px"; + } + html_fout << ";"; + } + else + { + html_fout << "border:none;"; + } + + if(fill_color) + { + html_fout << "background-color:" << (*fill_color) << ";"; + } + else + { + html_fout << "background-color:transparent;"; + } + + _transform(state->getCTM(), x, y); + + html_fout << "bottom:" << _round(y) << "px;" + << "left:" << _round(x) << "px;" + << "width:" << _round(w) << "px;" + << "height:" << _round(h) << "px;" + << "\">
"; +} + + } // namespace pdf2htmlEX diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index 5fb5a5b..62a1856 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -141,9 +141,7 @@ void HTMLRenderer::export_word_space (long long ws_id, double word_space) void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) { - css_fout << ".c" << color_id << "{color:rgb(" - << dec << (int)colToByte(rgb->r) << "," << (int)colToByte(rgb->g) << "," << (int)colToByte(rgb->b) << ");}" << hex - << endl; + css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl; } void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 6b71bf5..968bc02 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -184,7 +184,7 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << ">"; } - html_fout << "
0) { { - css_fix_rectangle_border_width(x1, y2, x2, y2, border_width, + css_fix_rectangle_border_width(x1, y1, x2, y2, border_width, x, y, w, h, border_top_bottom_width, border_left_right_width); diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index c212b8e..b102a4f 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -42,7 +42,7 @@ * j - Js data * p - Page * - * cr - CSS draw Rectangle + * Cd - CSS Draw * * Reusable CSS classes * @@ -123,7 +123,8 @@ class HTMLRenderer : public OutputDev virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); - virtual void stroke(GfxState *state); + virtual void stroke(GfxState *state) { css_draw(state, false); } + virtual void fill(GfxState *state) { css_draw(state, true); } virtual void processLink(AnnotLink * al); @@ -200,7 +201,18 @@ class HTMLRenderer : public OutputDev //////////////////////////////////////////////////// // CSS drawing //////////////////////////////////////////////////// - void css_draw_rectange(); + void css_draw(GfxState *state, bool fill); + /* + * coordinates are to transformed by state->getCTM() + * (x,y) should be the bottom-left corner INCLUDING border + * w,h should be the metrics WITHOUT border + * + * line_color & fill_color may be specified as nullptr to indicate none + */ + void css_draw_rectangle(double x, double y, double w, double h, + double * line_width_array, int line_width_count, + const GfxRGB * line_color, const GfxRGB * fill_color, + GfxState * state); //////////////////////////////////////////////////// diff --git a/src/include/util.h b/src/include/util.h index bc4e05e..746077b 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -235,5 +235,7 @@ void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2, double & border_top_bottom_width, double & border_left_right_width); +std::ostream & operator << (std::ostream & out, const GfxRGB & rgb); + } // namespace util #endif //UTIL_H__ diff --git a/src/util.cc b/src/util.cc index ddaa2a2..9432cc8 100644 --- a/src/util.cc +++ b/src/util.cc @@ -295,4 +295,15 @@ void css_fix_rectangle_border_width(double x1, double y1, y = y1 - border_width / 2; } +ostream & operator << (ostream & out, const GfxRGB & rgb) +{ + auto flags= out.flags(); + out << std::dec << "rgb(" + << (int)colToByte(rgb.r) << "," + << (int)colToByte(rgb.g) << "," + << (int)colToByte(rgb.b) << ")"; + out.flags(flags); + return out; +} + } // namespace pdf2htmlEX From b5d73e685052315274ab9ee9ba4bab891d90a052 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 04:26:13 +0800 Subject: [PATCH 03/17] .. --- src/HTMLRenderer/draw.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index a8b12d6..5053357 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -156,8 +156,9 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, html_fout << "bottom:" << _round(y) << "px;" << "left:" << _round(x) << "px;" << "width:" << _round(w) << "px;" - << "height:" << _round(h) << "px;" - << "\">
"; + << "height:" << _round(h) << "px;"; + + html_fout << "\">
"; } From a4a2252741ab398f6c45b96c4ba57215fc67ec79 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 04:31:55 +0800 Subject: [PATCH 04/17] don't stroke when fill --- src/HTMLRenderer/draw.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 5053357..c311931 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -81,18 +81,18 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) if(y1 > y2) swap(y1, y2); double x,y,w,h,lw[2]; - css_fix_rectangle_border_width(x1, y1, x2, y2, state->getLineWidth(), + css_fix_rectangle_border_width(x1, y1, x2, y2, (fill ? 0.0 : state->getLineWidth()), x,y,w,h,lw[0],lw[1]); GfxRGB stroke_color; - state->getStrokeRGB(&stroke_color); + if(!fill) state->getStrokeRGB(&stroke_color); GfxRGB fill_color; if(fill) state->getFillRGB(&fill_color); int lw_count = 2; - GfxRGB * ps = &stroke_color; + GfxRGB * ps = fill ? nullptr : (&stroke_color); GfxRGB * pf = fill ? (&fill_color) : nullptr; if(_equal(h, 0) || _equal(w, 0)) @@ -100,8 +100,8 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) // orthogonal line // TODO: check length + pf = ps; ps = nullptr; - pf = &stroke_color; h += lw[0]; w += lw[1]; } From 4090d79eac071f987895dce9136ed7ed5499aceb Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 12:43:08 +0800 Subject: [PATCH 05/17] typo --- src/pdf2htmlEX.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index c712df0..03fbc08 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -82,7 +82,7 @@ void parse_options (int argc, char **argv) .add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") .add("stretch_narrow_glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") - .add("squeeze_wide_glyph", ¶m.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") + .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") .add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file") .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files") From 779c91f81e929df0f5b14ead1b2546a10e2529d5 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 14:19:20 +0800 Subject: [PATCH 06/17] draw_scale->draw_text_scale --- src/HTMLRenderer/general.cc | 9 ++++---- src/HTMLRenderer/link.cc | 2 +- src/HTMLRenderer/state.cc | 42 ++++++++++++++++++------------------- src/HTMLRenderer/text.cc | 2 +- src/ffw.c | 12 ++++------- src/include/HTMLRenderer.h | 12 +++++------ 6 files changed, 38 insertions(+), 41 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index fea717a..a79627a 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -108,8 +108,8 @@ void HTMLRenderer::process(PDFDoc *doc) zoom = *min_element(zoom_factors.begin(), zoom_factors.end()); } - scale_factor1 = max(zoom, param->font_size_multiplier); - scale_factor2 = zoom / scale_factor1; + text_scale_factor1 = max(zoom, param->font_size_multiplier); + text_scale_factor2 = zoom / text_scale_factor1; } @@ -156,7 +156,8 @@ void HTMLRenderer::process(PDFDoc *doc) } } - doc->displayPage(this, i, zoom_factor() * DEFAULT_DPI, zoom_factor() * DEFAULT_DPI, + doc->displayPage(this, i, + text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, 0, true, false, false, nullptr, nullptr, nullptr, nullptr); @@ -219,7 +220,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) } html_fout << "\">"; - draw_scale = 1.0; + draw_text_scale = 1.0; cur_font_info = install_font(nullptr); cur_font_size = draw_font_size = 0; diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 968bc02..a38fc65 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -202,7 +202,7 @@ void HTMLRenderer::processLink(AnnotLink * al) auto * border = al->getBorder(); if(border) { - border_width = border->getWidth() * zoom_factor(); + border_width = border->getWidth(); if(border_width > 0) { { diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 521ab12..054a807 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -82,7 +82,7 @@ void HTMLRenderer::check_state_change(GfxState * state) bool need_recheck_position = false; bool need_rescale_font = false; - bool draw_scale_changed = false; + bool draw_text_scale_changed = false; // text position // we've been tracking the text position positively in the update*** functions @@ -139,31 +139,31 @@ void HTMLRenderer::check_state_change(GfxState * state) } } - // draw_text_tm, draw_scale + // draw_text_tm, draw_text_scale // depends: font size & ctm & text_ctm & hori scale if(need_rescale_font) { double new_draw_text_tm[6]; memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); - double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_text_tm[2] * new_draw_text_tm[2] + new_draw_text_tm[3] * new_draw_text_tm[3]); + double new_draw_text_scale = 1.0/text_scale_factor2 * sqrt(new_draw_text_tm[2] * new_draw_text_tm[2] + new_draw_text_tm[3] * new_draw_text_tm[3]); double new_draw_font_size = cur_font_size; - if(_is_positive(new_draw_scale)) + if(_is_positive(new_draw_text_scale)) { - new_draw_font_size *= new_draw_scale; + new_draw_font_size *= new_draw_text_scale; for(int i = 0; i < 4; ++i) - new_draw_text_tm[i] /= new_draw_scale; + new_draw_text_tm[i] /= new_draw_text_scale; } else { - new_draw_scale = 1.0; + new_draw_text_scale = 1.0; } - if(!(_equal(new_draw_scale, draw_scale))) + if(!(_equal(new_draw_text_scale, draw_text_scale))) { - draw_scale_changed = true; - draw_scale = new_draw_scale; + draw_text_scale_changed = true; + draw_text_scale = new_draw_text_scale; } if(!(_equal(new_draw_font_size, draw_font_size))) @@ -241,28 +241,28 @@ void HTMLRenderer::check_state_change(GfxState * state) } // letter space - // depends: draw_scale - if(all_changed || letter_space_changed || draw_scale_changed) + // depends: draw_text_scale + if(all_changed || letter_space_changed || draw_text_scale_changed) { double new_letter_space = state->getCharSpace(); if(!_equal(cur_letter_space, new_letter_space)) { new_line_state = max(new_line_state, NLS_SPAN); cur_letter_space = new_letter_space; - cur_ls_id = install_letter_space(cur_letter_space * draw_scale); + cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale); } } // word space - // depends draw_scale - if(all_changed || word_space_changed || draw_scale_changed) + // depends draw_text_scale + if(all_changed || word_space_changed || draw_text_scale_changed) { double new_word_space = state->getWordSpace(); if(!_equal(cur_word_space, new_word_space)) { new_line_state = max(new_line_state, NLS_SPAN); cur_word_space = new_word_space; - cur_ws_id = install_word_space(cur_word_space * draw_scale); + cur_ws_id = install_word_space(cur_word_space * draw_text_scale); } } @@ -280,15 +280,15 @@ void HTMLRenderer::check_state_change(GfxState * state) } // rise - // depends draw_scale - if(all_changed || rise_changed || draw_scale_changed) + // depends draw_text_scale + if(all_changed || rise_changed || draw_text_scale_changed) { double new_rise = state->getRise(); if(!_equal(cur_rise, new_rise)) { new_line_state = max(new_line_state, NLS_SPAN); cur_rise = new_rise; - cur_rise_id = install_rise(new_rise * draw_scale); + cur_rise_id = install_rise(new_rise * draw_text_scale); } } @@ -333,7 +333,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state) { // align horizontal position // try to merge with the last line if possible - double target = (cur_tx - draw_tx) * draw_scale; + double target = (cur_tx - draw_tx) * draw_text_scale; if(abs(target) < param->h_eps) { // ignore it @@ -341,7 +341,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state) else { line_buf.append_offset(target); - draw_tx += target / draw_scale; + draw_tx += target / draw_text_scale; } } diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 3b9b179..1a6e323 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -522,7 +522,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) if(is_space && (param->space_as_offset)) { // ignore horiz_scaling, as it's merged in CTM - line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); + line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale); } else { diff --git a/src/ffw.c b/src/ffw.c index dc8e913..2bdef39 100644 --- a/src/ffw.c +++ b/src/ffw.c @@ -332,15 +332,11 @@ void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int s SplineChar * sc = sf->glyphs[j]; if(sc == NULL) continue; - DBounds bb; - SplineCharFindBounds(sc, &bb); - - double glyph_width = bb.maxx - bb.minx; - if((glyph_width > EPS) - && (((glyph_width > width_list[i] + EPS) && (squeeze_wide)) - || ((glyph_width < width_list[i] - EPS) && (stretch_narrow)))) + if(((sc->width > EPS) + && (((sc->width > width_list[i] + EPS) && (squeeze_wide)) + || ((sc->width < width_list[i] - EPS) && (stretch_narrow))))) { - real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width; + real transform[6]; transform[0] = ((double)width_list[i]) / (sc->width); transform[3] = 1.0; transform[1] = transform[2] = transform[4] = transform[5] = 0; FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth); diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index b102a4f..9ebc872 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -237,9 +237,9 @@ class HTMLRenderer : public OutputDev * factor1 & factor 2 are determined according to zoom and font-size-multiplier * */ - double zoom_factor (void) const { return scale_factor1 * scale_factor2; } - double scale_factor1; - double scale_factor2; + double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; } + double text_scale_factor1; + double text_scale_factor2; //////////////////////////////////////////////////// @@ -301,11 +301,11 @@ class HTMLRenderer : public OutputDev // we try to render the final font size directly // to reduce the effect of ctm as much as possible - // draw_ctm is cur_ctm scaled by 1/draw_scale, - // so everything redenered should be multiplied by draw_scale + // draw_ctm is cur_ctm scaled by 1/draw_text_scale, + // so everything redenered should be multiplied by draw_text_scale double draw_text_tm[6]; double draw_font_size; - double draw_scale; + double draw_text_scale; // the position of next char, in text coords // this is actual position (in HTML), which might be different from cur_tx/ty (in PDF) From 2b06319207622cfe3d0dcc6c3e2c67f3909431e7 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 16:06:08 +0800 Subject: [PATCH 07/17] TODO --- TODO | 5 +++-- src/HTMLRenderer/draw.cc | 35 ++++++++++++++++++++++++++++------- src/HTMLRenderer/general.cc | 6 +++--- src/HTMLRenderer/state.cc | 6 +++--- src/include/Param.h | 3 +++ src/pdf2htmlEX.cc | 1 + 6 files changed, 41 insertions(+), 15 deletions(-) diff --git a/TODO b/TODO index faf9ff4..4246d7b 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,6 @@ +Integrate splash/cairo +native support for image +native support for draw about glyph width: - IE @@ -29,5 +32,3 @@ combine lines (unwarp) Printing multiple charcode mapped to a same glyph check if we can add information to the font, and let browsers show ligatures automatically -native support for image -native support for draw diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index c311931..9e3d611 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -48,6 +48,8 @@ static bool is_rectangle(GfxSubpath * path) //TODO connection style void HTMLRenderer::css_draw(GfxState *state, bool fill) { + if(!(param->css_draw)) return; + GfxPath * path = state->getPath(); for(int i = 0; i < path->getNumSubpaths(); ++i) { @@ -120,7 +122,28 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, { close_text_line(); - html_fout << "
getCTM()) << "\" style=\""; + double ctm[6]; + memcpy(ctm, state->getCTM(), sizeof(ctm)); + + _transform(ctm, x, y); + + double scale = 1.0; + { + double i1 = ctm[0] + ctm[2]; + double i2 = ctm[1] + ctm[3]; + scale = sqrt((i1 * i1 + i2 * i2) / 2.0); + if(_is_positive(scale)) + { + for(int i = 0; i < 4; ++i) + ctm[i] /= scale; + } + else + { + scale = 1.0; + } + } + + html_fout << "
0) html_fout << ' '; - double lw = line_width_array[i]; + double lw = line_width_array[i] * scale; html_fout << _round(lw); - if(lw > EPS) html_fout << "px"; + if(_is_positive(lw)) html_fout << "px"; } html_fout << ";"; } @@ -151,12 +174,10 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, html_fout << "background-color:transparent;"; } - _transform(state->getCTM(), x, y); - html_fout << "bottom:" << _round(y) << "px;" << "left:" << _round(x) << "px;" - << "width:" << _round(w) << "px;" - << "height:" << _round(h) << "px;"; + << "width:" << _round(w * scale) << "px;" + << "height:" << _round(h * scale) << "px;"; html_fout << "\">
"; } diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index a79627a..b48227c 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -84,17 +84,17 @@ void HTMLRenderer::process(PDFDoc *doc) vector zoom_factors; - if(abs(param->zoom) > EPS) + if(_is_positive(param->zoom)) { zoom_factors.push_back(param->zoom); } - if(abs(param->fit_width) > EPS) + if(_is_positive(param->fit_width)) { zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width()); } - if(abs(param->fit_height) > EPS) + if(_is_positive(param->fit_height)) { zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height()); } diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 054a807..862d7d8 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -206,13 +206,13 @@ void HTMLRenderer::check_state_change(GfxState * state) if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx)) { - if(abs(cur_text_tm[0]) > EPS) + if(_is_positive(cur_text_tm[0])) { draw_tx += tdx / cur_text_tm[0]; draw_ty += dy; merged = true; } - else if (abs(cur_text_tm[1]) > EPS) + else if (_is_positive(cur_text_tm[1])) { draw_tx += tdy / cur_text_tm[1]; draw_ty += dy; @@ -220,7 +220,7 @@ void HTMLRenderer::check_state_change(GfxState * state) } else { - if((abs(tdx) < EPS) && (abs(tdy) < EPS)) + if((_equal(tdx,0)) && (_equal(tdy,0))) { // free draw_tx = cur_tx; diff --git a/src/include/Param.h b/src/include/Param.h index db90188..6136a55 100644 --- a/src/include/Param.h +++ b/src/include/Param.h @@ -55,6 +55,9 @@ struct Param int debug; int clean_tmp; + + // experimental + int css_draw; }; } // namespace pdf2htmlEX diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 03fbc08..d95c70e 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -91,6 +91,7 @@ void parse_options (int argc, char **argv) .add("debug", ¶m.debug, 0, "output debug information") .add("clean-tmp", ¶m.clean_tmp, 1, "clean temporary files after processing") + .add("css-draw", ¶m.css_draw, 0, "[Experimental and Unsupported] CSS Drawing") .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") ; From a9790d68b3b17113d9aa2eae47104c16ceb4037f Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 20:41:39 +0800 Subject: [PATCH 08/17] integrating cairo outputdev --- CMakeLists.txt | 15 +++++++- ...enderer.cc => SplashBackgroundRenderer.cc} | 0 src/include/CairoBackgroundRenderer.h | 37 ++++++++++++++++++ src/include/SplashBackgroundRenderer.h | 38 +++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) rename src/{BackgroundRenderer.cc => SplashBackgroundRenderer.cc} (100%) create mode 100644 src/include/CairoBackgroundRenderer.h create mode 100644 src/include/SplashBackgroundRenderer.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ffccdd9..324591d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,16 @@ include_directories(${POPPLER_INCLUDE_DIRS}) link_directories(${POPPLER_LIBRARY_DIRS}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES}) +find_path(CAIRO_OUTPUTDEV_INCLUDE_PATH CairoOutputDev.h PATHS + ${POPPLER_INCLUDE_DIRS} NO_DEFAULT_PATH) +if(CAIRO_OUTPUTDEV_INCLUDE_PATH) + message("Found CairoOutputDev.h: ${POPPLER_INCLUDE_DIRS}/CairoOutputDev.h") + set(HAVE_CAIRO 1) +else() + set(HAVE_CAIRO 0) +endif() + + # fontforge starts using pkg-config 'correctly' since 2.0.0 pkg_check_modules(FONTFORGE libfontforge>=2.0.0) if(FONTFORGE_FOUND) @@ -134,7 +144,10 @@ add_executable(pdf2htmlEX src/include/ffw.h src/ffw.c src/include/BackgroundRenderer.h - src/BackgroundRenderer.cc + src/include/SplashBackgroundRenderer.h + src/SplashBackgroundRenderer.cc + src/include/CairoBackgroundRenderer.h + src/CairoBackgroundRenderer.cc src/include/Preprocessor.h src/Preprocessor.cc src/include/util.h diff --git a/src/BackgroundRenderer.cc b/src/SplashBackgroundRenderer.cc similarity index 100% rename from src/BackgroundRenderer.cc rename to src/SplashBackgroundRenderer.cc diff --git a/src/include/CairoBackgroundRenderer.h b/src/include/CairoBackgroundRenderer.h new file mode 100644 index 0000000..dfa47c1 --- /dev/null +++ b/src/include/CairoBackgroundRenderer.h @@ -0,0 +1,37 @@ +/* + * Splash Background renderer + * Render all those things not supported as Image, with Splash + * + * Copyright (C) 2012 Lu Wang + */ + + +#ifndef SPLASH_BACKGROUND_RENDERER_H__ +#define SPLASH_BACKGROUND_RENDERER_H__ + +#include +#include + +namespace pdf2htmlEX { + +// Based on BackgroundRenderer from poppler +class SplashBackgroundRenderer : public SplashOutputDev +{ +public: + static const SplashColor white; + + SplashBackgroundRenderer() + :SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + { } + + virtual ~SplashBackgroundRenderer() { } + + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen); +}; + +} + +#endif //SPLASH_BACKGROUND_RENDERER_H__ diff --git a/src/include/SplashBackgroundRenderer.h b/src/include/SplashBackgroundRenderer.h new file mode 100644 index 0000000..119046f --- /dev/null +++ b/src/include/SplashBackgroundRenderer.h @@ -0,0 +1,38 @@ +/* + * Splash Background renderer + * Render all those things not supported as Image, with Splash + * + * by WangLu + * 2012.08.06 + */ + + +#ifndef BACKGROUND_RENDERER_H__ +#define BACKGROUND_RENDERER_H__ + +#include +#include + +namespace pdf2htmlEX { + +// Based on BackgroundRenderer from poppler +class SplashBackgroundRenderer : public SplashOutputDev +{ +public: + SplashBackgroundRenderer() + { + SplashColor color; + color[0] = color[1] = color[2] = 255; + SplashOutputDev(splashModeRGB8, 4, gFlase, color, gTrue, gTrue)` + } + virtual ~BackgroundRenderer() { } + + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen); +}; + +} + +#endif //BACKGROUND_RENDERER_H__ From 8c5851863c1334c75590b1a3796b323018feea6a Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 20:56:40 +0800 Subject: [PATCH 09/17] working on integrating splash/cairo --- src/CairoBackgroundRenderer.cc | 26 ++ src/HTMLRenderer/general.cc | 11 +- src/HTMLRenderer/text.cc.orig | 567 +++++++++++++++++++++++++ src/SplashBackgroundRenderer.cc | 16 +- src/include/BackgroundRenderer.h | 40 +- src/include/CairoBackgroundRenderer.h | 6 +- src/include/SplashBackgroundRenderer.h | 20 +- src/include/pdf2htmlEX-config.h | 24 ++ src/include/pdf2htmlEX-config.h.in | 2 + 9 files changed, 658 insertions(+), 54 deletions(-) create mode 100644 src/CairoBackgroundRenderer.cc create mode 100644 src/HTMLRenderer/text.cc.orig create mode 100644 src/include/pdf2htmlEX-config.h diff --git a/src/CairoBackgroundRenderer.cc b/src/CairoBackgroundRenderer.cc new file mode 100644 index 0000000..9005ba9 --- /dev/null +++ b/src/CairoBackgroundRenderer.cc @@ -0,0 +1,26 @@ +/* + * CairoBackgroundRenderer.cc + * + * Copyright (C) 2012 Lu Wang + */ + +#include "pdf2htmlEX-config.h" + +#if HAVE_CAIRO + +#include "CairoBackgroundRenderer.h" + +namespace pdf2htmlEX { + +void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen) +{ + // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); +} + +} // namespace pdf2htmlEX + +#endif // HAVE_CAIRO + diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index b48227c..427af7b 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -3,7 +3,7 @@ * * Handling general stuffs * - * by WangLu + * Copyright (C) 2012 Lu Wang * 2012.08.14 */ @@ -13,8 +13,6 @@ #include #include -#include - #include "HTMLRenderer.h" #include "BackgroundRenderer.h" #include "namespace.h" @@ -117,12 +115,7 @@ void HTMLRenderer::process(PDFDoc *doc) BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { - // Render non-text objects as image - // copied from poppler - SplashColor color; - color[0] = color[1] = color[2] = 255; - - bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); + bg_renderer = new BackgroundRenderer(); bg_renderer->startDoc(doc); } diff --git a/src/HTMLRenderer/text.cc.orig b/src/HTMLRenderer/text.cc.orig new file mode 100644 index 0000000..1a4b44f --- /dev/null +++ b/src/HTMLRenderer/text.cc.orig @@ -0,0 +1,567 @@ +/* + * text.cc + * + * Handling text & font, and relative stuffs + * + * by WangLu + * 2012.08.14 + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "ffw.h" +#include "HTMLRenderer.h" +#include "namespace.h" + +namespace pdf2htmlEX { + +using std::unordered_set; +using std::min; +using std::all_of; +using std::floor; +using std::swap; + +string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) +{ + Object obj, obj1, obj2; + Object font_obj, font_obj2, fontdesc_obj; + string suffix; + string filepath; + + try + { + // mupdf consulted + string subtype; + + auto * id = font->getID(); + + Object ref_obj; + ref_obj.initRef(id->num, id->gen); + ref_obj.fetch(xref, &font_obj); + ref_obj.free(); + + if(!font_obj.isDict()) + { + cerr << "Font object is not a dictionary" << endl; + throw 0; + } + + Dict * dict = font_obj.getDict(); + if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) + { + if(font_obj2.arrayGetLength() == 0) + { + cerr << "Warning: empty DescendantFonts array" << endl; + } + else + { + if(font_obj2.arrayGetLength() > 1) + cerr << "TODO: multiple entries in DescendantFonts array" << endl; + + if(font_obj2.arrayGet(0, &obj2)->isDict()) + { + dict = obj2.getDict(); + } + } + } + + if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) + { + cerr << "Cannot find FontDescriptor " << endl; + throw 0; + } + + dict = fontdesc_obj.getDict(); + + if(dict->lookup("FontFile3", &obj)->isStream()) + { + if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) + { + subtype = obj1.getName(); + if(subtype == "Type1C") + { + suffix = ".cff"; + } + else if (subtype == "CIDFontType0C") + { + suffix = ".cid"; + } + else + { + cerr << "Unknown subtype: " << subtype << endl; + throw 0; + } + } + else + { + cerr << "Invalid subtype in font descriptor" << endl; + throw 0; + } + } + else if (dict->lookup("FontFile2", &obj)->isStream()) + { + suffix = ".ttf"; + } + else if (dict->lookup("FontFile", &obj)->isStream()) + { + suffix = ".pfa"; + } + else + { + cerr << "Cannot find FontFile for dump" << endl; + throw 0; + } + + if(suffix == "") + { + cerr << "Font type unrecognized" << endl; + throw 0; + } + + obj.streamReset(); + + filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str()); + add_tmp_file(filepath); + + ofstream outf(filepath, ofstream::binary); + if(!outf) + throw string("Cannot open file ") + filepath + " for writing"; + + char buf[1024]; + int len; + while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) + { + outf.write(buf, len); + } + outf.close(); + obj.streamClose(); + } + catch(int) + { + cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; + } + + obj2.free(); + obj1.free(); + obj.free(); + + fontdesc_obj.free(); + font_obj2.free(); + font_obj.free(); + + return filepath; +} + +void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) +{ + if(param->debug) + { + cerr << "Embed font: " << filepath << " " << info.id << endl; + } + + ffw_load_font(filepath.c_str()); + int * code2GID = nullptr; + int code2GID_len = 0; + int maxcode = 0; + + Gfx8BitFont * font_8bit = nullptr; + GfxCIDFont * font_cid = nullptr; + + string suffix = get_suffix(filepath); + for(auto iter = suffix.begin(); iter != suffix.end(); ++iter) + *iter = tolower(*iter); + + /* + * if parm->tounicode is 0, try the provided tounicode map first + */ + info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0)); + info.has_space = false; + + const char * used_map = nullptr; + + info.em_size = ffw_get_em_size(); + + if(get_metric_only) + return; + + used_map = preprocessor.get_code_map(hash_ref(font->getID())); + + /* + * Step 1 + * dump the font file directly from the font descriptor and put the glyphs into the correct slots + * + * for 8bit + nonTrueType + * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) + * + * for 8bit + TrueType + * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode + * + * for CID + nonTrueType + * Flatten the font + * + * for CID Truetype + * same as 8bitTrueType, except for that we have to check 65536 charcodes + */ + if(!font->isCIDFont()) + { + font_8bit = dynamic_cast(font); + maxcode = 0xff; + if(is_truetype_suffix(suffix)) + { + ffw_reencode_glyph_order(); + FoFiTrueType *fftt = nullptr; + if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) + { + code2GID = font_8bit->getCodeToGIDMap(fftt); + code2GID_len = 256; + delete fftt; + } + } + else + { + // move the slot such that it's consistent with the encoding seen in PDF + unordered_set nameset; + bool name_conflict_warned = false; + + memset(cur_mapping2, 0, 0x100 * sizeof(char*)); + + for(int i = 0; i < 256; ++i) + { + if(!used_map[i]) continue; + + auto cn = font_8bit->getCharName(i); + if(cn == nullptr) + { + continue; + } + else + { + if(nameset.insert(string(cn)).second) + { + cur_mapping2[i] = cn; + } + else + { + if(!name_conflict_warned) + { + name_conflict_warned = true; + //TODO: may be resolved using advanced font properties? + cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; + } + } + } + } + + ffw_reencode_raw2(cur_mapping2, 256, 0); + } + } + else + { + font_cid = dynamic_cast(font); + maxcode = 0xffff; + + if(is_truetype_suffix(suffix)) + { + ffw_reencode_glyph_order(); + + GfxCIDFont * _font = dynamic_cast(font); + + // code2GID has been stored for embedded CID fonts + code2GID = _font->getCIDToGID(); + code2GID_len = _font->getCIDToGIDLen(); + } + else + { + ffw_cidflatten(); + } + } + + /* + * Step 2 + * map charcode (or GID for CID truetype) + * generate an Consortium encoding file and let fontforge handle it. + * + * - Always map to Unicode for 8bit TrueType fonts and CID fonts + * + * - For 8bit nonTruetype fonts: + * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set + * + * + * Also fill in the width_list, and set widths accordingly + */ + + + { + unordered_set codeset; + bool name_conflict_warned = false; + + auto ctu = font->getToUnicode(); + memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); + memset(width_list, -1, 0x10000 * sizeof(*width_list)); + + if(code2GID) + maxcode = min(maxcode, code2GID_len - 1); + + bool is_truetype = is_truetype_suffix(suffix); + int max_key = maxcode; + /* + * Traverse all possible codes + */ + bool retried = false; // avoid infinite loop + for(int i = 0; i <= maxcode; ++i) + { + if(!used_map[i]) + continue; + + /* + * Skip glyphs without names (only for non-ttf fonts) + */ + if(!is_truetype && (font_8bit != nullptr) + && (font_8bit->getCharName(i) == nullptr)) + { + continue; + } + + int k = i; + if(code2GID) + { + if((k = code2GID[i]) == 0) continue; + } + + if(k > max_key) + max_key = k; + + Unicode u, *pu=&u; + if(info.use_tounicode) + { + int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0; + u = check_unicode(pu, n, i, font); + } + else + { + u = unicode_from_font(i, font); + } + + if(u == ' ') + info.has_space = true; + + if(codeset.insert(u).second) + { + cur_mapping[k] = u; + } + else + { + // collision detected + if(param->tounicode == 0) + { + // in auto mode, just drop the tounicode map + if(!retried) + { + cerr << "ToUnicode CMap is not valid and got dropped" << endl; + retried = true; + codeset.clear(); + info.use_tounicode = false; + memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); + memset(width_list, -1, 0x10000 * sizeof(*width_list)); + i = -1; + continue; + } + } + if(!name_conflict_warned) + { + name_conflict_warned = true; + //TODO: may be resolved using advanced font properties? + cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; + } + } + + if(font_8bit) + { + width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5); + } + else + { + char buf[2]; + buf[0] = (i >> 8) & 0xff; + buf[1] = (i & 0xff); + width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5); + } + } + + ffw_reencode_raw(cur_mapping, max_key + 1, 1); + ffw_set_widths(width_list, max_key + 1); + + if(ctu) + ctu->decRefCnt(); + } + + /* + * Step 3 + * + * Generate the font as desired + * + */ + string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); + add_tmp_file(cur_tmp_fn); + string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); + add_tmp_file(other_tmp_fn); + + ffw_save(cur_tmp_fn.c_str()); + ffw_close(); + + /* + * Step 4 + * Font Hinting + */ + bool hinted = false; + + // Call external hinting program if specified + if(param->external_hint_tool != "") + { + hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0); + } + + // Call internal hinting procedure if specified + if((!hinted) && (param->auto_hint)) + { + ffw_load_font(cur_tmp_fn.c_str()); + ffw_auto_hint(); + ffw_save(other_tmp_fn.c_str()); + ffw_close(); + hinted = true; + } + + if(hinted) + { + swap(cur_tmp_fn, other_tmp_fn); + } + + /* + * Step 5 + * Generate the font + * Reload to retrieve/fix accurate ascent/descent + */ + string fn = (char*)str_fmt("%s/f%llx%s", + (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), + info.id, param->font_suffix.c_str()); + + if(param->single_html) + add_tmp_file(fn); + + ffw_load_font(cur_tmp_fn.c_str()); + ffw_metric(&info.ascent, &info.descent); + ffw_save(fn.c_str()); + ffw_close(); +} + +void HTMLRenderer::drawString(GfxState * state, GooString * s) +{ + if(s->getLength() == 0) + return; + + auto font = state->getFont(); + if((font == nullptr) || (font->getWMode())) + { + return; + } + + //hidden + if((state->getRender() & 3) == 3) + { + return; + } + + // see if the line has to be closed due to state change + check_state_change(state); + prepare_line(state); + + // Now ready to output + // get the unicodes + char *p = s->getCString(); + int len = s->getLength(); + + double dx = 0; + double dy = 0; + double dxerr = 0; + double dx1,dy1; + double ox, oy; + + int nChars = 0; + int nSpaces = 0; + int uLen; + + CharCode code; + Unicode *u = nullptr; + + while (len > 0) { + auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); + + if(!(_equal(ox, 0) && _equal(oy, 0))) + { + cerr << "TODO: non-zero origins" << endl; + } + + bool is_space = false; + if (n == 1 && *p == ' ') + { + ++nSpaces; + is_space = true; + } + + if(is_space && (param->space_as_offset)) + { + // ignore horiz_scaling, as it's merged in CTM + line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); + } + else + { + if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode)) + { + line_buf.append_unicodes(u, uLen); + } + else + { + if(cur_font_info->use_tounicode) + { + Unicode uu = check_unicode(u, uLen, code, font); + line_buf.append_unicodes(&uu, 1); + } + else + { + Unicode uu = unicode_from_font(code, font); + line_buf.append_unicodes(&uu, 1); + } + } + } + + dx += dx1; + dy += dy1; + + ++nChars; + p += n; + len -= n; + } + + double hs = state->getHorizScaling(); + + // horiz_scaling is merged into ctm now, + // so the coordinate system is ugly + dx = (dx * cur_font_size + nChars * cur_letter_space + nSpaces * cur_word_space) * hs; + + dy *= cur_font_size; + + cur_tx += dx; + cur_ty += dy; + + draw_tx += dx + dxerr * cur_font_size * hs; + draw_ty += dy; +} + +} // namespace pdf2htmlEX diff --git a/src/SplashBackgroundRenderer.cc b/src/SplashBackgroundRenderer.cc index 6956408..4492160 100644 --- a/src/SplashBackgroundRenderer.cc +++ b/src/SplashBackgroundRenderer.cc @@ -1,19 +1,16 @@ /* - * BackgroundRenderer.cc + * SplashBackgroundRenderer.cc * - * Copyright (C) 2012 by Lu Wang coolwanglugmail.com + * Copyright (C) 2012 Lu Wang */ -#include +#include "SplashBackgroundRenderer.h" -#include "GfxFont.h" +namespace pdf2htmlEX { -#include "BackgroundRenderer.h" -#include "util.h" +const SplashColor SplashBackgroundRenderer::white = {255,255,255}; -using namespace pdf2htmlEX; - -void BackgroundRenderer::drawChar(GfxState *state, double x, double y, +void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen) @@ -21,3 +18,4 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y, // SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } +} // namespace pdf2htmlEX diff --git a/src/include/BackgroundRenderer.h b/src/include/BackgroundRenderer.h index 39cbd07..b3cd623 100644 --- a/src/include/BackgroundRenderer.h +++ b/src/include/BackgroundRenderer.h @@ -2,37 +2,31 @@ * Background renderer * Render all those things not supported as Image * - * by WangLu - * 2012.08.06 + * Copyright (C) 2012 Lu Wang */ #ifndef BACKGROUND_RENDERER_H__ #define BACKGROUND_RENDERER_H__ -#include +#include "pdf2htmlEX-config.h" + +#if HAVE_CAIRO + +#include "CairoBackgroundRenderer.h" namespace pdf2htmlEX { - -// Based on BackgroundRenderer from poppler -class BackgroundRenderer : public SplashOutputDev { -public: - BackgroundRenderer(SplashColorMode colorModeA, int bitmapRowPadA, - GBool reverseVideoA, SplashColorPtr paperColorA, - GBool bitmapTopDownA = gTrue, - GBool allowAntialiasA = gTrue) - : SplashOutputDev(colorModeA, - bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA, - allowAntialiasA) - { } - virtual ~BackgroundRenderer() { } - - virtual void drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen); -}; - + typedef CairoBackgroundRenderer BackgroundRenderer; } +#else + +#include "SplashBackgroundRenderer.h" + +namespace pdf2htmlEX { + typedef SplashBackgroundRenderer BackgroundRenderer; +} + +#endif // HAVE_CAIRO + #endif //BACKGROUND_RENDERER_H__ diff --git a/src/include/CairoBackgroundRenderer.h b/src/include/CairoBackgroundRenderer.h index dfa47c1..5a2d2ab 100644 --- a/src/include/CairoBackgroundRenderer.h +++ b/src/include/CairoBackgroundRenderer.h @@ -6,8 +6,8 @@ */ -#ifndef SPLASH_BACKGROUND_RENDERER_H__ -#define SPLASH_BACKGROUND_RENDERER_H__ +#ifndef CAIRO_BACKGROUND_RENDERER_H__ +#define CAIRO_BACKGROUND_RENDERER_H__ #include #include @@ -34,4 +34,4 @@ public: } -#endif //SPLASH_BACKGROUND_RENDERER_H__ +#endif //CAIRO_BACKGROUND_RENDERER_H__ diff --git a/src/include/SplashBackgroundRenderer.h b/src/include/SplashBackgroundRenderer.h index 119046f..aced0fb 100644 --- a/src/include/SplashBackgroundRenderer.h +++ b/src/include/SplashBackgroundRenderer.h @@ -7,8 +7,8 @@ */ -#ifndef BACKGROUND_RENDERER_H__ -#define BACKGROUND_RENDERER_H__ +#ifndef SPLASH_BACKGROUND_RENDERER_H__ +#define SPLASH_BACKGROUND_RENDERER_H__ #include #include @@ -19,13 +19,13 @@ namespace pdf2htmlEX { class SplashBackgroundRenderer : public SplashOutputDev { public: + static const SplashColor white; + SplashBackgroundRenderer() - { - SplashColor color; - color[0] = color[1] = color[2] = 255; - SplashOutputDev(splashModeRGB8, 4, gFlase, color, gTrue, gTrue)` - } - virtual ~BackgroundRenderer() { } + : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + { } + + virtual ~SplashBackgroundRenderer() { } virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, @@ -33,6 +33,6 @@ public: CharCode code, int nBytes, Unicode *u, int uLen); }; -} +} // namespace pdf2htmlEX -#endif //BACKGROUND_RENDERER_H__ +#endif // SPLASH_BACKGROUND_RENDERER_H__ diff --git a/src/include/pdf2htmlEX-config.h b/src/include/pdf2htmlEX-config.h new file mode 100644 index 0000000..ffb69e8 --- /dev/null +++ b/src/include/pdf2htmlEX-config.h @@ -0,0 +1,24 @@ +/* + * config.h + * Compile time constants + * + * by WangLu + */ + + +#ifndef PDF2HTMLEX_CONFIG_H__ +#define PDF2HTMLEX_CONFIG_H__ + +#include + +#define HAVE_CAIRO 0 + +namespace pdf2htmlEX { + +static const std::string PDF2HTMLEX_VERSION = "0.5"; +static const std::string PDF2HTMLEX_PREFIX = "/usr/local"; +static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX"; + +} // namespace pdf2htmlEX + +#endif //PDF2HTMLEX_CONFIG_H__ diff --git a/src/include/pdf2htmlEX-config.h.in b/src/include/pdf2htmlEX-config.h.in index 7a7ef62..695a8dd 100644 --- a/src/include/pdf2htmlEX-config.h.in +++ b/src/include/pdf2htmlEX-config.h.in @@ -11,6 +11,8 @@ #include +#define HAVE_CAIRO @HAVE_CAIRO@ + namespace pdf2htmlEX { static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@"; From 6ae97943df3fb138607b7bebdef6b22edf4f74d8 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 2 Oct 2012 21:11:00 +0800 Subject: [PATCH 10/17] .. --- src/HTMLRenderer/general.cc | 22 +++++----------------- src/SplashBackgroundRenderer.cc | 19 +++++++++++++++++++ src/include/SplashBackgroundRenderer.h | 12 +++++++++++- 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 427af7b..ea55f72 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -62,10 +62,6 @@ HTMLRenderer::~HTMLRenderer() delete [] width_list; } -static GBool annot_cb(Annot *, void *) { - return false; -}; - void HTMLRenderer::process(PDFDoc *doc) { cur_doc = doc; @@ -115,7 +111,7 @@ void HTMLRenderer::process(PDFDoc *doc) BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { - bg_renderer = new BackgroundRenderer(); + bg_renderer = new BackgroundRenderer(param); bg_renderer->startDoc(doc); } @@ -134,19 +130,11 @@ void HTMLRenderer::process(PDFDoc *doc) if(param->process_nontext) { - doc->displayPage(bg_renderer, i, param->h_dpi, param->v_dpi, - 0, true, false, false, - nullptr, nullptr, &annot_cb, nullptr); + auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i); + if(param->single_html) + add_tmp_file((char*)fn); - { - auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i); - if(param->single_html) - add_tmp_file((char*)fn); - - bg_renderer->getBitmap()->writeImgFile(splashFormatPng, - (char*)fn, - param->h_dpi, param->v_dpi); - } + bg_renderer->render_page(doc, i, (char*)fn); } doc->displayPage(this, i, diff --git a/src/SplashBackgroundRenderer.cc b/src/SplashBackgroundRenderer.cc index 4492160..f2d7a85 100644 --- a/src/SplashBackgroundRenderer.cc +++ b/src/SplashBackgroundRenderer.cc @@ -4,10 +4,14 @@ * Copyright (C) 2012 Lu Wang */ +#include + #include "SplashBackgroundRenderer.h" namespace pdf2htmlEX { +using std::string; + const SplashColor SplashBackgroundRenderer::white = {255,255,255}; void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y, @@ -18,4 +22,19 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y, // SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } +static GBool annot_cb(Annot *, void *) { + return false; +}; + +void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename) +{ + doc->displayPage(this, pageno, param->h_dpi, param->v_dpi, + 0, true, false, false, + nullptr, nullptr, &annot_cb, nullptr); + + getBitmap()->writeImgFile(splashFormatPng, + (char*)filename.c_str(), + param->h_dpi, param->v_dpi); +} + } // namespace pdf2htmlEX diff --git a/src/include/SplashBackgroundRenderer.h b/src/include/SplashBackgroundRenderer.h index aced0fb..e56bf1d 100644 --- a/src/include/SplashBackgroundRenderer.h +++ b/src/include/SplashBackgroundRenderer.h @@ -10,9 +10,13 @@ #ifndef SPLASH_BACKGROUND_RENDERER_H__ #define SPLASH_BACKGROUND_RENDERER_H__ +#include + #include #include +#include "Param.h" + namespace pdf2htmlEX { // Based on BackgroundRenderer from poppler @@ -21,8 +25,9 @@ class SplashBackgroundRenderer : public SplashOutputDev public: static const SplashColor white; - SplashBackgroundRenderer() + SplashBackgroundRenderer(const Param * param) : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + , param(param) { } virtual ~SplashBackgroundRenderer() { } @@ -31,6 +36,11 @@ public: double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); + + void render_page(PDFDoc * doc, int pageno, const std::string & filename); + +protected: + const Param * param; }; } // namespace pdf2htmlEX From 2041c2d146d2bcfba372d521da568823bfb6a107 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 02:19:40 +0800 Subject: [PATCH 11/17] working on linear gradient --- CMakeLists.txt | 14 +- src/CairoBackgroundRenderer.cc | 4 + src/HTMLRenderer/LineBuffer.cc | 4 +- src/HTMLRenderer/draw.cc | 202 ++++++++- src/HTMLRenderer/general.cc | 2 +- src/HTMLRenderer/install.cc | 6 +- src/HTMLRenderer/link.cc | 8 +- src/HTMLRenderer/state.cc | 20 +- src/HTMLRenderer/text.cc | 2 +- src/HTMLRenderer/text.cc.orig | 567 -------------------------- src/Preprocessor.cc | 4 +- src/include/CairoBackgroundRenderer.h | 25 +- src/include/HTMLRenderer.h | 16 +- src/include/util.h | 19 +- src/pdf2htmlEX.cc | 4 +- 15 files changed, 261 insertions(+), 636 deletions(-) delete mode 100644 src/HTMLRenderer/text.cc.orig diff --git a/CMakeLists.txt b/CMakeLists.txt index 324591d..18dd525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,14 +20,20 @@ include_directories(${POPPLER_INCLUDE_DIRS}) link_directories(${POPPLER_LIBRARY_DIRS}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES}) -find_path(CAIRO_OUTPUTDEV_INCLUDE_PATH CairoOutputDev.h PATHS - ${POPPLER_INCLUDE_DIRS} NO_DEFAULT_PATH) -if(CAIRO_OUTPUTDEV_INCLUDE_PATH) - message("Found CairoOutputDev.h: ${POPPLER_INCLUDE_DIRS}/CairoOutputDev.h") +# disable CAIRO for now +if(0) +pkg_check_modules(POPPLER_CAIRO poppler-cairo>=0.20.0) +if(POPPLER_CAIRO_FOUND) set(HAVE_CAIRO 1) + include_directories(${POPPLER_CAIRO_INCLUDE_DIRS}) + link_directories(${POPPLER_CAIRO_LIBRARY_DIRS}) + set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_CAIRO_LIBRARIES}) else() set(HAVE_CAIRO 0) endif() +else() +set(HAVE_CAIRO 0) +endif() # fontforge starts using pkg-config 'correctly' since 2.0.0 diff --git a/src/CairoBackgroundRenderer.cc b/src/CairoBackgroundRenderer.cc index 9005ba9..3e51741 100644 --- a/src/CairoBackgroundRenderer.cc +++ b/src/CairoBackgroundRenderer.cc @@ -20,6 +20,10 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } +void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const std::string & filename) +{ +} + } // namespace pdf2htmlEX #endif // HAVE_CAIRO diff --git a/src/HTMLRenderer/LineBuffer.cc b/src/HTMLRenderer/LineBuffer.cc index 32e557a..acbb944 100644 --- a/src/HTMLRenderer/LineBuffer.cc +++ b/src/HTMLRenderer/LineBuffer.cc @@ -75,7 +75,7 @@ void HTMLRenderer::LineBuffer::flush(void) for(auto iter = states.begin(); iter != states.end(); ++iter) { const auto & s = *iter; - max_ascent = max(max_ascent, s.ascent * s.draw_font_size); + max_ascent = max(max_ascent, s.ascent * s.draw_font_size); } ostream & out = renderer->html_fout; @@ -155,7 +155,7 @@ void HTMLRenderer::LineBuffer::flush(void) ++ cur_offset_iter; } - size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); + size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx); cur_text_idx = next_text_idx; diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 9e3d611..1ee161f 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -7,6 +7,12 @@ * 2012.10.01 */ +#include +#include +#include +#include +#include + #include "HTMLRenderer.h" #include "util.h" #include "namespace.h" @@ -14,6 +20,14 @@ namespace pdf2htmlEX { using std::swap; +using std::min; +using std::max; +using std::acos; +using std::asin; +using std::ostringstream; +using std::sqrt; +using std::vector; +using std::ostream; static bool is_horizontal_line(GfxSubpath * path) { @@ -44,6 +58,157 @@ static bool is_rectangle(GfxSubpath * path) && _equal(path->getY(3), path->getY(0))); } +static void get_shading_bbox(GfxState * state, GfxShading * shading, + double & x1, double & y1, double & x2, double & y2) +{ + // from SplashOutputDev.cc in poppler + if(shading->getHasBBox()) + { + shading->getBBox(&x1, &y1, &x2, &y2); + } + else + { + state->getClipBBox(&x1, &y1, &x2, &y2); + Matrix ctm, ictm; + state->getCTM(&ctm); + ctm.invertTo(&ictm); + + double x[4], y[4]; + ictm.transform(x1, y1, &x[0], &y[0]); + ictm.transform(x2, y1, &x[1], &y[1]); + ictm.transform(x1, y2, &x[2], &y[2]); + ictm.transform(x2, y2, &x[3], &y[3]); + + x1 = x2 = x[0]; + y1 = y2 = y[0]; + + for(int i = 1; i < 4; ++i) + { + x1 = min(x1, x[i]); + y1 = min(y1, y[i]); + x2 = max(x2, x[i]); + y2 = max(y2, y[i]); + } + } +} + +static double get_degree(double dx, double dy) +{ + static const double PI = acos(-1.0); + double r = hypot(dx, dy); + + double ang = acos(dx / r); + if(!_is_positive(dy)) + ang = 2 * PI - ang; + + return ang * 180.0 / PI; +} + +class LinearGradient +{ +public: + LinearGradient(GfxAxialShading * shading, + double x1, double y1, double x2, double y2); + + void dumpto (ostream & out); + + static void style_function (void * p, ostream & out) + { + static_cast(p)->dumpto(out); + } + + // TODO, add alpha + class ColorStop + { + public: + GfxRGB rgb; + double pos; // [0,1] + }; + + vector stops; + double degree; +}; + +LinearGradient::LinearGradient (GfxAxialShading * shading, + double x1, double y1, double x2, double y2) +{ + // coordinate for t = 0 and t = 1 + double t0x, t0y, t1x, t1y; + shading->getCoords(&t0x, &t0y, &t1x, &t1y); + + degree = get_degree(t1x - t0x, t1y - t0y); + + // get the range of t in the box + // from GfxState.cc in poppler + double box_tmin, box_tmax; + { + double idx = t1x - t0x; + double idy = t1y - t0y; + double inv_len = 1.0 / (idx * idx + idy * idy); + idx *= inv_len; + idy *= inv_len; + + // t of (x1,y1) + box_tmin = box_tmax = (x1 - t0x) * idx + (y1 - t0y) * idy; + double tdx = (x2 - x1) * idx; + if(tdx < 0) + box_tmin += tdx; + else + box_tmax += tdx; + + double tdy = (y2 - y1) * idy; + if(tdy < 0) + box_tmin += tdy; + else + box_tmax += tdy; + } + + // get the domain of t in the box + double domain_tmin = max(box_tmin, shading->getDomain0()); + double domain_tmax = min(box_tmax, shading->getDomain1()); + + // TODO: better sampling + // TODO: check background color + { + stops.clear(); + double tstep = (domain_tmax - domain_tmin) / 13.0; + for(double t = domain_tmin; t <= domain_tmax; t += tstep) + { + GfxColor color; + shading->getColor(t, &color); + + ColorStop stop; + shading->getColorSpace()->getRGB(&color, &stop.rgb); + stop.pos = (t - box_tmin) / (box_tmax - box_tmin); + + stops.push_back(stop); + } + } +} + +void LinearGradient::dumpto (ostream & out) +{ + out << "background-color:red;"; +} + +GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) +{ + if(!(param->css_draw)) return gFalse; + + double x1, y1, x2, y2; + get_shading_bbox(state, shading, x1, y1, x2, y2); + + LinearGradient lg(shading, x1, y1, x2, y2); + + // TODO: check background color + css_draw_rectangle(x1, y1, x2-x1, y2-y1, state->getCTM(), + nullptr, 0, + nullptr, nullptr, + LinearGradient::style_function, &lg); + + return gTrue; +} + //TODO track state //TODO connection style void HTMLRenderer::css_draw(GfxState *state, bool fill) @@ -67,9 +232,9 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) double lw = state->getLineWidth(); - css_draw_rectangle(x1, y - lw/2, x2-x1, lw, + css_draw_rectangle(x1, y - lw/2, x2-x1, lw, state->getCTM(), nullptr, 0, - nullptr, &stroke_color, state); + nullptr, &stroke_color); } else if (is_rectangle(subpath)) { @@ -108,34 +273,36 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) w += lw[1]; } - css_draw_rectangle(x, y, w, h, + css_draw_rectangle(x, y, w, h, state->getCTM(), lw, lw_count, - ps, pf, state); + ps, pf); } } } -void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, +void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, const double * tm, double * line_width_array, int line_width_count, - const GfxRGB * line_color, const GfxRGB * fill_color, - GfxState * state) + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, ostream &), void * style_function_data) { close_text_line(); - double ctm[6]; - memcpy(ctm, state->getCTM(), sizeof(ctm)); + double new_tm[6]; + memcpy(new_tm, tm, sizeof(new_tm)); - _transform(ctm, x, y); + _transform(new_tm, x, y); double scale = 1.0; { - double i1 = ctm[0] + ctm[2]; - double i2 = ctm[1] + ctm[3]; - scale = sqrt((i1 * i1 + i2 * i2) / 2.0); + static const double sqrt2 = sqrt(2.0); + + double i1 = (new_tm[0] + new_tm[2]) / sqrt2; + double i2 = (new_tm[1] + new_tm[3]) / sqrt2; + scale = hypot(i1, i2); if(_is_positive(scale)) { for(int i = 0; i < 4; ++i) - ctm[i] /= scale; + new_tm[i] /= scale; } else { @@ -143,7 +310,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, } } - html_fout << "
font_size_multiplier); + text_scale_factor1 = max(zoom, param->font_size_multiplier); text_scale_factor2 = zoom / text_scale_factor1; } diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index e2bd654..b741e26 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -215,9 +215,11 @@ long long HTMLRenderer::install_font_size(double font_size) long long HTMLRenderer::install_transform_matrix(const double * tm) { - TM m(tm); + Matrix m; + memcpy(m.m, tm, sizeof(m.m)); + auto iter = transform_matrix_map.lower_bound(m); - if((iter != transform_matrix_map.end()) && (m == (iter->first))) + if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4))) return iter->second; long long new_tm_id = transform_matrix_map.size(); diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index a38fc65..aa579c4 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -191,10 +191,10 @@ void HTMLRenderer::processLink(AnnotLink * al) double x,y,w,h; double x1, y1, x2, y2; al->getRect(&x1, &y1, &x2, &y2); - x = min(x1, x2); - y = min(y1, y2); - w = max(x1, x2) - x; - h = max(y1, y2) - y; + x = min(x1, x2); + y = min(y1, y2); + w = max(x1, x2) - x; + h = max(y1, y2) - y; double border_width = 0; double border_top_bottom_width = 0; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 862d7d8..fc94015 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -12,6 +12,7 @@ * optimize lines using nested (reuse classes) */ +#include #include #include "HTMLRenderer.h" @@ -22,6 +23,7 @@ namespace pdf2htmlEX { using std::max; using std::abs; +using std::hypot; void HTMLRenderer::updateAll(GfxState * state) { @@ -98,7 +100,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(new_font_info->id == cur_font_info->id)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_font_info = new_font_info; } @@ -146,7 +148,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_draw_text_tm[6]; memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); - double new_draw_text_scale = 1.0/text_scale_factor2 * sqrt(new_draw_text_tm[2] * new_draw_text_tm[2] + new_draw_text_tm[3] * new_draw_text_tm[3]); + double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]); double new_draw_font_size = cur_font_size; if(_is_positive(new_draw_text_scale)) @@ -168,13 +170,13 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(_equal(new_draw_font_size, draw_font_size))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); draw_font_size = new_draw_font_size; cur_fs_id = install_font_size(draw_font_size); } if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4))) { - new_line_state = max(new_line_state, NLS_DIV); + new_line_state = max(new_line_state, NLS_DIV); memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm)); cur_ttm_id = install_transform_matrix(draw_text_tm); } @@ -236,7 +238,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!merged) { - new_line_state = max(new_line_state, NLS_DIV); + new_line_state = max(new_line_state, NLS_DIV); } } @@ -247,7 +249,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_letter_space = state->getCharSpace(); if(!_equal(cur_letter_space, new_letter_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_letter_space = new_letter_space; cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale); } @@ -260,7 +262,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_word_space = state->getWordSpace(); if(!_equal(cur_word_space, new_word_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_word_space = new_word_space; cur_ws_id = install_word_space(cur_word_space * draw_text_scale); } @@ -273,7 +275,7 @@ void HTMLRenderer::check_state_change(GfxState * state) state->getFillRGB(&new_color); if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_color = new_color; cur_color_id = install_color(&new_color); } @@ -286,7 +288,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_rise = state->getRise(); if(!_equal(cur_rise, new_rise)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_rise = new_rise; cur_rise_id = install_rise(new_rise * draw_text_scale); } diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 1a6e323..1486d23 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -310,7 +310,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo memset(width_list, -1, 0x10000 * sizeof(*width_list)); if(code2GID) - maxcode = min(maxcode, code2GID_len - 1); + maxcode = min(maxcode, code2GID_len - 1); bool is_truetype = is_truetype_suffix(suffix); int max_key = maxcode; diff --git a/src/HTMLRenderer/text.cc.orig b/src/HTMLRenderer/text.cc.orig deleted file mode 100644 index 1a4b44f..0000000 --- a/src/HTMLRenderer/text.cc.orig +++ /dev/null @@ -1,567 +0,0 @@ -/* - * text.cc - * - * Handling text & font, and relative stuffs - * - * by WangLu - * 2012.08.14 - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "ffw.h" -#include "HTMLRenderer.h" -#include "namespace.h" - -namespace pdf2htmlEX { - -using std::unordered_set; -using std::min; -using std::all_of; -using std::floor; -using std::swap; - -string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) -{ - Object obj, obj1, obj2; - Object font_obj, font_obj2, fontdesc_obj; - string suffix; - string filepath; - - try - { - // mupdf consulted - string subtype; - - auto * id = font->getID(); - - Object ref_obj; - ref_obj.initRef(id->num, id->gen); - ref_obj.fetch(xref, &font_obj); - ref_obj.free(); - - if(!font_obj.isDict()) - { - cerr << "Font object is not a dictionary" << endl; - throw 0; - } - - Dict * dict = font_obj.getDict(); - if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) - { - if(font_obj2.arrayGetLength() == 0) - { - cerr << "Warning: empty DescendantFonts array" << endl; - } - else - { - if(font_obj2.arrayGetLength() > 1) - cerr << "TODO: multiple entries in DescendantFonts array" << endl; - - if(font_obj2.arrayGet(0, &obj2)->isDict()) - { - dict = obj2.getDict(); - } - } - } - - if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) - { - cerr << "Cannot find FontDescriptor " << endl; - throw 0; - } - - dict = fontdesc_obj.getDict(); - - if(dict->lookup("FontFile3", &obj)->isStream()) - { - if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) - { - subtype = obj1.getName(); - if(subtype == "Type1C") - { - suffix = ".cff"; - } - else if (subtype == "CIDFontType0C") - { - suffix = ".cid"; - } - else - { - cerr << "Unknown subtype: " << subtype << endl; - throw 0; - } - } - else - { - cerr << "Invalid subtype in font descriptor" << endl; - throw 0; - } - } - else if (dict->lookup("FontFile2", &obj)->isStream()) - { - suffix = ".ttf"; - } - else if (dict->lookup("FontFile", &obj)->isStream()) - { - suffix = ".pfa"; - } - else - { - cerr << "Cannot find FontFile for dump" << endl; - throw 0; - } - - if(suffix == "") - { - cerr << "Font type unrecognized" << endl; - throw 0; - } - - obj.streamReset(); - - filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str()); - add_tmp_file(filepath); - - ofstream outf(filepath, ofstream::binary); - if(!outf) - throw string("Cannot open file ") + filepath + " for writing"; - - char buf[1024]; - int len; - while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) - { - outf.write(buf, len); - } - outf.close(); - obj.streamClose(); - } - catch(int) - { - cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; - } - - obj2.free(); - obj1.free(); - obj.free(); - - fontdesc_obj.free(); - font_obj2.free(); - font_obj.free(); - - return filepath; -} - -void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) -{ - if(param->debug) - { - cerr << "Embed font: " << filepath << " " << info.id << endl; - } - - ffw_load_font(filepath.c_str()); - int * code2GID = nullptr; - int code2GID_len = 0; - int maxcode = 0; - - Gfx8BitFont * font_8bit = nullptr; - GfxCIDFont * font_cid = nullptr; - - string suffix = get_suffix(filepath); - for(auto iter = suffix.begin(); iter != suffix.end(); ++iter) - *iter = tolower(*iter); - - /* - * if parm->tounicode is 0, try the provided tounicode map first - */ - info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0)); - info.has_space = false; - - const char * used_map = nullptr; - - info.em_size = ffw_get_em_size(); - - if(get_metric_only) - return; - - used_map = preprocessor.get_code_map(hash_ref(font->getID())); - - /* - * Step 1 - * dump the font file directly from the font descriptor and put the glyphs into the correct slots - * - * for 8bit + nonTrueType - * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) - * - * for 8bit + TrueType - * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode - * - * for CID + nonTrueType - * Flatten the font - * - * for CID Truetype - * same as 8bitTrueType, except for that we have to check 65536 charcodes - */ - if(!font->isCIDFont()) - { - font_8bit = dynamic_cast(font); - maxcode = 0xff; - if(is_truetype_suffix(suffix)) - { - ffw_reencode_glyph_order(); - FoFiTrueType *fftt = nullptr; - if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) - { - code2GID = font_8bit->getCodeToGIDMap(fftt); - code2GID_len = 256; - delete fftt; - } - } - else - { - // move the slot such that it's consistent with the encoding seen in PDF - unordered_set nameset; - bool name_conflict_warned = false; - - memset(cur_mapping2, 0, 0x100 * sizeof(char*)); - - for(int i = 0; i < 256; ++i) - { - if(!used_map[i]) continue; - - auto cn = font_8bit->getCharName(i); - if(cn == nullptr) - { - continue; - } - else - { - if(nameset.insert(string(cn)).second) - { - cur_mapping2[i] = cn; - } - else - { - if(!name_conflict_warned) - { - name_conflict_warned = true; - //TODO: may be resolved using advanced font properties? - cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; - } - } - } - } - - ffw_reencode_raw2(cur_mapping2, 256, 0); - } - } - else - { - font_cid = dynamic_cast(font); - maxcode = 0xffff; - - if(is_truetype_suffix(suffix)) - { - ffw_reencode_glyph_order(); - - GfxCIDFont * _font = dynamic_cast(font); - - // code2GID has been stored for embedded CID fonts - code2GID = _font->getCIDToGID(); - code2GID_len = _font->getCIDToGIDLen(); - } - else - { - ffw_cidflatten(); - } - } - - /* - * Step 2 - * map charcode (or GID for CID truetype) - * generate an Consortium encoding file and let fontforge handle it. - * - * - Always map to Unicode for 8bit TrueType fonts and CID fonts - * - * - For 8bit nonTruetype fonts: - * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set - * - * - * Also fill in the width_list, and set widths accordingly - */ - - - { - unordered_set codeset; - bool name_conflict_warned = false; - - auto ctu = font->getToUnicode(); - memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); - memset(width_list, -1, 0x10000 * sizeof(*width_list)); - - if(code2GID) - maxcode = min(maxcode, code2GID_len - 1); - - bool is_truetype = is_truetype_suffix(suffix); - int max_key = maxcode; - /* - * Traverse all possible codes - */ - bool retried = false; // avoid infinite loop - for(int i = 0; i <= maxcode; ++i) - { - if(!used_map[i]) - continue; - - /* - * Skip glyphs without names (only for non-ttf fonts) - */ - if(!is_truetype && (font_8bit != nullptr) - && (font_8bit->getCharName(i) == nullptr)) - { - continue; - } - - int k = i; - if(code2GID) - { - if((k = code2GID[i]) == 0) continue; - } - - if(k > max_key) - max_key = k; - - Unicode u, *pu=&u; - if(info.use_tounicode) - { - int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0; - u = check_unicode(pu, n, i, font); - } - else - { - u = unicode_from_font(i, font); - } - - if(u == ' ') - info.has_space = true; - - if(codeset.insert(u).second) - { - cur_mapping[k] = u; - } - else - { - // collision detected - if(param->tounicode == 0) - { - // in auto mode, just drop the tounicode map - if(!retried) - { - cerr << "ToUnicode CMap is not valid and got dropped" << endl; - retried = true; - codeset.clear(); - info.use_tounicode = false; - memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); - memset(width_list, -1, 0x10000 * sizeof(*width_list)); - i = -1; - continue; - } - } - if(!name_conflict_warned) - { - name_conflict_warned = true; - //TODO: may be resolved using advanced font properties? - cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; - } - } - - if(font_8bit) - { - width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5); - } - else - { - char buf[2]; - buf[0] = (i >> 8) & 0xff; - buf[1] = (i & 0xff); - width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5); - } - } - - ffw_reencode_raw(cur_mapping, max_key + 1, 1); - ffw_set_widths(width_list, max_key + 1); - - if(ctu) - ctu->decRefCnt(); - } - - /* - * Step 3 - * - * Generate the font as desired - * - */ - string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); - add_tmp_file(cur_tmp_fn); - string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); - add_tmp_file(other_tmp_fn); - - ffw_save(cur_tmp_fn.c_str()); - ffw_close(); - - /* - * Step 4 - * Font Hinting - */ - bool hinted = false; - - // Call external hinting program if specified - if(param->external_hint_tool != "") - { - hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0); - } - - // Call internal hinting procedure if specified - if((!hinted) && (param->auto_hint)) - { - ffw_load_font(cur_tmp_fn.c_str()); - ffw_auto_hint(); - ffw_save(other_tmp_fn.c_str()); - ffw_close(); - hinted = true; - } - - if(hinted) - { - swap(cur_tmp_fn, other_tmp_fn); - } - - /* - * Step 5 - * Generate the font - * Reload to retrieve/fix accurate ascent/descent - */ - string fn = (char*)str_fmt("%s/f%llx%s", - (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), - info.id, param->font_suffix.c_str()); - - if(param->single_html) - add_tmp_file(fn); - - ffw_load_font(cur_tmp_fn.c_str()); - ffw_metric(&info.ascent, &info.descent); - ffw_save(fn.c_str()); - ffw_close(); -} - -void HTMLRenderer::drawString(GfxState * state, GooString * s) -{ - if(s->getLength() == 0) - return; - - auto font = state->getFont(); - if((font == nullptr) || (font->getWMode())) - { - return; - } - - //hidden - if((state->getRender() & 3) == 3) - { - return; - } - - // see if the line has to be closed due to state change - check_state_change(state); - prepare_line(state); - - // Now ready to output - // get the unicodes - char *p = s->getCString(); - int len = s->getLength(); - - double dx = 0; - double dy = 0; - double dxerr = 0; - double dx1,dy1; - double ox, oy; - - int nChars = 0; - int nSpaces = 0; - int uLen; - - CharCode code; - Unicode *u = nullptr; - - while (len > 0) { - auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); - - if(!(_equal(ox, 0) && _equal(oy, 0))) - { - cerr << "TODO: non-zero origins" << endl; - } - - bool is_space = false; - if (n == 1 && *p == ' ') - { - ++nSpaces; - is_space = true; - } - - if(is_space && (param->space_as_offset)) - { - // ignore horiz_scaling, as it's merged in CTM - line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); - } - else - { - if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode)) - { - line_buf.append_unicodes(u, uLen); - } - else - { - if(cur_font_info->use_tounicode) - { - Unicode uu = check_unicode(u, uLen, code, font); - line_buf.append_unicodes(&uu, 1); - } - else - { - Unicode uu = unicode_from_font(code, font); - line_buf.append_unicodes(&uu, 1); - } - } - } - - dx += dx1; - dy += dy1; - - ++nChars; - p += n; - len -= n; - } - - double hs = state->getHorizScaling(); - - // horiz_scaling is merged into ctm now, - // so the coordinate system is ugly - dx = (dx * cur_font_size + nChars * cur_letter_space + nSpaces * cur_word_space) * hs; - - dy *= cur_font_size; - - cur_tx += dx; - cur_ty += dy; - - draw_tx += dx + dxerr * cur_font_size * hs; - draw_ty += dy; -} - -} // namespace pdf2htmlEX diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index c64b04a..f96b602 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -82,8 +82,8 @@ void Preprocessor::drawChar(GfxState *state, double x, double y, void Preprocessor::startPage(int pageNum, GfxState *state) { - max_width = max(max_width, state->getPageWidth()); - max_height = max(max_height, state->getPageHeight()); + max_width = max(max_width, state->getPageWidth()); + max_height = max(max_height, state->getPageHeight()); } const char * Preprocessor::get_code_map (long long font_id) const diff --git a/src/include/CairoBackgroundRenderer.h b/src/include/CairoBackgroundRenderer.h index 5a2d2ab..c3c8e90 100644 --- a/src/include/CairoBackgroundRenderer.h +++ b/src/include/CairoBackgroundRenderer.h @@ -1,6 +1,6 @@ /* - * Splash Background renderer - * Render all those things not supported as Image, with Splash + * Cairo Background renderer + * Render all those things not supported as Image, with Cairo * * Copyright (C) 2012 Lu Wang */ @@ -9,27 +9,32 @@ #ifndef CAIRO_BACKGROUND_RENDERER_H__ #define CAIRO_BACKGROUND_RENDERER_H__ -#include -#include +#include + +#include "Param.h" namespace pdf2htmlEX { // Based on BackgroundRenderer from poppler -class SplashBackgroundRenderer : public SplashOutputDev +class CairoBackgroundRenderer : public CairoOutputDev { public: - static const SplashColor white; - - SplashBackgroundRenderer() - :SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + CairoBackgroundRenderer(const Param * param) + :CairoOutputDev() + , param(param) { } - virtual ~SplashBackgroundRenderer() { } + virtual ~CairoBackgroundRenderer() { } virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); + + void render_page(PDFDoc * doc, int pageno, const std::string & filename); + +protected: + const Param * param; }; } diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index 9ebc872..ec7692b 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -79,6 +79,12 @@ class HTMLRenderer : public OutputDev // Does this device use drawChar() or drawString()? virtual GBool useDrawChar() { return gFalse; } + // Does this device use functionShadedFill(), axialShadedFill(), and + // radialShadedFill()? If this returns false, these shaded fills + // will be reduced to a series of other drawing operations. + virtual GBool useShadedFills(int type) { return type == 2; } + + // Does this device use beginType3Char/endType3Char? Otherwise, // text in Type 3 fonts will be drawn with drawChar/drawString. virtual GBool interpretType3Chars() { return gFalse; } @@ -125,6 +131,7 @@ class HTMLRenderer : public OutputDev virtual void stroke(GfxState *state) { css_draw(state, false); } virtual void fill(GfxState *state) { css_draw(state, true); } + virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); virtual void processLink(AnnotLink * al); @@ -208,11 +215,12 @@ class HTMLRenderer : public OutputDev * w,h should be the metrics WITHOUT border * * line_color & fill_color may be specified as nullptr to indicate none + * style_function & style_function_data may be provided to provide more styles */ - void css_draw_rectangle(double x, double y, double w, double h, + void css_draw_rectangle(double x, double y, double w, double h, const double * tm, double * line_width_array, int line_width_count, - const GfxRGB * line_color, const GfxRGB * fill_color, - GfxState * state); + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); //////////////////////////////////////////////////// @@ -396,7 +404,7 @@ class HTMLRenderer : public OutputDev std::unordered_map font_name_map; std::map font_size_map; - std::map transform_matrix_map; + std::map transform_matrix_map; std::map letter_space_map; std::map word_space_map; std::unordered_map color_map; diff --git a/src/include/util.h b/src/include/util.h index 746077b..1ea84fe 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -104,28 +104,21 @@ public: bool has_space; // whether space is included in the font }; -// wrapper of the transform matrix double[6] -// Transform Matrix -class TM +class Matrix_less { public: - TM() {} - TM(const double * m) {memcpy(_, m, sizeof(_));} - bool operator < (const TM & m) const { + bool operator () (const Matrix & m1, const Matrix & m2) const + { // Note that we only care about the first 4 elements for(int i = 0; i < 4; ++i) { - if(_[i] < m._[i] - EPS) + if(m1.m[i] < m2.m[i] - EPS) return true; - if(_[i] > m._[i] + EPS) + if(m1.m[i] > m2.m[i] + EPS) return false; } return false; } - bool operator == (const TM & m) const { - return _tm_equal(_, m._, 4); - } - double _[6]; }; class base64stream @@ -203,7 +196,7 @@ public: va_end(vlist); if(l >= (int)buf.capacity()) { - buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); + buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); va_start(vlist, format); l = vsnprintf(&buf.front(), buf.capacity(), format, vlist); va_end(vlist); diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index d95c70e..c65eeb0 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -183,8 +183,8 @@ int main(int argc, char **argv) throw "Copying of text from this document is not allowed."; } - param.first_page = min(max(param.first_page, 1), doc->getNumPages()); - param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); + param.first_page = min(max(param.first_page, 1), doc->getNumPages()); + param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); if(param.output_filename == "") { From 2755cf65672d1960a704d6cd9376c0ee00bfc6c1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 02:52:45 +0800 Subject: [PATCH 12/17] first implementation of linear gradient --- src/HTMLRenderer/draw.cc | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 1ee161f..169c2b6 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -92,16 +92,21 @@ static void get_shading_bbox(GfxState * state, GfxShading * shading, } } -static double get_degree(double dx, double dy) +/* + * Note that the coordinate system in HTML and PDF are different + * In HTML: + * UP = 0 + * RIGHT = PI / 2 + * DOWN = PI + * LEFT = - PI / 2 + */ +static double get_angle(double dx, double dy) { - static const double PI = acos(-1.0); double r = hypot(dx, dy); double ang = acos(dx / r); - if(!_is_positive(dy)) - ang = 2 * PI - ang; - return ang * 180.0 / PI; + return ang; } class LinearGradient @@ -118,6 +123,7 @@ public: } // TODO, add alpha + class ColorStop { public: @@ -126,7 +132,7 @@ public: }; vector stops; - double degree; + double angle; }; LinearGradient::LinearGradient (GfxAxialShading * shading, @@ -136,7 +142,7 @@ LinearGradient::LinearGradient (GfxAxialShading * shading, double t0x, t0y, t1x, t1y; shading->getCoords(&t0x, &t0y, &t1x, &t1y); - degree = get_degree(t1x - t0x, t1y - t0y); + angle = get_angle(t1x - t0x, t1y - t0y); // get the range of t in the box // from GfxState.cc in poppler @@ -188,7 +194,12 @@ LinearGradient::LinearGradient (GfxAxialShading * shading, void LinearGradient::dumpto (ostream & out) { - out << "background-color:red;"; + out << "background-image:-moz-linear-gradient(" << _round(angle) << "rad"; + for(auto iter = stops.begin(); iter != stops.end(); ++iter) + { + out << "," << (iter->rgb) << " " << _round((iter->pos) * 100) << "%"; + } + out << ");"; } GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) From 2c56c6f6aea3dceab602ff2c7332cd5920150d3f Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 03:00:51 +0800 Subject: [PATCH 13/17] cross-browser linear gradient --- src/HTMLRenderer/draw.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 169c2b6..d63b405 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -194,12 +194,16 @@ LinearGradient::LinearGradient (GfxAxialShading * shading, void LinearGradient::dumpto (ostream & out) { - out << "background-image:-moz-linear-gradient(" << _round(angle) << "rad"; - for(auto iter = stops.begin(); iter != stops.end(); ++iter) + auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"}; + for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) { - out << "," << (iter->rgb) << " " << _round((iter->pos) * 100) << "%"; + out << "background-image:" << (*iter) << "linear-gradient(" << _round(angle) << "rad"; + for(auto iter2 = stops.begin(); iter2 != stops.end(); ++iter2) + { + out << "," << (iter2->rgb) << " " << _round((iter2->pos) * 100) << "%"; + } + out << ");"; } - out << ");"; } GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) From 19db1a908da1fce2251b197b165e30bad6437142 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 03:37:00 +0800 Subject: [PATCH 14/17] test.py --- test/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test.py b/test/test.py index 6ccdf11..bfa9068 100755 --- a/test/test.py +++ b/test/test.py @@ -11,7 +11,8 @@ with open('out.html','w') as outf: if not f.lower().endswith('.pdf'): continue print f - os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) + #os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) + os.system('pdf2htmlEX --dest-dir html --process-nontext 0 --css-draw 1 "%s/%s"' % (DIR,f)) ff = f[:-3] outf.write('%s
' % (ff,ff)) outf.flush(); From 49fd259d6fcaaa8357902f94f41229db1b967ff1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 12:42:00 +0800 Subject: [PATCH 15/17] css draw --- src/HTMLRenderer/draw.cc | 42 ++++++++++++++++++++++++++++++++-------- src/HTMLRenderer/link.cc | 2 +- src/include/util.h | 3 ++- src/util.cc | 23 +++++++++++++++++----- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index d63b405..8ed7602 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -36,6 +36,13 @@ static bool is_horizontal_line(GfxSubpath * path) && (_equal(path->getY(0), path->getY(1)))); } +static bool is_vertical_line(GfxSubpath * path) +{ + return ((path->getNumPoints() == 2) + && (!path->getCurve(1)) + && (_equal(path->getX(0), path->getX(1)))); +} + static bool is_rectangle(GfxSubpath * path) { if (!(((path->getNumPoints() != 4) && (path->isClosed())) @@ -94,17 +101,20 @@ static void get_shading_bbox(GfxState * state, GfxShading * shading, /* * Note that the coordinate system in HTML and PDF are different - * In HTML: - * UP = 0 - * RIGHT = PI / 2 - * DOWN = PI - * LEFT = - PI / 2 */ static double get_angle(double dx, double dy) { double r = hypot(dx, dy); + /* + * acos always returns [0, pi] + */ double ang = acos(dx / r); + /* + * for angle below x-axis + */ + if(dy < 0) + ang = -ang; return ang; } @@ -112,7 +122,7 @@ static double get_angle(double dx, double dy) class LinearGradient { public: - LinearGradient(GfxAxialShading * shading, + LinearGradient(GfxAxialShading * shading, double x1, double y1, double x2, double y2); void dumpto (ostream & out); @@ -251,7 +261,23 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) nullptr, 0, nullptr, &stroke_color); } - else if (is_rectangle(subpath)) + else if(is_vertical_line(subpath)) + { + double x = subpath->getX(0); + double y1 = subpath->getY(0); + double y2 = subpath->getY(1); + if(y1 > y2) swap(y1, y2); + + GfxRGB stroke_color; + state->getStrokeRGB(&stroke_color); + + double lw = state->getLineWidth(); + + css_draw_rectangle(x-lw/2, y1, lw, y2-y1, state->getCTM(), + nullptr, 0, + nullptr, &stroke_color); + } + else if(is_rectangle(subpath)) { close_text_line(); double x1 = subpath->getX(0); @@ -305,7 +331,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co double new_tm[6]; memcpy(new_tm, tm, sizeof(new_tm)); - _transform(new_tm, x, y); + _tm_transform(new_tm, x, y); double scale = 1.0; { diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index aa579c4..83cf6aa 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -267,7 +267,7 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << "border-style:none;"; } - _transform(default_ctm, x, y); + _tm_transform(default_ctm, x, y); html_fout << "position:absolute;" << "left:" << _round(x) << "px;" diff --git a/src/include/util.h b/src/include/util.h index 1ea84fe..5c1032b 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -47,7 +47,8 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } -void _transform(const double * ctm, double & x, double & y, bool is_delta = false); +void _tm_transform(const double * tm, double & x, double & y, bool is_delta = false); +void _tm_multiply(double * tm_left, const double * tm_right); static inline long long hash_ref(const Ref * id) { diff --git a/src/util.cc b/src/util.cc index 9432cc8..a69654e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -54,18 +54,31 @@ const std::map, std::pair {{".js", 1}, {""}} }); -void _transform(const double * ctm, double & x, double & y, bool is_delta) +void _tm_transform(const double * tm, double & x, double & y, bool is_delta) { double xx = x, yy = y; - x = ctm[0] * xx + ctm[2] * yy; - y = ctm[1] * xx + ctm[3] * yy; + x = tm[0] * xx + tm[2] * yy; + y = tm[1] * xx + tm[3] * yy; if(!is_delta) { - x += ctm[4]; - y += ctm[5]; + x += tm[4]; + y += tm[5]; } } +void _tm_multiply(double * tm_left, const double * tm_right) +{ + double old[4]; + memcpy(old, tm_left, sizeof(old)); + + tm_left[0] = old[0] * tm_right[0] + old[2] * tm_right[1]; + tm_left[1] = old[1] * tm_right[0] + old[3] * tm_right[1]; + tm_left[2] = old[0] * tm_right[2] + old[2] * tm_right[3]; + tm_left[3] = old[1] * tm_right[2] + old[3] * tm_right[3]; + tm_left[4] += old[0] * tm_right[4] + old[2] * tm_right[5]; + tm_left[5] += old[1] * tm_right[4] + old[3] * tm_right[5]; +} + bool isLegalUnicode(Unicode u) { /* From fed5d1bd8e3023299ea28f567bb224beff3aeafa Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 12:43:28 +0800 Subject: [PATCH 16/17] README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 7c08ff7..1449ed0 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,12 @@ I have tested with CYGWIN without any problem, and I believe it also works on Mi pdf2htmlEX --help +### For Geeks + +* Experimental and unsupported + + pdf2htmlEX --process-nontext 0 --css-draw 0 /path/to/foobar.pdf + ## FAQ [here](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) From caac23b62f2a4d9abd4d7d5cee4596a7f4fb8db5 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 12:51:04 +0800 Subject: [PATCH 17/17] README --- README.md | 1 + test/test.py | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 1449ed0..450d153 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ This program is designed for scientific papers with complicate formulas and figu * Proper styling (Color, Transformation...) * Links * Optimization for Web +* [EXPERIMENTAL] Path drawing with CSS ### Not supported yet diff --git a/test/test.py b/test/test.py index bfa9068..bf81859 100755 --- a/test/test.py +++ b/test/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python DIR = 'pdf' +DIR = '../../pdf.js/test/pdfs' import os