From 87204a9e9e2b40b53ca3d9f781525dab37806e54 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 15 Aug 2012 18:48:11 +0800 Subject: [PATCH] clean code; add support for letter-spacing, word-spacing, horizontal-scale and rise --- lib/all.css | 2 +- src/HTMLRenderer.h | 69 ++++++++++-- src/HTMLRenderer/export.cc | 22 +++- src/HTMLRenderer/general.cc | 22 +++- src/HTMLRenderer/install.cc | 57 +++++++--- src/HTMLRenderer/state.cc | 215 ++++++++++++++++++++++++------------ src/HTMLRenderer/text.cc | 20 ++-- src/util.h | 9 +- 8 files changed, 287 insertions(+), 129 deletions(-) diff --git a/lib/all.css b/lib/all.css index 5317c33..8234d55 100644 --- a/lib/all.css +++ b/lib/all.css @@ -21,7 +21,7 @@ position:absolute; white-space:pre; } -.l > .w { +.l > ._ { display:inline-block; font-family: monospace; } diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index 10a99c4..be4a94f 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -39,16 +39,18 @@ * * p - Page * l - Line - * w - White space + * _ - white space * i - Image * * Reusable CSS classes * + * t - Transform matrix * f - Font (also for font names) * s - font Size - * w - White space - * t - Transform matrix + * l - Letter spacing + * w - Word spacing * c - Color + * _ - white space * */ @@ -93,13 +95,26 @@ class HTMLRenderer : public OutputDev * We just mark as changed, and recheck if they have been changed when we are about to output a new string */ virtual void updateAll(GfxState * state); - virtual void updateFont(GfxState * state); - virtual void updateTextMat(GfxState * state); - virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32); + + virtual void updateRise(GfxState * state); virtual void updateTextPos(GfxState * state); virtual void updateTextShift(GfxState * state, double shift); + + virtual void updateFont(GfxState * state); + virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double 
m32); + virtual void updateTextMat(GfxState * state); + virtual void updateHorizScaling(GfxState * state); + + virtual void updateCharSpace(GfxState * state); + virtual void updateWordSpace(GfxState * state); + virtual void updateFillColor(GfxState * state); + + /* + * Rendering + */ + virtual void drawString(GfxState * state, GooString * s); virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); @@ -121,9 +136,11 @@ class HTMLRenderer : public OutputDev void install_external_font (GfxFont * font, long long fn_id); long long install_font_size(double font_size); - long long install_whitespace(double ws_width, double & actual_width); long long install_transform_matrix(const double * tm); + long long install_letter_space(double letter_space); + long long install_word_space(double word_space); long long install_color(const GfxRGB * rgb); + long long install_whitespace(double ws_width, double & actual_width); //////////////////////////////////////////////////// // export css styles @@ -136,9 +153,11 @@ class HTMLRenderer : public OutputDev void export_remote_default_font(long long fn_id); void export_local_font(long long fn_id, GfxFont * font, const std::string & original_font_name, const std::string & cssfont); void export_font_size(long long fs_id, double font_size); - void export_whitespace(long long ws_id, double ws_width); void export_transform_matrix(long long tm_id, const double * tm); + void export_letter_space(long long ls_id, double letter_space); + void export_word_space(long long ws_id, double word_space); void export_color(long long color_id, const GfxRGB * rgb); + void export_whitespace(long long ws_id, double ws_width); //////////////////////////////////////////////////// // state tracking @@ -165,26 +184,46 @@ class HTMLRenderer : public OutputDev //////////////////////////////////////////////////// // if we have a pending opened line 
bool line_opened; - + + // The order is according to the appearance in check_state_change // any state changed bool all_changed; + // rise + double cur_rise; + bool rise_changed; // current position double cur_tx, cur_ty; // real text position, in text coords bool text_pos_changed; + // font & size long long cur_fn_id; double cur_font_size; long long cur_fs_id; bool font_changed; + // transform matrix long long cur_tm_id; bool ctm_changed; bool text_mat_changed; - + // horizontal scaling + bool hori_scale_changed; // this is CTM * TextMAT in PDF, not only CTM - // [4] and [5] are ignored, we'll calculate the position of the origin separately + // [4] and [5] are ignored, + // as we'll calculate the position of the origin separately + // TODO: change this for images double cur_ctm[6]; // unscaled + // letter spacing + long long cur_ls_id; + double cur_letter_space; + bool letter_space_changed; + + // word spacing + long long cur_ws_id; + double cur_word_space; + bool word_space_changed; + + // color long long cur_color_id; GfxRGB cur_color; bool color_changed; @@ -208,10 +247,16 @@ class HTMLRenderer : public OutputDev std::unordered_map font_name_map; std::map font_size_map; - std::map whitespace_map; + std::map transform_matrix_map; + + std::map letter_space_map; + std::map word_space_map; + std::map color_map; + std::map whitespace_map; + int image_count; const Param * param; diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index d15e0d4..ad477ef 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -94,11 +94,6 @@ void HTMLRenderer::export_font_size (long long fs_id, double font_size) allcss_fout << format(".s%|1$x|{font-size:%2%px;}") % fs_id % font_size << endl; } -void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) -{ - allcss_fout << format(".w%|1$x|{width:%2%px;}") % ws_id % ws_width << endl; -} - void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) { allcss_fout << 
format(".t%|1$x|{") % tm_id; @@ -128,10 +123,25 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) allcss_fout << "}" << endl; } -void HTMLRenderer::export_color(long long color_id, const GfxRGB * rgb) +void HTMLRenderer::export_letter_space (long long ls_id, double letter_space) +{ + allcss_fout << format(".l%|1$x|{letter-spacing:%2%px;}") % ls_id % letter_space << endl; +} + +void HTMLRenderer::export_word_space (long long ws_id, double word_space) +{ + allcss_fout << format(".w%|1$x|{word-spacing:%2%px;}") % ws_id % word_space << endl; +} + +void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) { allcss_fout << format(".c%|1$x|{color:rgb(%2%,%3%,%4%);}") % color_id % (int)colToByte(rgb->r) % (int)colToByte(rgb->g) % (int)colToByte(rgb->b) << endl; } +void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) +{ + allcss_fout << format("._%|1$x|{width:%2%px;}") % ws_id % ws_width << endl; +} + diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 52909db..8842650 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -30,6 +30,9 @@ HTMLRenderer::HTMLRenderer(const Param * param) install_font_size(0); install_transform_matrix(id_matrix); + + install_letter_space(0); + install_word_space(0); GfxRGB black; black.r = black.g = black.b = 0; @@ -158,18 +161,27 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) html_fout << format(");background-position:0 0;background-size:%1%px %2%px;background-repeat:no-repeat;\">") % pageWidth % pageHeight; - cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0; - cur_tx = cur_ty = 0; - cur_font_size = 0; + cur_rise = 0; + cur_fn_id = cur_fs_id = 0; + cur_font_size = 0; + + cur_tm_id = 0; memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); + + cur_ls_id = cur_ws_id = 0; + cur_letter_space = cur_word_space = 0; + + cur_color_id = 0; + cur_color.r = cur_color.g = cur_color.b = 0; + + cur_tx = cur_ty = 0; + memcpy(draw_ctm, 
id_matrix, sizeof(draw_ctm)); draw_font_size = 0; draw_scale = 1.0; draw_tx = draw_ty = 0; - cur_color.r = cur_color.g = cur_color.b = 0; - reset_state_track(); } diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 5af6852..d258c92 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -237,22 +237,6 @@ long long HTMLRenderer::install_font_size(double font_size) return new_fs_id; } -long long HTMLRenderer::install_whitespace(double ws_width, double & actual_width) -{ - auto iter = whitespace_map.lower_bound(ws_width - param->h_eps); - if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) < param->h_eps)) - { - actual_width = iter->first; - return iter->second; - } - - actual_width = ws_width; - long long new_ws_id = whitespace_map.size(); - whitespace_map.insert(make_pair(ws_width, new_ws_id)); - export_whitespace(new_ws_id, ws_width); - return new_ws_id; -} - long long HTMLRenderer::install_transform_matrix(const double * tm) { TM m(tm); @@ -266,6 +250,30 @@ long long HTMLRenderer::install_transform_matrix(const double * tm) return new_tm_id; } +long long HTMLRenderer::install_letter_space(double letter_space) +{ + auto iter = letter_space_map.lower_bound(letter_space - EPS); + if((iter != letter_space_map.end()) && (_equal(iter->first, letter_space))) + return iter->second; + + long long new_ls_id = letter_space_map.size(); + letter_space_map.insert(make_pair(letter_space, new_ls_id)); + export_letter_space(new_ls_id, letter_space); + return new_ls_id; +} + +long long HTMLRenderer::install_word_space(double word_space) +{ + auto iter = word_space_map.lower_bound(word_space - EPS); + if((iter != word_space_map.end()) && (_equal(iter->first, word_space))) + return iter->second; + + long long new_ws_id = word_space_map.size(); + word_space_map.insert(make_pair(word_space, new_ws_id)); + export_word_space(new_ws_id, word_space); + return new_ws_id; +} + long long HTMLRenderer::install_color(const GfxRGB * 
rgb) { const GfxRGB & c = *rgb; @@ -279,3 +287,20 @@ long long HTMLRenderer::install_color(const GfxRGB * rgb) return new_color_id; } +long long HTMLRenderer::install_whitespace(double ws_width, double & actual_width) +{ + // ws_width is already multiplied by draw_scale + auto iter = whitespace_map.lower_bound(ws_width - param->h_eps); + if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) < param->h_eps)) + { + actual_width = iter->first; + return iter->second; + } + + actual_width = ws_width; + long long new_ws_id = whitespace_map.size(); + whitespace_map.insert(make_pair(ws_width, new_ws_id)); + export_whitespace(new_ws_id, ws_width); + return new_ws_id; +} + diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index cb81d70..d5d04d1 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -11,34 +11,97 @@ #include "HTMLRenderer.h" #include "namespace.h" +void HTMLRenderer::updateAll(GfxState * state) +{ + all_changed = true; + updateTextPos(state); +} +void HTMLRenderer::updateRise(GfxState * state) +{ + rise_changed = true; +} +void HTMLRenderer::updateTextPos(GfxState * state) +{ + text_pos_changed = true; + cur_tx = state->getLineX(); + cur_ty = state->getLineY(); +} +void HTMLRenderer::updateTextShift(GfxState * state, double shift) +{ + text_pos_changed = true; + cur_tx -= shift * 0.001 * state->getFontSize() * state->getHorizScaling(); +} +void HTMLRenderer::updateFont(GfxState * state) +{ + font_changed = true; +} +void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32) +{ + ctm_changed = true; +} +void HTMLRenderer::updateTextMat(GfxState * state) +{ + text_mat_changed = true; +} +void HTMLRenderer::updateHorizScaling(GfxState * state) +{ + hori_scale_changed = true; +} +void HTMLRenderer::updateCharSpace(GfxState * state) +{ + letter_space_changed = true; +} + +void HTMLRenderer::updateWordSpace(GfxState * state) +{ + word_space_changed = true; +} 
+ +void HTMLRenderer::updateFillColor(GfxState * state) +{ + color_changed = true; +} void HTMLRenderer::check_state_change(GfxState * state) { + // DEPENDENCY WARNING + // don't adjust the order of state checking + bool close_line = false; + bool need_recheck_position = false; + bool need_rescale_font = false; + + // rise + if(all_changed || rise_changed) + { + double new_rise = state->getRise(); + if(!_equal(cur_rise, new_rise)) + { + need_recheck_position = true; + cur_rise = new_rise; + } + } + + // text position + // we've been tracking the text position positively in update... function if(all_changed || text_pos_changed) { - if(!(abs(cur_ty - draw_ty) * draw_scale < param->v_eps)) - { - close_line = true; - draw_ty = cur_ty; - draw_tx = cur_tx; - } + need_rescale_font = true; } - // TODO, we may use nested span if only color has been changed - if(all_changed || color_changed) + // draw_tx, draw_ty + // depends: rise & text position + if(need_recheck_position) { - GfxRGB new_color; - state->getFillRGB(&new_color); - if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) + // it's ok to use the old draw_scale + // should draw_scale be updated, we'll close the line anyway + if(!(abs((cur_ty + cur_rise) - draw_ty) * draw_scale < param->v_eps)) { close_line = true; - cur_color = new_color; - cur_color_id = install_color(&new_color); } } - bool need_rescale_font = false; + // font name & size if(all_changed || font_changed) { long long new_fn_id = install_font(state->getFont()); @@ -49,22 +112,25 @@ void HTMLRenderer::check_state_change(GfxState * state) cur_fn_id = new_fn_id; } - if(!_equal(cur_font_size, state->getFontSize())) + double new_font_size = state->getFontSize(); + if(!_equal(cur_font_size, new_font_size)) { - cur_font_size = state->getFontSize(); need_rescale_font = true; + cur_font_size = new_font_size; } } - // TODO - // Rise, HorizScale etc - if(all_changed || text_mat_changed || ctm_changed) + // ctm & 
text ctm & hori scale + if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed) { double new_ctm[6]; - double * m1 = state->getCTM(); - double * m2 = state->getTextMat(); - new_ctm[0] = m1[0] * m2[0] + m1[2] * m2[1]; - new_ctm[1] = m1[1] * m2[0] + m1[3] * m2[1]; + + const double * m1 = state->getCTM(); + const double * m2 = state->getTextMat(); + double hori_scale = state->getHorizScaling(); + + new_ctm[0] = (m1[0] * m2[0] + m1[2] * m2[1]) * hori_scale; + new_ctm[1] = (m1[1] * m2[0] + m1[3] * m2[1]) * hori_scale; new_ctm[2] = m1[0] * m2[2] + m1[2] * m2[3]; new_ctm[3] = m1[1] * m2[2] + m1[3] * m2[3]; new_ctm[4] = new_ctm[5] = 0; @@ -76,6 +142,8 @@ void HTMLRenderer::check_state_change(GfxState * state) } } + // draw_ctm, draw_scale + // depends: font size & ctm & text_ctm & hori scale if(need_rescale_font) { double new_draw_ctm[6]; @@ -97,24 +165,58 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(_equal(new_draw_font_size, draw_font_size))) { + close_line = true; draw_font_size = new_draw_font_size; cur_fs_id = install_font_size(draw_font_size); - close_line = true; } if(!(_tm_equal(new_draw_ctm, draw_ctm))) { + close_line = true; memcpy(draw_ctm, new_draw_ctm, sizeof(draw_ctm)); cur_tm_id = install_transform_matrix(draw_ctm); - close_line = true; } } - // TODO: track these - /* - if(!(_equal(s1->getCharSpace(), s2->getCharSpace()) && _equal(s1->getWordSpace(), s2->getWordSpace()) - && _equal(s1->getHorizScaling(), s2->getHorizScaling()))) - return false; - */ + // letter space + // depends: draw_scale + if(all_changed || letter_space_changed) + { + double new_letter_space = state->getCharSpace(); + if(!_equal(cur_letter_space, new_letter_space)) + { + close_line = true; + cur_letter_space = new_letter_space; + cur_ls_id = install_letter_space(cur_letter_space * draw_scale); + } + } + + // word space + // depends draw_scale + if(all_changed || word_space_changed) + { + double new_word_space = state->getWordSpace(); + 
if(!_equal(cur_word_space, new_word_space)) + { + close_line = true; + cur_word_space = new_word_space; + cur_ws_id = install_word_space(cur_word_space * draw_scale); + } + } + + // TODO, we may use nested span if only color is changed + + // color + if(all_changed || color_changed) + { + GfxRGB new_color; + state->getFillRGB(&new_color); + if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) + { + close_line = true; + cur_color = new_color; + cur_color_id = install_color(&new_color); + } + } reset_state_track(); @@ -124,10 +226,18 @@ void HTMLRenderer::check_state_change(GfxState * state) void HTMLRenderer::reset_state_track() { all_changed = false; + + rise_changed = false; text_pos_changed = false; + + font_changed = false; ctm_changed = false; text_mat_changed = false; - font_changed = false; + hori_scale_changed = false; + + letter_space_changed = false; + word_space_changed = false; + color_changed = false; } void HTMLRenderer::close_cur_line() @@ -137,43 +247,8 @@ void HTMLRenderer::close_cur_line() html_fout << "" << endl; line_opened = false; } + + draw_ty = cur_ty + cur_rise; + draw_tx = cur_tx; } -void HTMLRenderer::updateAll(GfxState * state) -{ - all_changed = true; - updateTextPos(state); -} - -void HTMLRenderer::updateFont(GfxState * state) -{ - font_changed = true; -} - -void HTMLRenderer::updateTextMat(GfxState * state) -{ - text_mat_changed = true; -} - -void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32) -{ - ctm_changed = true; -} - -void HTMLRenderer::updateTextPos(GfxState * state) -{ - text_pos_changed = true; - cur_tx = state->getLineX(); - cur_ty = state->getLineY(); -} - -void HTMLRenderer::updateTextShift(GfxState * state, double shift) -{ - text_pos_changed = true; - cur_tx -= shift * 0.001 * state->getFontSize() * state->getHorizScaling(); -} - -void HTMLRenderer::updateFillColor(GfxState * state) -{ - color_changed = true; 
-} diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index f680c27..c14ba2d 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -166,7 +166,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) { double w; auto wid = install_whitespace(target, w); - html_fout << format(" ") % wid; + html_fout << format(" ") % wid; draw_tx += w / draw_scale; } } @@ -188,6 +188,13 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) // "t0" is the id_matrix if(cur_tm_id != 0) html_fout << format(" t%|1$x|") % cur_tm_id; + + if(cur_ls_id != 0) + html_fout << format(" l%|1$x|") % cur_ls_id; + + if(cur_ws_id != 0) + html_fout << format(" w%|1$x|") % cur_ws_id; + { double x,y; // in user space @@ -199,16 +206,8 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) << "left:" << x << "px;" ; } - - // TODO: tracking - // letter & word spacing - if(_is_positive(state->getCharSpace())) - html_fout << "letter-spacing:" << state->getCharSpace() << "px;"; - if(_is_positive(state->getWordSpace())) - html_fout << "word-spacing:" << state->getWordSpace() << "px;"; //debug - //real pos & hori_scale if(0) { #if 0 @@ -223,11 +222,8 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) html_fout << "\">"; line_opened = true; - - draw_tx = cur_tx; } - // Now ready to output // get the unicodes char *p = s->getCString(); diff --git a/src/util.h b/src/util.h index 2123c0c..df3110d 100644 --- a/src/util.h +++ b/src/util.h @@ -122,13 +122,8 @@ class base64stream { public: - base64stream(istream & in) - : in(&in) - { } - - base64stream(istream && in) - : in(&in) - { } + base64stream(istream & in) : in(&in) { } + base64stream(istream && in) : in(&in) { } ostream & dumpto(ostream & out) {