From ab2588a5fda36772718e18bcc064d69c17f13e0f Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 5 Apr 2013 21:53:34 +0800 Subject: [PATCH] merge sub/sup into one line --- src/HTMLRenderer/HTMLRenderer.h | 6 ++- src/HTMLRenderer/TextLineBuffer.cc | 76 +++++++++++++++++++++--------- src/HTMLRenderer/TextLineBuffer.h | 3 +- src/HTMLRenderer/general.cc | 10 ++-- src/HTMLRenderer/state.cc | 34 ++++++++++++- src/css_class_names.cmakelists.txt | 40 ++++++++-------- src/util/StateManager.h | 4 +- src/util/css_const.h.in | 2 +- 8 files changed, 121 insertions(+), 54 deletions(-) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index a3d2aa1..0f9bbed 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -50,7 +50,9 @@ struct HTMLState Color stroke_color; double letter_space; double word_space; - double rise; + + // relative to the previous state + double vertical_align; double x,y; double transform_matrix[4]; @@ -337,6 +339,7 @@ protected: // managers store values actually used in HTML (i.e. 
scaled) //////////////////////////////////////////////// TransformMatrixManager transform_matrix_manager; + VerticalAlignManager vertical_align_manager; StrokeColorManager stroke_color_manager; LetterSpaceManager letter_space_manager; WhitespaceManager whitespace_manager; @@ -346,7 +349,6 @@ protected: BottomManager bottom_manager; HeightManager height_manager; WidthManager width_manager; - RiseManager rise_manager; LeftManager left_manager; //////////////////////////////////////////////// BGImageSizeManager bgimage_size_manager; diff --git a/src/HTMLRenderer/TextLineBuffer.cc b/src/HTMLRenderer/TextLineBuffer.cc index 60e2232..6ae591c 100644 --- a/src/HTMLRenderer/TextLineBuffer.cc +++ b/src/HTMLRenderer/TextLineBuffer.cc @@ -91,22 +91,14 @@ void HTMLRenderer::TextLineBuffer::flush(void) ostream & out = renderer->f_pages.fs; { // max_ascent determines the height of the div + double accum_vertical_align = 0; // accumulated double max_ascent = 0; for(auto iter = states.begin(); iter != states.end(); ++iter) { - double cur_ascent = iter->rise + iter->font_info->ascent * iter->font_size; + accum_vertical_align += iter->vertical_align; + double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size; if(cur_ascent > max_ascent) max_ascent = cur_ascent; - - // set id - iter->ids[State::FONT_ID] = iter->font_info->id; - iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(iter->font_size); - iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(iter->fill_color); - iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(iter->stroke_color); - iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(iter->letter_space); - iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(iter->word_space); - iter->ids[State::RISE_ID] = renderer->rise_manager .install(iter->rise); - iter->hash(); } // open
for the current text line @@ -138,20 +130,36 @@ void HTMLRenderer::TextLineBuffer::flush(void) { // export current state, find a closest parent { + // set id + state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id; + state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size); + state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color); + state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color); + state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space); + state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space); + state_iter1->hash(); + // greedy - int best_cost = State::ID_COUNT; + double vertical_align = state_iter1->vertical_align; + int best_cost = State::HASH_ID_COUNT + 1; // we have a nullptr at the beginning, so no need to check for rend for(auto iter = stack.rbegin(); *iter; ++iter) { int cost = state_iter1->diff(**iter); + if(!equal(vertical_align,0)) + ++cost; + if(cost < best_cost) { while(stack.back() != *iter) { + state_iter1->vertical_align += stack.back()->vertical_align; + stack.back()->end(out); stack.pop_back(); } best_cost = cost; + state_iter1->vertical_align = vertical_align; if(best_cost == 0) break; @@ -160,7 +168,11 @@ void HTMLRenderer::TextLineBuffer::flush(void) // cannot go further if((*iter)->start_idx <= last_text_pos_with_negative_offset) break; + + vertical_align += (*iter)->vertical_align; } + // + state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align); // export the diff between *state_iter1 and stack.back() state_iter1->begin(out, stack.back()); stack.push_back(&*state_iter1); @@ -338,9 +350,8 @@ void HTMLRenderer::TextLineBuffer::optimize() } } - // now we would like to adjust letter space to most_used width - // we shall 
apply the optimization only when it can significantly reduce the number of elements - if(max_count <= text_count / 2) + // negative letter space may cause problems + if(!is_positive(state_iter1->letter_space + most_used_width)) { // the old value is the best // just copy old offsets @@ -348,6 +359,8 @@ void HTMLRenderer::TextLineBuffer::optimize() } else { + // now we would like to adjust letter space to most_used width + // install new letter space const double old_ls = state_iter1->letter_space; state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space)); @@ -439,7 +452,7 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr { long long cur_mask = 0xff; bool first = true; - for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8) + for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8) { if(hash_umask & cur_mask) // we don't care about this ID { @@ -463,10 +476,8 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr case WORD_SPACE_ID: word_space = prev_state->word_space; break; - case RISE_ID: - rise = prev_state->rise; - break; default: + cerr << "unexpected state mask" << endl; break; } } @@ -495,6 +506,28 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr else out << ids[i]; } + // vertical align + if(!equal(vertical_align, 0)) + { + // so we have to dump it + if(first) + { + out << " left there + // it is not necessary to output vertical align long long cur_mask = 0xff; - for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8) + for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8) { if(hash_umask & cur_mask) // we don't care about this ID continue; @@ -591,7 +625,7 @@ const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = { CSS::STROKE_COLOR_CN, CSS::LETTER_SPACE_CN, CSS::WORD_SPACE_CN, - CSS::RISE_CN + CSS::VERTICAL_ALIGN_CN, }; } //namespace pdf2htmlEX diff --git a/src/HTMLRenderer/TextLineBuffer.h 
b/src/HTMLRenderer/TextLineBuffer.h index 943176c..51f8699 100644 --- a/src/HTMLRenderer/TextLineBuffer.h +++ b/src/HTMLRenderer/TextLineBuffer.h @@ -41,8 +41,9 @@ public: STROKE_COLOR_ID, LETTER_SPACE_ID, WORD_SPACE_ID, - RISE_ID, + HASH_ID_COUNT, + VERTICAL_ALIGN_ID = HASH_ID_COUNT, ID_COUNT }; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 77c8a8c..cfc6011 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -61,9 +61,9 @@ HTMLRenderer::HTMLRenderer(const Param * param) * or may be handled well (whitespace_manager) * So we can set a large eps here */ - rise_manager .set_eps(param->v_eps); - whitespace_manager .set_eps(param->h_eps); - left_manager .set_eps(param->h_eps); + vertical_align_manager.set_eps(param->v_eps); + whitespace_manager .set_eps(param->h_eps); + left_manager .set_eps(param->h_eps); /* * For othere states, we need accurate values * optimization will be done separately @@ -446,6 +446,7 @@ void HTMLRenderer::set_stream_flags(std::ostream & out) void HTMLRenderer::dump_css (void) { transform_matrix_manager.dump_css(f_css.fs); + vertical_align_manager .dump_css(f_css.fs); letter_space_manager .dump_css(f_css.fs); stroke_color_manager .dump_css(f_css.fs); word_space_manager .dump_css(f_css.fs); @@ -455,7 +456,6 @@ void HTMLRenderer::dump_css (void) bottom_manager .dump_css(f_css.fs); height_manager .dump_css(f_css.fs); width_manager .dump_css(f_css.fs); - rise_manager .dump_css(f_css.fs); left_manager .dump_css(f_css.fs); bgimage_size_manager .dump_css(f_css.fs); @@ -463,6 +463,7 @@ void HTMLRenderer::dump_css (void) double ps = print_scale(); f_css.fs << CSS::PRINT_ONLY << "{" << endl; transform_matrix_manager.dump_print_css(f_css.fs, ps); + vertical_align_manager .dump_print_css(f_css.fs, ps); letter_space_manager .dump_print_css(f_css.fs, ps); stroke_color_manager .dump_print_css(f_css.fs, ps); word_space_manager .dump_print_css(f_css.fs, ps); @@ -472,7 +473,6 @@ void HTMLRenderer::dump_css 
(void) bottom_manager .dump_print_css(f_css.fs, ps); height_manager .dump_print_css(f_css.fs, ps); width_manager .dump_print_css(f_css.fs, ps); - rise_manager .dump_print_css(f_css.fs, ps); left_manager .dump_print_css(f_css.fs, ps); bgimage_size_manager .dump_print_css(f_css.fs, ps); f_css.fs << "}" << endl; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 2072a92..b4d4015 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -101,7 +101,7 @@ void HTMLRenderer::reset_state() cur_html_state.stroke_color.transparent = true; cur_html_state.letter_space = 0; cur_html_state.word_space = 0; - cur_html_state.rise = 0; + cur_html_state.vertical_align = 0; cur_html_state.x = 0; cur_html_state.y = 0; memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix)); @@ -148,6 +148,9 @@ void HTMLRenderer::check_state_change(GfxState * state) need_recheck_position = true; } + // save current info for later use + auto old_font_info = cur_html_state.font_info; + double old_font_size = cur_html_state.font_size; // font name & size if(all_changed || font_changed) { @@ -306,11 +309,28 @@ void HTMLRenderer::check_state_change(GfxState * state) inverted[3] = old_tm[0] / det; dx = inverted[0] * lhs1 + inverted[2] * lhs2; dy = inverted[1] * lhs1 + inverted[3] * lhs2; - // currently we merge only text on a same horizontal line if(equal(dy, 0)) { + // text on the same horizontal line, we can insert positive or negative x-offsets merged = true; } + else + { + // otherwise we merge the lines only when + // - text is not shifted to the left too much + // - text is not moved too high or too low + if((dx * draw_text_scale) >= -(old_font_info->ascent - old_font_info->descent) * old_font_size - EPS) + { + double oldymin = old_font_info->descent * old_font_size; + double oldymax = old_font_info->ascent * old_font_size; + double ymin = dy * draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size; + double 
ymax = dy * draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size; + if((ymin <= oldymax + EPS) && (ymax >= oldymin - EPS)) + { + merged = true; + } + } + } } //else no solution } @@ -319,6 +339,15 @@ void HTMLRenderer::check_state_change(GfxState * state) if(merged) { text_line_buf->append_offset(dx * draw_text_scale); + if(equal(dy, 0)) + { + cur_html_state.vertical_align = 0; + } + else + { + cur_html_state.vertical_align = (dy * draw_text_scale); + new_line_state = max(new_line_state, NLS_SPAN); + } draw_tx = cur_tx; draw_ty = cur_ty; } @@ -419,6 +448,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state) // update position such that they will be recorded by text_line_buf state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y); + cur_html_state.vertical_align = 0; //resync position draw_ty = cur_ty; diff --git a/src/css_class_names.cmakelists.txt b/src/css_class_names.cmakelists.txt index 0eb0ec7..0158e44 100644 --- a/src/css_class_names.cmakelists.txt +++ b/src/css_class_names.cmakelists.txt @@ -6,29 +6,29 @@ set(CSS_INVALID_ID "_") -set(CSS_LINE_CN "t") # text -set(CSS_TRANSFORM_MATRIX_CN "m") # matrix +set(CSS_LINE_CN "t") # Text +set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix -set(CSS_PAGE_DECORATION_CN "pd") # page decoration -set(CSS_PAGE_FRAME_CN "pf") # page frame -set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content -set(CSS_PAGE_DATA_CN "pi") # page info +set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration +set(CSS_PAGE_FRAME_CN "pf") # Page Frame +set(CSS_PAGE_CONTENT_BOX_CN "pc") # Page Content +set(CSS_PAGE_DATA_CN "pi") # Page Info -set(CSS_BACKGROUND_IMAGE_CN "bi") # background image +set(CSS_BACKGROUND_IMAGE_CN "bi") # Background Image -set(CSS_FONT_FAMILY_CN "ff") # font family -set(CSS_FONT_SIZE_CN "fs") # font size +set(CSS_FONT_FAMILY_CN "ff") # Font Family +set(CSS_FONT_SIZE_CN "fs") # Font Size -set(CSS_FILL_COLOR_CN "fc") # fill color -set(CSS_STROKE_COLOR_CN "sc") # stroke color 
+set(CSS_FILL_COLOR_CN "fc") # Fill Color +set(CSS_STROKE_COLOR_CN "sc") # Stroke Color -set(CSS_LETTER_SPACE_CN "ls") # letter space -set(CSS_WORD_SPACE_CN "ws") # word space -set(CSS_RISE_CN "r") # rise +set(CSS_LETTER_SPACE_CN "ls") # Letter Space +set(CSS_WORD_SPACE_CN "ws") # Word Space +set(CSS_VERTICAL_ALIGN_CN "v") # Vertical Align set(CSS_WHITESPACE_CN "_") # whitespace -set(CSS_LEFT_CN "x") # x -set(CSS_HEIGHT_CN "h") # height -set(CSS_WIDTH_CN "w") # width -set(CSS_BOTTTOM_CN "y") # y -set(CSS_CSS_DRAW_CN "d") # draw -set(CSS_LINK_CN "l") # link +set(CSS_LEFT_CN "x") # X +set(CSS_HEIGHT_CN "h") # Height +set(CSS_WIDTH_CN "w") # Width +set(CSS_BOTTTOM_CN "y") # Y +set(CSS_CSS_DRAW_CN "d") # Draw +set(CSS_LINK_CN "l") # Link diff --git a/src/util/StateManager.h b/src/util/StateManager.h index 87dca8c..ca65d8d 100644 --- a/src/util/StateManager.h +++ b/src/util/StateManager.h @@ -231,10 +231,10 @@ public: void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; } }; -class RiseManager : public StateManager +class VerticalAlignManager : public StateManager { public: - static const char * get_css_class_name (void) { return CSS::RISE_CN; } + static const char * get_css_class_name (void) { return CSS::VERTICAL_ALIGN_CN; } double default_value(void) { return 0; } void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; } void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; } diff --git a/src/util/css_const.h.in b/src/util/css_const.h.in index ed3221d..260f898 100644 --- a/src/util/css_const.h.in +++ b/src/util/css_const.h.in @@ -43,7 +43,7 @@ const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@"; const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@"; const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@"; const char * const WORD_SPACE_CN = 
"@CSS_WORD_SPACE_CN@"; -const char * const RISE_CN = "@CSS_RISE_CN@"; +const char * const VERTICAL_ALIGN_CN = "@CSS_VERTICAL_ALIGN_CN@"; const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@"; const char * const LEFT_CN = "@CSS_LEFT_CN@"; const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@";