From 495b04f0465815874f2f95dc9a2b14d3443ae175 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sat, 30 Mar 2013 22:37:20 +0800 Subject: [PATCH] fix space optimization --- src/HTMLRenderer/TextLineBuffer.cc | 86 ++++++++-------- src/HTMLRenderer/TextLineBuffer.h | 2 +- src/HTMLRenderer/draw.cc | 2 +- src/HTMLRenderer/font.cc | 68 ++++++++----- src/HTMLRenderer/general.cc | 8 +- src/HTMLRenderer/link.cc | 2 +- src/HTMLRenderer/state.cc | 20 ++-- src/util/StateManager.h | 153 +++++++++++++++-------------- 8 files changed, 175 insertions(+), 166 deletions(-) diff --git a/src/HTMLRenderer/TextLineBuffer.cc b/src/HTMLRenderer/TextLineBuffer.cc index 78d92da..8a68fec 100644 --- a/src/HTMLRenderer/TextLineBuffer.cc +++ b/src/HTMLRenderer/TextLineBuffer.cc @@ -29,7 +29,7 @@ using std::endl; using std::find; using std::abs; -void HTMLRenderer::TextLineBuffer::reset(GfxState * state) +void HTMLRenderer::TextLineBuffer::set_pos(GfxState * state) { state->transform(state->getCurX(), state->getCurY(), &x, &y); tm_id = renderer->transform_matrix_manager.get_id(); @@ -94,15 +94,15 @@ void HTMLRenderer::TextLineBuffer::flush(void) offsets.push_back(Offset({text.size(), 0})); ostream & out = renderer->f_pages.fs; - renderer->height_manager.install(max_ascent); - renderer->left_manager .install(x); - renderer->bottom_manager.install(y); + long long hid = renderer->height_manager.install(max_ascent); + long long lid = renderer->left_manager .install(x); + long long bid = renderer->bottom_manager.install(y); out << "
left_manager .get_id() - << " " << CSS::HEIGHT_CN << renderer->height_manager.get_id() - << " " << CSS::BOTTOM_CN << renderer->bottom_manager.get_id() + << " " << CSS::LEFT_CN << lid + << " " << CSS::HEIGHT_CN << hid + << " " << CSS::BOTTOM_CN << bid << "\">"; auto cur_state_iter = states.begin(); @@ -180,10 +180,7 @@ void HTMLRenderer::TextLineBuffer::flush(void) if(!done) { - auto & wm = renderer->whitespace_manager; - wm.install(target); - auto wid = wm.get_id(); - actual_offset = wm.get_actual_value(); + long long wid = renderer->whitespace_manager.install(target, &actual_offset); if(!equal(actual_offset, 0)) { @@ -217,11 +214,9 @@ void HTMLRenderer::TextLineBuffer::flush(void) out << "
"; - states.clear(); offsets.clear(); text.clear(); - } void HTMLRenderer::TextLineBuffer::set_state (State & state) @@ -242,9 +237,6 @@ void HTMLRenderer::TextLineBuffer::set_state (State & state) void HTMLRenderer::TextLineBuffer::optimize(void) { - // need more work - return; - assert(!states.empty()); // set proper hash_umask @@ -291,43 +283,45 @@ void HTMLRenderer::TextLineBuffer::optimize(void) avg_width += iter->width; } } - avg_width /= posive_offset_count; - // now check if the width of offsets are close enough - // TODO: it might make more sense if the threshold is proportion to the font size - bool ok = true; - double accum_off = 0; - double orig_accum_off = 0; - for(auto iter = offsets.begin(); iter != offsets.end(); ++iter) + if(posive_offset_count > 0) { - orig_accum_off += iter->width; - accum_off += avg_width; - if(is_positive(iter->width) && abs(orig_accum_off - accum_off) >= renderer->param->h_eps) - { - ok = false; - break; - } - } - if(ok) - { - // ok, make all offsets equi-width + avg_width /= posive_offset_count; + + // now check if the width of offsets are close enough + // TODO: it might make more sense if the threshold is proportion to the font size + bool ok = true; + double accum_off = 0; + double orig_accum_off = 0; for(auto iter = offsets.begin(); iter != offsets.end(); ++iter) { - if(is_positive(iter->width)) - iter->width = avg_width; + orig_accum_off += iter->width; + accum_off += avg_width; + if(is_positive(iter->width) && abs(orig_accum_off - accum_off) >= renderer->param->h_eps) + { + ok = false; + break; + } } - // set new word_space - for(auto iter = states.begin(); iter != states.end(); ++iter) + if(ok) { - double new_word_space = avg_width - iter->single_space_offset() + iter->word_space; + // ok, make all offsets equi-width + for(auto iter = offsets.begin(); iter != offsets.end(); ++iter) + { + if(is_positive(iter->width)) + iter->width = avg_width; + } + // set new word_space + for(auto iter = states.begin(); iter != states.end(); ++iter) + { + iter->word_space = 0; + double new_word_space = avg_width - iter->single_space_offset(); - // install new word_space - // we might introduce more variance here - auto & wm = renderer->word_space_manager; - wm.install(new_word_space); - iter->ids[State::WORD_SPACE_ID] = wm.get_id(); - iter->word_space = wm.get_actual_value(); - iter->hash_umask &= (~word_space_umask); + // install new word_space + // we might introduce more variance here + iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager.install(new_word_space, &(iter->word_space)); + iter->hash_umask &= (~word_space_umask); + } } } } diff --git a/src/HTMLRenderer/TextLineBuffer.h b/src/HTMLRenderer/TextLineBuffer.h index c79bb24..53e3dab 100644 --- a/src/HTMLRenderer/TextLineBuffer.h +++ b/src/HTMLRenderer/TextLineBuffer.h @@ -69,7 +69,7 @@ public: double width; }; - void reset(GfxState * state); + void set_pos(GfxState * state); void append_unicodes(const Unicode * u, int l); void append_offset(double width); void append_state(void); diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index b0fc1f3..7c76e80 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -372,7 +372,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co } } - transform_matrix_manager.install(new_tm); + transform_matrix_manager.update(new_tm); f_pages.fs << "
isCIDFont()) { font_8bit = dynamic_cast(font); - info.space_width = font_8bit->getWidth(' '); } else { font_cid = dynamic_cast(font); - char buf[2] = {0, ' '}; - info.space_width = font_cid->getWidth(buf, 2); } if(get_metric_only) @@ -343,47 +341,44 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * Traverse all possible codes */ bool retried = false; // avoid infinite loop - for(int i = 0; i <= maxcode; ++i) + for(int cur_code = 0; cur_code <= maxcode; ++cur_code) { - if(!used_map[i]) + if(!used_map[cur_code]) continue; /* * Skip glyphs without names (only for non-ttf fonts) */ if(!is_truetype && (font_8bit != nullptr) - && (font_8bit->getCharName(i) == nullptr)) + && (font_8bit->getCharName(cur_code) == nullptr)) { continue; } - int k = i; + int mapped_code = cur_code; if(code2GID) { // for fonts with GID (e.g. TTF) we need to map GIDs instead of codes - if((k = code2GID[i]) == 0) continue; + if((mapped_code = code2GID[cur_code]) == 0) continue; } - if(k > max_key) - max_key = k; + if(mapped_code > max_key) + max_key = mapped_code; Unicode u, *pu=&u; if(info.use_tounicode) { - int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0; - u = check_unicode(pu, n, i, font); + int n = ctu ? (ctu->mapToUnicode(cur_code, &pu)) : 0; + u = check_unicode(pu, n, cur_code, font); } else { - u = unicode_from_font(i, font); + u = unicode_from_font(cur_code, font); } - if(u == ' ') - has_space = true; - if(codeset.insert(u).second) { - cur_mapping[k] = u; + cur_mapping[mapped_code] = u; } else { @@ -400,7 +395,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo //TODO: constant for the length memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); memset(width_list, -1, 0x10000 * sizeof(*width_list)); - i = -1; + cur_code = -1; continue; } } @@ -412,16 +407,26 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo } } - if(font_8bit) { - width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5); - } - else - { - char buf[2]; - buf[0] = (i >> 8) & 0xff; - buf[1] = (i & 0xff); - width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5); + double cur_width = 0; + if(font_8bit) + { + cur_width = font_8bit->getWidth(cur_code); + } + else + { + char buf[2]; + buf[0] = (cur_code >> 8) & 0xff; + buf[1] = (cur_code & 0xff); + cur_width = font_cid->getWidth(buf, 2) ; + } + width_list[mapped_code] = (int)floor(cur_width * info.em_size + 0.5); + + if(u == ' ') + { + has_space = true; + info.space_width = cur_width; + } } } @@ -434,6 +439,15 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo // Might be a problem if ' ' is in the font, but not empty if(!has_space) { + if(font_8bit) + { + info.space_width = font_8bit->getWidth(' '); + } + else + { + char buf[2] = {0, ' '}; + info.space_width = font_cid->getWidth(buf, 2); + } ffw_add_empty_char((int32_t)' ', (int)floor(info.space_width * info.em_size + 0.5)); } diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 9e93db4..196ba07 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -164,12 +164,12 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) this->pageNum = pageNum; - width_manager.install(state->getPageWidth()); - height_manager.install(state->getPageHeight()); + long long wid = width_manager.install(state->getPageWidth()); + long long hid = height_manager.install(state->getPageHeight()); f_pages.fs << "
" << "
"; } - transform_matrix_manager.install(default_ctm); + transform_matrix_manager.update(default_ctm); f_pages.fs << "
(new_line_state, NLS_SPAN); } - if(transform_matrix_manager.install(new_draw_text_tm)) + if(transform_matrix_manager.update(new_draw_text_tm)) { new_line_state = max(new_line_state, NLS_DIV); } @@ -334,7 +334,7 @@ void HTMLRenderer::check_state_change(GfxState * state) // letter space // depends: draw_text_scale if((all_changed || letter_space_changed || draw_text_scale_changed) - && (letter_space_manager.install(state->getCharSpace() * draw_text_scale))) + && (letter_space_manager.update(state->getCharSpace() * draw_text_scale))) { new_line_state = max(new_line_state, NLS_SPAN); } @@ -342,7 +342,7 @@ void HTMLRenderer::check_state_change(GfxState * state) // word space // depends draw_text_scale if((all_changed || word_space_changed || draw_text_scale_changed) - && (word_space_manager.install(state->getWordSpace() * draw_text_scale))) + && (word_space_manager.update(state->getWordSpace() * draw_text_scale))) { new_line_state = max(new_line_state, NLS_SPAN); } @@ -360,11 +360,11 @@ void HTMLRenderer::check_state_change(GfxState * state) { GfxRGB new_color; state->getFillRGB(&new_color); - changed = fill_color_manager.install(new_color); + changed = fill_color_manager.update(new_color); } else { - changed = fill_color_manager.install_transparent(); + changed = fill_color_manager.update_transparent(); } if(changed) new_line_state = max(new_line_state, NLS_SPAN); @@ -384,11 +384,11 @@ void HTMLRenderer::check_state_change(GfxState * state) { GfxRGB new_color; state->getStrokeRGB(&new_color); - changed = stroke_color_manager.install(new_color); + changed = stroke_color_manager.update(new_color); } else { - changed = stroke_color_manager.install_transparent(); + changed = stroke_color_manager.update_transparent(); } if(changed) new_line_state = max(new_line_state, NLS_SPAN); @@ -397,7 +397,7 @@ void HTMLRenderer::check_state_change(GfxState * state) // rise // depends draw_text_scale if((all_changed || rise_changed || draw_text_scale_changed) - && (rise_manager.install(state->getRise() * draw_text_scale))) + && (rise_manager.update(state->getRise() * draw_text_scale))) { new_line_state = max(new_line_state, NLS_SPAN); } @@ -416,7 +416,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state) { close_text_line(); - text_line_buf->reset(state); + text_line_buf->set_pos(state); //resync position draw_ty = cur_ty; diff --git a/src/util/StateManager.h b/src/util/StateManager.h index 5b3ff33..93dc6f0 100644 --- a/src/util/StateManager.h +++ b/src/util/StateManager.h @@ -38,23 +38,44 @@ public: // usually called at the beginning of a page void reset(void) { - _install(imp->default_value()); + cur_value = imp->default_value(); + cur_id = install(cur_value, &cur_actual_value); } /* - * install new_value if changed (equal() should be faster than map::lower_bound) + * update the current state, which will be installed automatically * return if the state has been indeed changed */ - bool install(double new_value) { - if(equal(new_value, value)) + bool update(double new_value) { + if(equal(new_value, cur_value)) return false; - _install(new_value); + cur_value = new_value; + cur_id = install(cur_value, &cur_actual_value); return true; } - long long get_id (void) const { return id; } - double get_value (void) const { return value; } - double get_actual_value (void) const { return actual_value; } + // install new_value into the map, but do not update the state + // return the corresponding id, and set + long long install(double new_value, double * actual_value_ptr = nullptr) { + auto iter = value_map.lower_bound(new_value - eps); + if((iter != value_map.end()) && (abs(iter->first - new_value) <= eps)) + { + if(actual_value_ptr != nullptr) + *actual_value_ptr = iter->first; + return iter->second; + } + + long long id = value_map.size(); + double v = value_map.insert(std::make_pair(new_value, id)).first->first; + if(actual_value_ptr != nullptr) + *actual_value_ptr = v; + return id; + } + + // get current state + long long get_id (void) const { return cur_id; } + double get_value (void) const { return cur_value; } + double get_actual_value (void) const { return cur_actual_value; } void dump_css(std::ostream & out) { for(auto iter = value_map.begin(); iter != value_map.end(); ++iter) @@ -75,34 +96,19 @@ public: } protected: - // this version of install does not check if value has been updated - // return if a new entry has been created - bool _install(double new_value) { - value = new_value; - - auto iter = value_map.lower_bound(new_value - eps); - if((iter != value_map.end()) && (abs(iter->first - value) <= eps)) - { - actual_value = iter->first; - id = iter->second; - return false; - } - - id = value_map.size(); - actual_value = value_map.insert(std::make_pair(new_value, id)).first->first; - return true; - } - double eps; Imp * imp; - long long id; - double value; // the value we are tracking - double actual_value; // the value we actually exported to HTML + long long cur_id; + double cur_value; // the value we are tracking + double cur_actual_value; // the value we actually exported to HTML std::map value_map; }; // Be careful about the mixed usage of Matrix and const double * +// the input is usually double *, which might be changed, so we have to copy the content out +// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructign +// since the address of cur_value.m cannot be changed, we can export double * instead of Matrix template class StateManager { @@ -112,21 +118,24 @@ public: { } void reset(void) { - _install(imp->default_value()); + memcpy(cur_value.m, imp->default_value(), sizeof(cur_value.m)); + cur_id = install(cur_value); } // return if changed - bool install(const double * new_value) { + bool update(const double * new_value) { // For a transform matrix m // m[4] & m[5] have been taken care of - if(tm_equal(new_value, value.m, 4)) + if(tm_equal(new_value, cur_value.m, 4)) return false; - _install(new_value); + + memcpy(cur_value.m, new_value, sizeof(cur_value.m)); + cur_id = install(cur_value); return true; } - long long get_id (void) const { return id; } - const Matrix & get_value (void) const { return value; } + long long get_id (void) const { return cur_id; } + const double * get_value (void) const { return cur_value.m; } void dump_css(std::ostream & out) { for(auto iter = value_map.begin(); iter != value_map.end(); ++iter) @@ -140,26 +149,23 @@ public: void dump_print_css(std::ostream & out, double scale) {} protected: - // return if a new entry has been created - bool _install(const double * new_value) { - memcpy(value.m, new_value, sizeof(value.m)); - - auto iter = value_map.lower_bound(value); - if((iter != value_map.end()) && (tm_equal(value.m, iter->first.m, 4))) + // return id + long long install(const Matrix & new_value) { + auto iter = value_map.lower_bound(new_value); + if((iter != value_map.end()) && (tm_equal(new_value.m, iter->first.m, 4))) { - id = iter->second; - return false; + return iter->second; } - id = value_map.size(); - value_map.insert(std::make_pair(value, id)); - return true; + long long id = value_map.size(); + value_map.insert(std::make_pair(new_value, id)); + return id; } Imp * imp; - long long id; - Matrix value; + long long cur_id; + Matrix cur_value; class Matrix_less { @@ -177,6 +183,7 @@ protected: return false; } }; + std::map value_map; }; @@ -189,28 +196,31 @@ public: { } void reset(void) { - is_transparent = true; - id = -1; + cur_is_transparent = true; + cur_id = -1; } - bool install(const GfxRGB & new_value) { - if((!is_transparent) && gfxrgb_equal_obj(new_value, value)) + bool update(const GfxRGB & new_value) { + if((!cur_is_transparent) && gfxrgb_equal_obj(new_value, cur_value)) return false; - _install(new_value); + cur_value = new_value; + cur_is_transparent = false; + cur_id = install(cur_value); return true; } - bool install_transparent (void) { - if(is_transparent) + bool update_transparent (void) { + if(cur_is_transparent) return false; - _install_transparent(); + cur_is_transparent = true; + cur_id = -1; return true; } - long long get_id (void) const { return id; } - const GfxRGB & get_value (void) const { return value; } - bool get_is_transparent (void) const { return is_transparent; } + long long get_id (void) const { return cur_id; } + const GfxRGB & get_value (void) const { return cur_value; } + bool get_is_transparent (void) const { return cur_is_transparent; } void dump_css(std::ostream & out) { out << "." << imp->get_css_class_name() << CSS::INVALID_ID << "{"; @@ -228,32 +238,23 @@ public: void dump_print_css(std::ostream & out, double scale) {} protected: - bool _install(const GfxRGB & new_value) { - is_transparent = false; - value = new_value; + long long install(const GfxRGB & new_value) { auto iter = value_map.find(new_value); if(iter != value_map.end()) { - id = iter->second; - return false; + return iter->second; } - id = value_map.size(); - value_map.insert(std::make_pair(value, id)); - return true; - } - - bool _install_transparent(void) { - is_transparent = true; - id = -1; - return false; + long long id = value_map.size(); + value_map.insert(std::make_pair(new_value, id)); + return id; } Imp * imp; - long long id; - GfxRGB value; - bool is_transparent; + long long cur_id; + GfxRGB cur_value; + bool cur_is_transparent; class GfxRGB_hash {