From 8942f92a9c5e2bf41b5e690892c26453cccdd757 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 25 Mar 2013 12:23:29 +0800 Subject: [PATCH] working on space optimization --- src/HTMLRenderer/TextLineBuffer.cc | 61 ++++++++++++++++++------------ src/HTMLRenderer/TextLineBuffer.h | 2 + 2 files changed, 39 insertions(+), 24 deletions(-) diff --git a/src/HTMLRenderer/TextLineBuffer.cc b/src/HTMLRenderer/TextLineBuffer.cc index 3c52dc0..78d92da 100644 --- a/src/HTMLRenderer/TextLineBuffer.cc +++ b/src/HTMLRenderer/TextLineBuffer.cc @@ -164,14 +164,21 @@ void HTMLRenderer::TextLineBuffer::flush(void) } else { - double space_off = stack.back()->single_space_offset(); - if(abs(target - space_off) <= renderer->param->h_eps) + bool done = false; + auto cur_state = stack.back(); + if(!(cur_state->hash_umask & State::umask_by_id(State::WORD_SPACE_ID))) { - Unicode u = ' '; - outputUnicodes(out, &u, 1); - actual_offset = space_off; + double space_off = cur_state->single_space_offset(); + if(abs(target - space_off) <= renderer->param->h_eps) + { + Unicode u = ' '; + outputUnicodes(out, &u, 1); + actual_offset = space_off; + done = true; + } } - else + + if(!done) { auto & wm = renderer->whitespace_manager; wm.install(target); @@ -183,8 +190,7 @@ void HTMLRenderer::TextLineBuffer::flush(void) if(is_positive(-actual_offset)) last_text_pos_with_negative_offset = cur_text_idx; - auto * p = stack.back(); - double threshold = p->draw_font_size * (p->font_info->ascent - p->font_info->descent) * (renderer->param->space_threshold); + double threshold = cur_state->draw_font_size * (cur_state->font_info->ascent - cur_state->font_info->descent) * (renderer->param->space_threshold); out << "" << (target > (threshold - EPS) ? " " : "") << ""; @@ -236,13 +242,13 @@ void HTMLRenderer::TextLineBuffer::set_state (State & state) void HTMLRenderer::TextLineBuffer::optimize(void) { - // this function needs more work + // need more work return; assert(!states.empty()); // set proper hash_umask - long long word_space_umask = ((long long)0xff) << (8*((int)State::WORD_SPACE_ID)); + long long word_space_umask = State::umask_by_id(State::WORD_SPACE_ID); for(auto iter = states.begin(); iter != states.end(); ++iter) { auto text_iter1 = text.begin() + (iter->start_idx); @@ -255,20 +261,6 @@ void HTMLRenderer::TextLineBuffer::optimize(void) iter->hash_umask |= word_space_umask; } } - - // clean zero offsets - { - auto write_iter = offsets.begin(); - for(auto iter = offsets.begin(); iter != offsets.end(); ++iter) - { - if(!equal(iter->width, 0)) - { - *write_iter = *iter; - ++write_iter; - } - } - offsets.erase(write_iter, offsets.end()); - } // In some PDF files all spaces are converted into positionig shifts // We may try to change them to ' ' and adjusted word_spaces @@ -358,6 +350,22 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr // we have to inherit it ids[i] = prev_state->ids[i]; hash_umask &= (~cur_mask); + //copy the corresponding value + //TODO: this is so ugly + switch(i) + { + case FONT_SIZE_ID: + draw_font_size = prev_state->draw_font_size; + break; + case LETTER_SPACE_ID: + letter_space = prev_state->letter_space; + break; + case WORD_SPACE_ID: + word_space = prev_state->word_space; + break; + default: + break; + } } //anyway we don't have to output it continue; @@ -437,6 +445,11 @@ double HTMLRenderer::TextLineBuffer::State::single_space_offset(void) const return word_space + letter_space + font_info->space_width * draw_font_size; } +long long HTMLRenderer::TextLineBuffer::State::umask_by_id(int id) +{ + return (((long long)0xff) << (8*id)); +} + // the order should be the same as in the enum const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = { CSS::FONT_FAMILY_CN, diff --git a/src/HTMLRenderer/TextLineBuffer.h b/src/HTMLRenderer/TextLineBuffer.h index c289eb7..c79bb24 100644 --- a/src/HTMLRenderer/TextLineBuffer.h +++ b/src/HTMLRenderer/TextLineBuffer.h @@ -44,6 +44,8 @@ public: ID_COUNT }; + static long long umask_by_id(int id); + long long ids[ID_COUNT]; const FontInfo * font_info;