From a82996eeb7275d7e7a1b6e3630df8bac44b5c8cf Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sat, 4 May 2013 21:17:35 +0800 Subject: [PATCH] check clip changes --- src/HTMLRenderer/HTMLRenderer.h | 4 +- src/HTMLRenderer/state.cc | 57 +++++++++++++++------- src/HTMLTextLine.cc | 6 +-- src/HTMLTextLine.h | 2 +- src/HTMLTextPage.cc | 83 ++++++++++++++++----------------- src/HTMLTextPage.h | 15 +++--- 6 files changed, 95 insertions(+), 72 deletions(-) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index cdd17e0..4634592 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -289,6 +289,7 @@ protected: AllStateManager all_manager; HTMLTextState cur_text_state; HTMLLineState cur_line_state; + HTMLClipState cur_clip_state; HTMLTextPage html_text_page; @@ -296,7 +297,8 @@ protected: { NLS_NONE, NLS_NEWSTATE, - NLS_NEWLINE + NLS_NEWLINE, + NLS_NEWCLIP } new_line_state; // for font reencoding diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 6199701..3486314 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -119,6 +119,11 @@ void HTMLRenderer::reset_state() cur_line_state.y = 0; memcpy(cur_line_state.transform_matrix, ID_MATRIX, sizeof(cur_line_state.transform_matrix)); + cur_clip_state.xmin = 0; + cur_clip_state.xmax = 0; + cur_clip_state.ymin = 0; + cur_clip_state.ymax = 0; + cur_tx = cur_ty = 0; draw_tx = draw_ty = 0; @@ -145,6 +150,14 @@ void HTMLRenderer::reset_state_change() clip_changed = false; } + +template +void set_line_state(NewLineState & cur_ls, NewLineState new_ls) +{ + if(new_ls > cur_ls) + cur_ls = new_ls; +} + void HTMLRenderer::check_state_change(GfxState * state) { // DEPENDENCY WARNING @@ -154,11 +167,16 @@ void HTMLRenderer::check_state_change(GfxState * state) if(all_changed || clip_changed) { - //TODO: compare with current clip box - double x1, x2, y1, y2; - state->getClipBBox(&x1, &y1, &x2, &y2); - html_text_page.clip(x1, y1, x2, y2); - new_line_state = NLS_NEWLINE; + HTMLClipState new_clip_state; + state->getClipBBox(&new_clip_state.xmin, &new_clip_state.ymin, &new_clip_state.xmax, &new_clip_state.ymax); + if(!(equal(cur_clip_state.xmin, new_clip_state.xmin) + && equal(cur_clip_state.xmax, new_clip_state.xmax) + && equal(cur_clip_state.ymin, new_clip_state.ymin) + && equal(cur_clip_state.ymax, new_clip_state.ymax))) + { + cur_clip_state = new_clip_state; + set_line_state(new_line_state, NLS_NEWCLIP); + } } bool need_recheck_position = false; @@ -191,11 +209,11 @@ void HTMLRenderer::check_state_change(GfxState * state) // TODO: consider the font matrix and estimate the metrics if(new_font_info->is_type3 || cur_text_state.font_info->is_type3) { - new_line_state = max(new_line_state, NLS_NEWLINE); + set_line_state(new_line_state, NLS_NEWLINE); } else { - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } cur_text_state.font_info = new_font_info; } @@ -279,13 +297,13 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!equal(new_draw_font_size, cur_text_state.font_size)) { - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); cur_text_state.font_size = new_draw_font_size; } if(!tm_equal(new_draw_text_tm, cur_line_state.transform_matrix, 4)) { - new_line_state = max(new_line_state, NLS_NEWLINE); + set_line_state(new_line_state, NLS_NEWLINE); memcpy(cur_line_state.transform_matrix, new_draw_text_tm, sizeof(cur_line_state.transform_matrix)); } } @@ -368,14 +386,14 @@ void HTMLRenderer::check_state_change(GfxState * state) else { cur_text_state.vertical_align = (dy * old_draw_text_scale); - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } draw_tx = cur_tx; draw_ty = cur_ty; } else { - new_line_state = max(new_line_state, NLS_NEWLINE); + set_line_state(new_line_state, NLS_NEWLINE); } } else @@ -392,7 +410,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!equal(new_letter_space, cur_text_state.letter_space)) { cur_text_state.letter_space = new_letter_space; - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } } @@ -404,7 +422,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!equal(new_word_space, cur_text_state.word_space)) { cur_text_state.word_space = new_word_space; - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } } @@ -429,7 +447,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(new_fill_color == cur_text_state.fill_color)) { cur_text_state.fill_color = new_fill_color; - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } } @@ -455,7 +473,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(new_stroke_color == cur_text_state.stroke_color)) { cur_text_state.stroke_color = new_stroke_color; - new_line_state = max(new_line_state, NLS_NEWSTATE); + set_line_state(new_line_state, NLS_NEWSTATE); } } @@ -465,9 +483,14 @@ void HTMLRenderer::check_state_change(GfxState * state) void HTMLRenderer::prepare_text_line(GfxState * state) { if(!(html_text_page.get_cur_line())) - new_line_state = NLS_NEWLINE; + new_line_state = NLS_NEWCLIP; - if(new_line_state == NLS_NEWLINE) + if(new_line_state >= NLS_NEWCLIP) + { + html_text_page.clip(cur_clip_state); + } + + if(new_line_state >= NLS_NEWLINE) { // update position such that they will be recorded by text_line_buf state->transform(state->getCurX(), state->getCurY(), &cur_line_state.x, &cur_line_state.y); diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc index e2d81f7..d37269c 100644 --- a/src/HTMLTextLine.cc +++ b/src/HTMLTextLine.cc @@ -233,10 +233,10 @@ void HTMLTextLine::clear(void) text.clear(); } -void HTMLTextLine::clip(double x1, double y1, double x2, double y2) +void HTMLTextLine::clip(const HTMLClipState & clip_state) { - clip_x1 = x1; - clip_y1 = y1; + clip_x1 = clip_state.xmin; + clip_y1 = clip_state.ymin; } void HTMLTextLine::prepare(void) diff --git a/src/HTMLTextLine.h b/src/HTMLTextLine.h index 9b78bf4..c974c0f 100644 --- a/src/HTMLTextLine.h +++ b/src/HTMLTextLine.h @@ -81,7 +81,7 @@ public: bool text_empty(void) const { return text.empty(); } void clear(void); - void clip(double x1, double y1, double x2, double y2); + void clip(const HTMLClipState &); /* * Optimize and calculate necessary values diff --git a/src/HTMLTextPage.cc b/src/HTMLTextPage.cc index d1e092b..e7d6c6b 100644 --- a/src/HTMLTextPage.cc +++ b/src/HTMLTextPage.cc @@ -30,39 +30,49 @@ void HTMLTextPage::dump_text(ostream & out) optimize(); //push a dummy entry for convenience - clip_boxes.emplace_back(0, 0, page_width, page_height, text_lines.size()); + clips.emplace_back(HTMLClipState{0, 0, page_width, page_height}, text_lines.size()); - ClipBox cur_cb(0, 0, page_width, page_height, 0); + Clip cur_clip(HTMLClipState{0, 0, page_width, page_height}, 0); bool has_clip = false; auto text_line_iter = text_lines.begin(); - for(auto clip_iter = clip_boxes.begin(); clip_iter != clip_boxes.end(); ++clip_iter) + for(auto clip_iter = clips.begin(); clip_iter != clips.end(); ++clip_iter) { - if(has_clip) - { - out << "
"; - } - auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx; - while(text_line_iter != next_text_line_iter) + if(text_line_iter != next_text_line_iter) { - (*text_line_iter)->clip(cur_cb.x1, cur_cb.y1, cur_cb.x2, cur_cb.y2); - (*text_line_iter)->dump_text(out); - ++text_line_iter; - } - if(has_clip) - { - out << "
"; + const auto & cs = cur_clip.clip_state; + if(has_clip) + { + out << "
"; + } + + while(text_line_iter != next_text_line_iter) + { + if(has_clip) + { + (*text_line_iter)->clip(cs); + } + (*text_line_iter)->dump_text(out); + ++text_line_iter; + } + if(has_clip) + { + out << "
"; + } } - cur_cb = *clip_iter; - has_clip = !(equal(0, cur_cb.x1) && equal(0, cur_cb.y1) - && equal(page_width, cur_cb.x2) && equal(page_height, cur_cb.y2)); + { + cur_clip = *clip_iter; + const auto & cs = cur_clip.clip_state; + has_clip = !(equal(0, cs.xmin) && equal(0, cs.ymin) + && equal(page_width, cs.xmax) && equal(page_height, cs.ymax)); + } } } @@ -74,7 +84,7 @@ void HTMLTextPage::dump_css(ostream & out) void HTMLTextPage::clear(void) { text_lines.clear(); - clip_boxes.clear(); + clips.clear(); cur_line = nullptr; } @@ -94,32 +104,21 @@ void HTMLTextPage::set_page_size(double width, double height) page_height = height; } -void HTMLTextPage::clip(double x1, double y1, double x2, double y2) +void HTMLTextPage::clip(const HTMLClipState & clip_state) { - if(!clip_boxes.empty()) + if(!clips.empty()) { - auto & cb = clip_boxes.back(); - if(cb.start_idx == text_lines.size()) + auto & clip = clips.back(); + if(clip.start_idx == text_lines.size()) { /* * Previous ClipBox is not used */ - cb.x1 = x1; - cb.y1 = y1; - cb.x2 = x2; - cb.y2 = y2; - return; - } - if(equal(cb.x1, x1) && equal(cb.y1, y1) - && equal(cb.x2, x2) && equal(cb.y2, y2)) - { - /* - * same as previous ClipBox - */ + clip.clip_state = clip_state; return; } } - clip_boxes.emplace_back(x1, y1, x2, y2, text_lines.size()); + clips.emplace_back(clip_state, text_lines.size()); } void HTMLTextPage::optimize(void) diff --git a/src/HTMLTextPage.h b/src/HTMLTextPage.h index 2125519..ec01e24 100644 --- a/src/HTMLTextPage.h +++ b/src/HTMLTextPage.h @@ -22,7 +22,6 @@ namespace pdf2htmlEX { * * contains a series of HTMLTextLine */ - class HTMLTextPage { public: @@ -38,7 +37,7 @@ public: /* for clipping */ void set_page_size(double width, double height); - void clip(double x1, double y1, double x2, double y2); + void clip(const HTMLClipState & clip_state); private: void optimize(void); @@ -50,14 +49,14 @@ private: std::vector> text_lines; - struct ClipBox { - ClipBox(double x1, double y1, double x2, double y2, size_t start_idx) - :x1(x1),y1(y1),x2(x2),y2(y2),start_idx(start_idx) - { } - double x1, y1, x2, y2; + struct Clip { + HTMLClipState clip_state; size_t start_idx; + Clip(const HTMLClipState & clip_state, size_t start_idx) + :clip_state(clip_state),start_idx(start_idx) + { } }; - std::vector clip_boxes; + std::vector clips; }; } //namespace pdf2htmlEX