From 4a0f422b3130aa176fd21e8beba3b7324bca27ad Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sun, 7 Apr 2013 16:10:52 +0800 Subject: [PATCH] working --- src/HTMLRenderer/HTMLRenderer.h | 2 +- src/HTMLTextLine.cc | 76 ++++++++++++++++++--------------- src/HTMLTextLine.h | 20 ++++++--- src/HTMLTextPage.cc | 14 +++++- src/HTMLTextPage.h | 5 ++- src/StateManager.h | 2 +- 6 files changed, 71 insertions(+), 48 deletions(-) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 16a24cc..a0f62db 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -284,7 +284,7 @@ protected: double draw_tx, draw_ty; // managers store values actually used in HTML (i.e. scaled) - AllStateManater all_manager; + AllStateManager all_manager; enum NewLineState { diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc index e4f8f30..886869c 100644 --- a/src/HTMLTextLine.cc +++ b/src/HTMLTextLine.cc @@ -25,7 +25,7 @@ using std::endl; using std::find; using std::abs; -HTMLTextLine::HTMLTextLine (const Param & param, AllStateManater & all_manager) +HTMLTextLine::HTMLTextLine (const Param & param, AllStateManager & all_manager) : param(param), all_manager(all_manager) { } @@ -59,53 +59,35 @@ void HTMLTextLine::append_state(const HTMLState & html_state) (HTMLState&)(states.back()) = html_state; } -void HTMLTextLine::dump_text(ostream & out) +bool HTMLTextLine::dump_text(ostream & out) { /* * Each Line is an independent absolute positioned block * so even we have a few states or offsets, we may omit them */ if(text.empty()) - return; - - // remove unuseful states in the end - while((!states.empty()) && (states.back().start_idx >= text.size())) - states.pop_back(); + return false; if(states.empty() || (states[0].start_idx != 0)) { cerr << "Warning: text without a style! Must be a bug in pdf2htmlEX" << endl; - return; + return false; } - // optimize before output - optimize(); - // Start Output { - // max_ascent determines the height of the div - double accum_vertical_align = 0; // accumulated - double max_ascent = 0; - for(auto iter = states.begin(); iter != states.end(); ++iter) - { - accum_vertical_align += iter->vertical_align; - double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size; - if(cur_ascent > max_ascent) - max_ascent = cur_ascent; - } - // open
for the current text line out << "
stack; // a special safeguard in the bottom - stack.clear(); stack.push_back(nullptr); //accumulated horizontal offset; @@ -123,15 +105,6 @@ void HTMLTextLine::dump_text(ostream & out) { // export current state, find a closest parent { - // set id - state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id; - state_iter1->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(state_iter1->font_size); - state_iter1->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(state_iter1->fill_color); - state_iter1->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(state_iter1->stroke_color); - state_iter1->ids[State::LETTER_SPACE_ID] = all_manager.letter_space.install(state_iter1->letter_space); - state_iter1->ids[State::WORD_SPACE_ID] = all_manager.word_space.install(state_iter1->word_space); - state_iter1->hash(); - // greedy double vertical_align = state_iter1->vertical_align; int best_cost = State::HASH_ID_COUNT + 1; @@ -249,6 +222,7 @@ void HTMLTextLine::dump_text(ostream & out) } out << "
"; + return true; } void HTMLTextLine::clear(void) @@ -258,14 +232,46 @@ void HTMLTextLine::clear(void) text.clear(); } +void HTMLTextLine::prepare(void) +{ + if(param.optimize_text) + optimize(); + + // max_ascent determines the height of the div + double accum_vertical_align = 0; // accumulated + ascent = 0; + descent = 0; + // note that vertical_align cannot be calculated here + for(auto iter = states.begin(); iter != states.end(); ++iter) + { + iter->ids[State::FONT_ID] = iter->font_info->id; + iter->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(iter->font_size); + iter->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(iter->fill_color); + iter->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(iter->stroke_color); + iter->ids[State::LETTER_SPACE_ID] = all_manager.letter_space.install(iter->letter_space); + iter->ids[State::WORD_SPACE_ID] = all_manager.word_space.install(iter->word_space); + iter->hash(); + + accum_vertical_align += iter->vertical_align; + double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size; + if(cur_ascent > ascent) + ascent = cur_ascent; + double cur_descent = accum_vertical_align + iter->font_info->descent * iter->font_size; + if(cur_descent < descent) + descent = cur_descent; + } +} + + /* * Adjust letter space and word space in order to reduce the number of HTML elements * May also unmask word space */ void HTMLTextLine::optimize() { - if(!(param.optimize_text)) - return; + // remove unuseful states in the end + while((!states.empty()) && (states.back().start_idx >= text.size())) + states.pop_back(); assert(!states.empty()); diff --git a/src/HTMLTextLine.h b/src/HTMLTextLine.h index ac2a0f5..bccfe86 100644 --- a/src/HTMLTextLine.h +++ b/src/HTMLTextLine.h @@ -27,7 +27,7 @@ namespace pdf2htmlEX { class HTMLTextLine { public: - HTMLTextLine (const Param & param, AllStateManater & all_manager); + HTMLTextLine (const Param & param, AllStateManager & all_manager); struct State : public HTMLState { // before output @@ -65,7 +65,6 @@ public: static const char * const css_class_names []; // class names for each id }; - struct Offset { Offset(size_t size_idx, double width) :start_idx(size_idx),width(width) @@ -77,26 +76,33 @@ public: void append_unicodes(const Unicode * u, int l); void append_offset(double width); void append_state(const HTMLState & html_state); - void dump_text(std::ostream & out); + // return if anything dumped + bool dump_text(std::ostream & out); bool empty(void) const { return text.empty(); } void clear(void); + /* + * Optimize and calculate necessary values + */ + void prepare(void); + + double get_ascent (void) const { return ascent; } + double get_descent(void) const { return descent; } private: void optimize(void); const Param & param; - AllStateManater & all_manager; + AllStateManager & all_manager; double x, y; long long tm_id; + double ascent, descent; + std::vector states; std::vector offsets; std::vector text; - - // for flush - std::vector stack; }; } // namespace pdf2htmlEX diff --git a/src/HTMLTextPage.cc b/src/HTMLTextPage.cc index edc4eb0..b0f4503 100644 --- a/src/HTMLTextPage.cc +++ b/src/HTMLTextPage.cc @@ -12,7 +12,7 @@ namespace pdf2htmlEX { using std::ostream; -HTMLTextPage::HTMLTextPage(const Param & param, AllStateManater & all_manager) +HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager) : param(param) , all_manager(all_manager) , last_line(nullptr) @@ -20,7 +20,7 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManater & all_manager) void HTMLTextPage::dump_text(ostream & out) { - optimize(); + prepare(); for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter) (*iter)->dump_text(out); } @@ -71,6 +71,16 @@ void HTMLTextPage::open_new_line(void) } } +void HTMLTextPage::prepare(void) +{ + for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter) + { + (*iter)->prepare(); + } + if(param.optimize_text) + optimize(); +} + void HTMLTextPage::optimize(void) { //TODO diff --git a/src/HTMLTextPage.h b/src/HTMLTextPage.h index bbe813e..8d441a8 100644 --- a/src/HTMLTextPage.h +++ b/src/HTMLTextPage.h @@ -26,7 +26,7 @@ namespace pdf2htmlEX { class HTMLTextPage { public: - HTMLTextPage (const Param & param, AllStateManater & all_manager); + HTMLTextPage (const Param & param, AllStateManager & all_manager); void append_unicodes(const Unicode * u, int l); void append_offset(double offset); @@ -39,10 +39,11 @@ public: void open_new_line(void); private: + void prepare(void); void optimize(void); const Param & param; - AllStateManater & all_manager; + AllStateManager & all_manager; HTMLTextLine * last_line; std::vector> text_lines; }; diff --git a/src/StateManager.h b/src/StateManager.h index 5fe8bf6..34bab08 100644 --- a/src/StateManager.h +++ b/src/StateManager.h @@ -411,7 +411,7 @@ private: std::unordered_map> value_map; }; -struct AllStateManater +struct AllStateManager { TransformMatrixManager transform_matrix; VerticalAlignManager vertical_align;