diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h
index 16a24cc..a0f62db 100644
--- a/src/HTMLRenderer/HTMLRenderer.h
+++ b/src/HTMLRenderer/HTMLRenderer.h
@@ -284,7 +284,7 @@ protected:
double draw_tx, draw_ty;
// managers store values actually used in HTML (i.e. scaled)
- AllStateManater all_manager;
+ AllStateManager all_manager;
enum NewLineState
{
diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc
index e4f8f30..886869c 100644
--- a/src/HTMLTextLine.cc
+++ b/src/HTMLTextLine.cc
@@ -25,7 +25,7 @@ using std::endl;
using std::find;
using std::abs;
-HTMLTextLine::HTMLTextLine (const Param & param, AllStateManater & all_manager)
+HTMLTextLine::HTMLTextLine (const Param & param, AllStateManager & all_manager) // only initializes the param and all_manager members from the arguments; the body is empty
: param(param), all_manager(all_manager)
{ }
@@ -59,53 +59,35 @@ void HTMLTextLine::append_state(const HTMLState & html_state)
(HTMLState&)(states.back()) = html_state;
}
-void HTMLTextLine::dump_text(ostream & out)
+bool HTMLTextLine::dump_text(ostream & out)
{
/*
* Each Line is an independent absolute positioned block
* so even we have a few states or offsets, we may omit them
*/
if(text.empty())
- return;
-
- // remove unuseful states in the end
- while((!states.empty()) && (states.back().start_idx >= text.size()))
- states.pop_back();
+ return false;
if(states.empty() || (states[0].start_idx != 0))
{
cerr << "Warning: text without a style! Must be a bug in pdf2htmlEX" << endl;
- return;
+ return false;
}
- // optimize before output
- optimize();
-
// Start Output
{
- // max_ascent determines the height of the div
- double accum_vertical_align = 0; // accumulated
- double max_ascent = 0;
- for(auto iter = states.begin(); iter != states.end(); ++iter)
- {
- accum_vertical_align += iter->vertical_align;
- double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
- if(cur_ascent > max_ascent)
- max_ascent = cur_ascent;
- }
-
// open
for the current text line
out << "
stack;
// a special safeguard in the bottom
- stack.clear();
stack.push_back(nullptr);
//accumulated horizontal offset;
@@ -123,15 +105,6 @@ void HTMLTextLine::dump_text(ostream & out)
{
// export current state, find a closest parent
{
- // set id
- state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
- state_iter1->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(state_iter1->font_size);
- state_iter1->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(state_iter1->fill_color);
- state_iter1->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(state_iter1->stroke_color);
- state_iter1->ids[State::LETTER_SPACE_ID] = all_manager.letter_space.install(state_iter1->letter_space);
- state_iter1->ids[State::WORD_SPACE_ID] = all_manager.word_space.install(state_iter1->word_space);
- state_iter1->hash();
-
// greedy
double vertical_align = state_iter1->vertical_align;
int best_cost = State::HASH_ID_COUNT + 1;
@@ -249,6 +222,7 @@ void HTMLTextLine::dump_text(ostream & out)
}
out << "
";
+ return true;
}
void HTMLTextLine::clear(void)
@@ -258,14 +232,46 @@ void HTMLTextLine::clear(void)
text.clear();
}
+/*
+ * Prepare this line before it is dumped:
+ *  - drop trailing states that start at or past the end of the text
+ *  - run optimize() when param.optimize_text is set and the line is well-formed
+ *  - install the values of every state into all_manager, store the returned ids and hash the state
+ *  - compute ascent / descent of the line (see get_ascent() / get_descent())
+ * Called by HTMLTextPage::prepare() for every line, including empty ones.
+ */
+void HTMLTextLine::prepare(void)
+{
+    // Trim here rather than only inside optimize(), so that trailing states
+    // are also removed when optimization is disabled.
+    while((!states.empty()) && (states.back().start_idx >= text.size()))
+        states.pop_back();
+
+    // optimize() asserts that states is non-empty. Only run it on lines that
+    // dump_text() would actually output (non-empty text whose first state
+    // starts at index 0); other lines are skipped or reported by dump_text().
+    if(param.optimize_text && (!text.empty()) && (!states.empty()) && (states[0].start_idx == 0))
+        optimize();
+
+    // ascent is the largest and descent the smallest vertical extent over all
+    // states, taking the accumulated vertical_align into account
+    double accum_vertical_align = 0; // accumulated
+    ascent = 0;
+    descent = 0;
+    // note that vertical_align cannot be calculated here
+    for(auto iter = states.begin(); iter != states.end(); ++iter)
+    {
+        // record the id of each value, installing it into the managers where needed
+        iter->ids[State::FONT_ID] = iter->font_info->id;
+        iter->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(iter->font_size);
+        iter->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(iter->fill_color);
+        iter->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(iter->stroke_color);
+        iter->ids[State::LETTER_SPACE_ID] = all_manager.letter_space.install(iter->letter_space);
+        iter->ids[State::WORD_SPACE_ID] = all_manager.word_space.install(iter->word_space);
+        iter->hash();
+
+        accum_vertical_align += iter->vertical_align;
+        double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
+        if(cur_ascent > ascent)
+            ascent = cur_ascent;
+        double cur_descent = accum_vertical_align + iter->font_info->descent * iter->font_size;
+        if(cur_descent < descent)
+            descent = cur_descent;
+    }
+}
+
+
/*
* Adjust letter space and word space in order to reduce the number of HTML elements
* May also unmask word space
*/
void HTMLTextLine::optimize()
{
- if(!(param.optimize_text))
- return;
+ // remove useless trailing states (those starting at or past the end of the text)
+ while((!states.empty()) && (states.back().start_idx >= text.size()))
+ states.pop_back();
assert(!states.empty());
diff --git a/src/HTMLTextLine.h b/src/HTMLTextLine.h
index ac2a0f5..bccfe86 100644
--- a/src/HTMLTextLine.h
+++ b/src/HTMLTextLine.h
@@ -27,7 +27,7 @@ namespace pdf2htmlEX {
class HTMLTextLine
{
public:
- HTMLTextLine (const Param & param, AllStateManater & all_manager);
+ HTMLTextLine (const Param & param, AllStateManager & all_manager);
struct State : public HTMLState {
// before output
@@ -65,7 +65,6 @@ public:
static const char * const css_class_names []; // class names for each id
};
-
struct Offset {
Offset(size_t size_idx, double width)
:start_idx(size_idx),width(width)
@@ -77,26 +76,33 @@ public:
void append_unicodes(const Unicode * u, int l);
void append_offset(double width);
void append_state(const HTMLState & html_state);
- void dump_text(std::ostream & out);
+ // returns true if anything was dumped, false otherwise
+ bool dump_text(std::ostream & out);
bool empty(void) const { return text.empty(); }
void clear(void);
+ /*
+ * Optimize and calculate necessary values
+ */
+ void prepare(void);
+
+ double get_ascent (void) const { return ascent; }
+ double get_descent(void) const { return descent; }
private:
void optimize(void);
const Param & param;
- AllStateManater & all_manager;
+ AllStateManager & all_manager;
double x, y;
long long tm_id;
+ double ascent, descent;
+
std::vector
states;
std::vector offsets;
std::vector text;
-
- // for flush
- std::vector stack;
};
} // namespace pdf2htmlEX
diff --git a/src/HTMLTextPage.cc b/src/HTMLTextPage.cc
index edc4eb0..b0f4503 100644
--- a/src/HTMLTextPage.cc
+++ b/src/HTMLTextPage.cc
@@ -12,7 +12,7 @@ namespace pdf2htmlEX {
using std::ostream;
-HTMLTextPage::HTMLTextPage(const Param & param, AllStateManater & all_manager)
+HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
: param(param)
, all_manager(all_manager)
, last_line(nullptr)
@@ -20,7 +20,7 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManater & all_manager)
void HTMLTextPage::dump_text(ostream & out)
{
- optimize();
+ prepare(); // calls prepare() on every text line (and optimize() if param.optimize_text) before any line is dumped
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
(*iter)->dump_text(out);
}
@@ -71,6 +71,16 @@ void HTMLTextPage::open_new_line(void)
}
}
+/*
+ * Prepare every text line of this page, then run the page-level
+ * optimization if param.optimize_text is set.
+ */
+void HTMLTextPage::prepare(void)
+{
+    for(auto & line : text_lines)
+        line->prepare();
+
+    if(!param.optimize_text)
+        return;
+    optimize();
+}
+
void HTMLTextPage::optimize(void)
{
//TODO
diff --git a/src/HTMLTextPage.h b/src/HTMLTextPage.h
index bbe813e..8d441a8 100644
--- a/src/HTMLTextPage.h
+++ b/src/HTMLTextPage.h
@@ -26,7 +26,7 @@ namespace pdf2htmlEX {
class HTMLTextPage
{
public:
- HTMLTextPage (const Param & param, AllStateManater & all_manager);
+ HTMLTextPage (const Param & param, AllStateManager & all_manager);
void append_unicodes(const Unicode * u, int l);
void append_offset(double offset);
@@ -39,10 +39,11 @@ public:
void open_new_line(void);
private:
+ void prepare(void);
void optimize(void);
const Param & param;
- AllStateManater & all_manager;
+ AllStateManager & all_manager;
HTMLTextLine * last_line;
std::vector> text_lines;
};
diff --git a/src/StateManager.h b/src/StateManager.h
index 5fe8bf6..34bab08 100644
--- a/src/StateManager.h
+++ b/src/StateManager.h
@@ -411,7 +411,7 @@ private:
std::unordered_map> value_map;
};
-struct AllStateManater
+struct AllStateManager
{
TransformMatrixManager transform_matrix;
VerticalAlignManager vertical_align;