diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h
index a3d2aa1..0f9bbed 100644
--- a/src/HTMLRenderer/HTMLRenderer.h
+++ b/src/HTMLRenderer/HTMLRenderer.h
@@ -50,7 +50,9 @@ struct HTMLState
Color stroke_color;
double letter_space;
double word_space;
- double rise;
+
+ // relative to the previous state
+ double vertical_align;
double x,y;
double transform_matrix[4];
@@ -337,6 +339,7 @@ protected:
// managers store values actually used in HTML (i.e. scaled)
////////////////////////////////////////////////
TransformMatrixManager transform_matrix_manager;
+ VerticalAlignManager vertical_align_manager;
StrokeColorManager stroke_color_manager;
LetterSpaceManager letter_space_manager;
WhitespaceManager whitespace_manager;
@@ -346,7 +349,6 @@ protected:
BottomManager bottom_manager;
HeightManager height_manager;
WidthManager width_manager;
- RiseManager rise_manager;
LeftManager left_manager;
////////////////////////////////////////////////
BGImageSizeManager bgimage_size_manager;
diff --git a/src/HTMLRenderer/TextLineBuffer.cc b/src/HTMLRenderer/TextLineBuffer.cc
index 60e2232..6ae591c 100644
--- a/src/HTMLRenderer/TextLineBuffer.cc
+++ b/src/HTMLRenderer/TextLineBuffer.cc
@@ -91,22 +91,14 @@ void HTMLRenderer::TextLineBuffer::flush(void)
ostream & out = renderer->f_pages.fs;
{
// max_ascent determines the height of the div
+ double accum_vertical_align = 0; // accumulated
double max_ascent = 0;
for(auto iter = states.begin(); iter != states.end(); ++iter)
{
- double cur_ascent = iter->rise + iter->font_info->ascent * iter->font_size;
+ accum_vertical_align += iter->vertical_align;
+ double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
if(cur_ascent > max_ascent)
max_ascent = cur_ascent;
-
- // set id
- iter->ids[State::FONT_ID] = iter->font_info->id;
- iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(iter->font_size);
- iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(iter->fill_color);
- iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(iter->stroke_color);
- iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(iter->letter_space);
- iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(iter->word_space);
- iter->ids[State::RISE_ID] = renderer->rise_manager .install(iter->rise);
- iter->hash();
}
// open
for the current text line
@@ -138,20 +130,36 @@ void HTMLRenderer::TextLineBuffer::flush(void)
{
// export current state, find a closest parent
{
+ // set id
+ state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
+ state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size);
+ state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color);
+ state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color);
+ state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space);
+ state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space);
+ state_iter1->hash();
+
// greedy
- int best_cost = State::ID_COUNT;
+ double vertical_align = state_iter1->vertical_align;
+ int best_cost = State::HASH_ID_COUNT + 1;
// we have a nullptr at the beginning, so no need to check for rend
for(auto iter = stack.rbegin(); *iter; ++iter)
{
int cost = state_iter1->diff(**iter);
+ if(!equal(vertical_align,0))
+ ++cost;
+
if(cost < best_cost)
{
while(stack.back() != *iter)
{
+ state_iter1->vertical_align += stack.back()->vertical_align;
+
stack.back()->end(out);
stack.pop_back();
}
best_cost = cost;
+ state_iter1->vertical_align = vertical_align;
if(best_cost == 0)
break;
@@ -160,7 +168,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
// cannot go further
if((*iter)->start_idx <= last_text_pos_with_negative_offset)
break;
+
+ vertical_align += (*iter)->vertical_align;
}
+ //
+ state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align);
// export the diff between *state_iter1 and stack.back()
state_iter1->begin(out, stack.back());
stack.push_back(&*state_iter1);
@@ -338,9 +350,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
}
}
- // now we would like to adjust letter space to most_used width
- // we shall apply the optimization only when it can significantly reduce the number of elements
- if(max_count <= text_count / 2)
+ // negative letter space may cause problems
+ if(!is_positive(state_iter1->letter_space + most_used_width))
{
// the old value is the best
// just copy old offsets
@@ -348,6 +359,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
}
else
{
+ // now we would like to adjust letter space to most_used width
+
// install new letter space
const double old_ls = state_iter1->letter_space;
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space));
@@ -439,7 +452,7 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
{
long long cur_mask = 0xff;
bool first = true;
- for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
+ for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{
if(hash_umask & cur_mask) // we don't care about this ID
{
@@ -463,10 +476,8 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
case WORD_SPACE_ID:
word_space = prev_state->word_space;
break;
- case RISE_ID:
- rise = prev_state->rise;
- break;
default:
+ cerr << "unexpected state mask" << endl;
break;
}
}
@@ -495,6 +506,28 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
else
out << ids[i];
}
+ // vertical align
+ if(!equal(vertical_align, 0))
+ {
+ // so we have to dump it
+ if(first)
+ {
+ out << " left there
+ // it is not necessary to output vertical align
long long cur_mask = 0xff;
- for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
+ for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{
if(hash_umask & cur_mask) // we don't care about this ID
continue;
@@ -591,7 +625,7 @@ const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
CSS::STROKE_COLOR_CN,
CSS::LETTER_SPACE_CN,
CSS::WORD_SPACE_CN,
- CSS::RISE_CN
+ CSS::VERTICAL_ALIGN_CN,
};
} //namespace pdf2htmlEX
diff --git a/src/HTMLRenderer/TextLineBuffer.h b/src/HTMLRenderer/TextLineBuffer.h
index 943176c..51f8699 100644
--- a/src/HTMLRenderer/TextLineBuffer.h
+++ b/src/HTMLRenderer/TextLineBuffer.h
@@ -41,8 +41,9 @@ public:
STROKE_COLOR_ID,
LETTER_SPACE_ID,
WORD_SPACE_ID,
- RISE_ID,
+ HASH_ID_COUNT,
+ VERTICAL_ALIGN_ID = HASH_ID_COUNT,
ID_COUNT
};
diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc
index 77c8a8c..cfc6011 100644
--- a/src/HTMLRenderer/general.cc
+++ b/src/HTMLRenderer/general.cc
@@ -61,9 +61,9 @@ HTMLRenderer::HTMLRenderer(const Param * param)
* or may be handled well (whitespace_manager)
* So we can set a large eps here
*/
- rise_manager .set_eps(param->v_eps);
- whitespace_manager .set_eps(param->h_eps);
- left_manager .set_eps(param->h_eps);
+ vertical_align_manager.set_eps(param->v_eps);
+ whitespace_manager .set_eps(param->h_eps);
+ left_manager .set_eps(param->h_eps);
/*
* For othere states, we need accurate values
* optimization will be done separately
@@ -446,6 +446,7 @@ void HTMLRenderer::set_stream_flags(std::ostream & out)
void HTMLRenderer::dump_css (void)
{
transform_matrix_manager.dump_css(f_css.fs);
+ vertical_align_manager .dump_css(f_css.fs);
letter_space_manager .dump_css(f_css.fs);
stroke_color_manager .dump_css(f_css.fs);
word_space_manager .dump_css(f_css.fs);
@@ -455,7 +456,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_css(f_css.fs);
height_manager .dump_css(f_css.fs);
width_manager .dump_css(f_css.fs);
- rise_manager .dump_css(f_css.fs);
left_manager .dump_css(f_css.fs);
bgimage_size_manager .dump_css(f_css.fs);
@@ -463,6 +463,7 @@ void HTMLRenderer::dump_css (void)
double ps = print_scale();
f_css.fs << CSS::PRINT_ONLY << "{" << endl;
transform_matrix_manager.dump_print_css(f_css.fs, ps);
+ vertical_align_manager .dump_print_css(f_css.fs, ps);
letter_space_manager .dump_print_css(f_css.fs, ps);
stroke_color_manager .dump_print_css(f_css.fs, ps);
word_space_manager .dump_print_css(f_css.fs, ps);
@@ -472,7 +473,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_print_css(f_css.fs, ps);
height_manager .dump_print_css(f_css.fs, ps);
width_manager .dump_print_css(f_css.fs, ps);
- rise_manager .dump_print_css(f_css.fs, ps);
left_manager .dump_print_css(f_css.fs, ps);
bgimage_size_manager .dump_print_css(f_css.fs, ps);
f_css.fs << "}" << endl;
diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc
index 2072a92..b4d4015 100644
--- a/src/HTMLRenderer/state.cc
+++ b/src/HTMLRenderer/state.cc
@@ -101,7 +101,7 @@ void HTMLRenderer::reset_state()
cur_html_state.stroke_color.transparent = true;
cur_html_state.letter_space = 0;
cur_html_state.word_space = 0;
- cur_html_state.rise = 0;
+ cur_html_state.vertical_align = 0;
cur_html_state.x = 0;
cur_html_state.y = 0;
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
@@ -148,6 +148,9 @@ void HTMLRenderer::check_state_change(GfxState * state)
need_recheck_position = true;
}
+ // save current info for later use
+ auto old_font_info = cur_html_state.font_info;
+ double old_font_size = cur_html_state.font_size;
// font name & size
if(all_changed || font_changed)
{
@@ -306,11 +309,28 @@ void HTMLRenderer::check_state_change(GfxState * state)
inverted[3] = old_tm[0] / det;
dx = inverted[0] * lhs1 + inverted[2] * lhs2;
dy = inverted[1] * lhs1 + inverted[3] * lhs2;
- // currently we merge only text on a same horizontal line
if(equal(dy, 0))
{
+ // text on the same horizontal line, we can insert positive or negative x-offsets
merged = true;
}
+ else
+ {
+ // otherwise we merge the lines only when
+ // - text is not shifted to the left too much
+ // - text is not moved too high or too low
+ if((dx * draw_text_scale) >= -(old_font_info->ascent - old_font_info->descent) * old_font_size - EPS)
+ {
+ double oldymin = old_font_info->descent * old_font_size;
+ double oldymax = old_font_info->ascent * old_font_size;
+ double ymin = dy * draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size;
+ double ymax = dy * draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size;
+ if((ymin <= oldymax + EPS) && (ymax >= oldymin - EPS))
+ {
+ merged = true;
+ }
+ }
+ }
}
//else no solution
}
@@ -319,6 +339,15 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(merged)
{
text_line_buf->append_offset(dx * draw_text_scale);
+ if(equal(dy, 0))
+ {
+ cur_html_state.vertical_align = 0;
+ }
+ else
+ {
+ cur_html_state.vertical_align = (dy * draw_text_scale);
+ new_line_state = max(new_line_state, NLS_SPAN);
+ }
draw_tx = cur_tx;
draw_ty = cur_ty;
}
@@ -419,6 +448,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
// update position such that they will be recorded by text_line_buf
state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
+ cur_html_state.vertical_align = 0;
//resync position
draw_ty = cur_ty;
diff --git a/src/css_class_names.cmakelists.txt b/src/css_class_names.cmakelists.txt
index 0eb0ec7..0158e44 100644
--- a/src/css_class_names.cmakelists.txt
+++ b/src/css_class_names.cmakelists.txt
@@ -6,29 +6,29 @@
set(CSS_INVALID_ID "_")
-set(CSS_LINE_CN "t") # text
-set(CSS_TRANSFORM_MATRIX_CN "m") # matrix
+set(CSS_LINE_CN "t") # Text
+set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
-set(CSS_PAGE_DECORATION_CN "pd") # page decoration
-set(CSS_PAGE_FRAME_CN "pf") # page frame
-set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content
-set(CSS_PAGE_DATA_CN "pi") # page info
+set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
+set(CSS_PAGE_FRAME_CN "pf") # Page Frame
+set(CSS_PAGE_CONTENT_BOX_CN "pc") # Page Content
+set(CSS_PAGE_DATA_CN "pi") # Page Info
-set(CSS_BACKGROUND_IMAGE_CN "bi") # background image
+set(CSS_BACKGROUND_IMAGE_CN "bi") # Background Image
-set(CSS_FONT_FAMILY_CN "ff") # font family
-set(CSS_FONT_SIZE_CN "fs") # font size
+set(CSS_FONT_FAMILY_CN "ff") # Font Family
+set(CSS_FONT_SIZE_CN "fs") # Font Size
-set(CSS_FILL_COLOR_CN "fc") # fill color
-set(CSS_STROKE_COLOR_CN "sc") # stroke color
+set(CSS_FILL_COLOR_CN "fc") # Fill Color
+set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
-set(CSS_LETTER_SPACE_CN "ls") # letter space
-set(CSS_WORD_SPACE_CN "ws") # word space
-set(CSS_RISE_CN "r") # rise
+set(CSS_LETTER_SPACE_CN "ls") # Letter Space
+set(CSS_WORD_SPACE_CN "ws") # Word Space
+set(CSS_VERTICAL_ALIGN_CN "v") # Vertical Align
set(CSS_WHITESPACE_CN "_") # whitespace
-set(CSS_LEFT_CN "x") # x
-set(CSS_HEIGHT_CN "h") # height
-set(CSS_WIDTH_CN "w") # width
-set(CSS_BOTTTOM_CN "y") # y
-set(CSS_CSS_DRAW_CN "d") # draw
-set(CSS_LINK_CN "l") # link
+set(CSS_LEFT_CN "x") # X
+set(CSS_HEIGHT_CN "h") # Height
+set(CSS_WIDTH_CN "w") # Width
+set(CSS_BOTTTOM_CN "y") # Y
+set(CSS_CSS_DRAW_CN "d") # Draw
+set(CSS_LINK_CN "l") # Link
diff --git a/src/util/StateManager.h b/src/util/StateManager.h
index 87dca8c..ca65d8d 100644
--- a/src/util/StateManager.h
+++ b/src/util/StateManager.h
@@ -231,10 +231,10 @@ public:
void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; }
};
-class RiseManager : public StateManager
+class VerticalAlignManager : public StateManager
{
public:
- static const char * get_css_class_name (void) { return CSS::RISE_CN; }
+ static const char * get_css_class_name (void) { return CSS::VERTICAL_ALIGN_CN; }
double default_value(void) { return 0; }
void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; }
void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; }
diff --git a/src/util/css_const.h.in b/src/util/css_const.h.in
index ed3221d..260f898 100644
--- a/src/util/css_const.h.in
+++ b/src/util/css_const.h.in
@@ -43,7 +43,7 @@ const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@";
const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@";
const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@";
const char * const WORD_SPACE_CN = "@CSS_WORD_SPACE_CN@";
-const char * const RISE_CN = "@CSS_RISE_CN@";
+const char * const VERTICAL_ALIGN_CN = "@CSS_VERTICAL_ALIGN_CN@";
const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@";
const char * const LEFT_CN = "@CSS_LEFT_CN@";
const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@";