1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 09:38:40 +00:00

merge sub/sup into one line

This commit is contained in:
Lu Wang 2013-04-05 21:53:34 +08:00
parent 623de175f5
commit ab2588a5fd
8 changed files with 121 additions and 54 deletions

View File

@ -50,7 +50,9 @@ struct HTMLState
Color stroke_color; Color stroke_color;
double letter_space; double letter_space;
double word_space; double word_space;
double rise;
// relative to the previous state
double vertical_align;
double x,y; double x,y;
double transform_matrix[4]; double transform_matrix[4];
@ -337,6 +339,7 @@ protected:
// managers store values actually used in HTML (i.e. scaled) // managers store values actually used in HTML (i.e. scaled)
//////////////////////////////////////////////// ////////////////////////////////////////////////
TransformMatrixManager transform_matrix_manager; TransformMatrixManager transform_matrix_manager;
VerticalAlignManager vertical_align_manager;
StrokeColorManager stroke_color_manager; StrokeColorManager stroke_color_manager;
LetterSpaceManager letter_space_manager; LetterSpaceManager letter_space_manager;
WhitespaceManager whitespace_manager; WhitespaceManager whitespace_manager;
@ -346,7 +349,6 @@ protected:
BottomManager bottom_manager; BottomManager bottom_manager;
HeightManager height_manager; HeightManager height_manager;
WidthManager width_manager; WidthManager width_manager;
RiseManager rise_manager;
LeftManager left_manager; LeftManager left_manager;
//////////////////////////////////////////////// ////////////////////////////////////////////////
BGImageSizeManager bgimage_size_manager; BGImageSizeManager bgimage_size_manager;

View File

@ -91,22 +91,14 @@ void HTMLRenderer::TextLineBuffer::flush(void)
ostream & out = renderer->f_pages.fs; ostream & out = renderer->f_pages.fs;
{ {
// max_ascent determines the height of the div // max_ascent determines the height of the div
double accum_vertical_align = 0; // accumulated
double max_ascent = 0; double max_ascent = 0;
for(auto iter = states.begin(); iter != states.end(); ++iter) for(auto iter = states.begin(); iter != states.end(); ++iter)
{ {
double cur_ascent = iter->rise + iter->font_info->ascent * iter->font_size; accum_vertical_align += iter->vertical_align;
double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
if(cur_ascent > max_ascent) if(cur_ascent > max_ascent)
max_ascent = cur_ascent; max_ascent = cur_ascent;
// set id
iter->ids[State::FONT_ID] = iter->font_info->id;
iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(iter->font_size);
iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(iter->fill_color);
iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(iter->stroke_color);
iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(iter->letter_space);
iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(iter->word_space);
iter->ids[State::RISE_ID] = renderer->rise_manager .install(iter->rise);
iter->hash();
} }
// open <div> for the current text line // open <div> for the current text line
@ -138,20 +130,36 @@ void HTMLRenderer::TextLineBuffer::flush(void)
{ {
// export current state, find a closest parent // export current state, find a closest parent
{ {
// set id
state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size);
state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color);
state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color);
state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space);
state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space);
state_iter1->hash();
// greedy // greedy
int best_cost = State::ID_COUNT; double vertical_align = state_iter1->vertical_align;
int best_cost = State::HASH_ID_COUNT + 1;
// we have a nullptr at the beginning, so no need to check for rend // we have a nullptr at the beginning, so no need to check for rend
for(auto iter = stack.rbegin(); *iter; ++iter) for(auto iter = stack.rbegin(); *iter; ++iter)
{ {
int cost = state_iter1->diff(**iter); int cost = state_iter1->diff(**iter);
if(!equal(vertical_align,0))
++cost;
if(cost < best_cost) if(cost < best_cost)
{ {
while(stack.back() != *iter) while(stack.back() != *iter)
{ {
state_iter1->vertical_align += stack.back()->vertical_align;
stack.back()->end(out); stack.back()->end(out);
stack.pop_back(); stack.pop_back();
} }
best_cost = cost; best_cost = cost;
state_iter1->vertical_align = vertical_align;
if(best_cost == 0) if(best_cost == 0)
break; break;
@ -160,7 +168,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
// cannot go further // cannot go further
if((*iter)->start_idx <= last_text_pos_with_negative_offset) if((*iter)->start_idx <= last_text_pos_with_negative_offset)
break; break;
vertical_align += (*iter)->vertical_align;
} }
//
state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align);
// export the diff between *state_iter1 and stack.back() // export the diff between *state_iter1 and stack.back()
state_iter1->begin(out, stack.back()); state_iter1->begin(out, stack.back());
stack.push_back(&*state_iter1); stack.push_back(&*state_iter1);
@ -338,9 +350,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
} }
} }
// now we would like to adjust letter space to most_used width // negative letter space may cause problems
// we shall apply the optimization only when it can significantly reduce the number of elements if(!is_positive(state_iter1->letter_space + most_used_width))
if(max_count <= text_count / 2)
{ {
// the old value is the best // the old value is the best
// just copy old offsets // just copy old offsets
@ -348,6 +359,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
} }
else else
{ {
// now we would like to adjust letter space to most_used width
// install new letter space // install new letter space
const double old_ls = state_iter1->letter_space; const double old_ls = state_iter1->letter_space;
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space)); state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space));
@ -439,7 +452,7 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
{ {
long long cur_mask = 0xff; long long cur_mask = 0xff;
bool first = true; bool first = true;
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8) for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{ {
if(hash_umask & cur_mask) // we don't care about this ID if(hash_umask & cur_mask) // we don't care about this ID
{ {
@ -463,10 +476,8 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
case WORD_SPACE_ID: case WORD_SPACE_ID:
word_space = prev_state->word_space; word_space = prev_state->word_space;
break; break;
case RISE_ID:
rise = prev_state->rise;
break;
default: default:
cerr << "unexpected state mask" << endl;
break; break;
} }
} }
@ -495,6 +506,28 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
else else
out << ids[i]; out << ids[i];
} }
// vertical align
if(!equal(vertical_align, 0))
{
// so we have to dump it
if(first)
{
out << "<span class=\"";
first = false;
}
else
{
out << ' ';
}
// out should have hex set
out << CSS::VERTICAL_ALIGN_CN;
auto id = ids[VERTICAL_ALIGN_ID];
if (id == -1)
out << CSS::INVALID_ID;
else
out << id;
}
if(first) // we actually just inherit the whole prev_state if(first) // we actually just inherit the whole prev_state
{ {
@ -511,8 +544,9 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
// prev_state == nullptr // prev_state == nullptr
// which means this is the first state of the line // which means this is the first state of the line
// there should be a open pending <div> left there // there should be a open pending <div> left there
// it is not necessary to output vertical align
long long cur_mask = 0xff; long long cur_mask = 0xff;
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8) for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{ {
if(hash_umask & cur_mask) // we don't care about this ID if(hash_umask & cur_mask) // we don't care about this ID
continue; continue;
@ -591,7 +625,7 @@ const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
CSS::STROKE_COLOR_CN, CSS::STROKE_COLOR_CN,
CSS::LETTER_SPACE_CN, CSS::LETTER_SPACE_CN,
CSS::WORD_SPACE_CN, CSS::WORD_SPACE_CN,
CSS::RISE_CN CSS::VERTICAL_ALIGN_CN,
}; };
} //namespace pdf2htmlEX } //namespace pdf2htmlEX

View File

@ -41,8 +41,9 @@ public:
STROKE_COLOR_ID, STROKE_COLOR_ID,
LETTER_SPACE_ID, LETTER_SPACE_ID,
WORD_SPACE_ID, WORD_SPACE_ID,
RISE_ID, HASH_ID_COUNT,
VERTICAL_ALIGN_ID = HASH_ID_COUNT,
ID_COUNT ID_COUNT
}; };

View File

@ -61,9 +61,9 @@ HTMLRenderer::HTMLRenderer(const Param * param)
* or may be handled well (whitespace_manager) * or may be handled well (whitespace_manager)
* So we can set a large eps here * So we can set a large eps here
*/ */
rise_manager .set_eps(param->v_eps); vertical_align_manager.set_eps(param->v_eps);
whitespace_manager .set_eps(param->h_eps); whitespace_manager .set_eps(param->h_eps);
left_manager .set_eps(param->h_eps); left_manager .set_eps(param->h_eps);
/* /*
* For other states, we need accurate values * For other states, we need accurate values
* optimization will be done separately * optimization will be done separately
@ -446,6 +446,7 @@ void HTMLRenderer::set_stream_flags(std::ostream & out)
void HTMLRenderer::dump_css (void) void HTMLRenderer::dump_css (void)
{ {
transform_matrix_manager.dump_css(f_css.fs); transform_matrix_manager.dump_css(f_css.fs);
vertical_align_manager .dump_css(f_css.fs);
letter_space_manager .dump_css(f_css.fs); letter_space_manager .dump_css(f_css.fs);
stroke_color_manager .dump_css(f_css.fs); stroke_color_manager .dump_css(f_css.fs);
word_space_manager .dump_css(f_css.fs); word_space_manager .dump_css(f_css.fs);
@ -455,7 +456,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_css(f_css.fs); bottom_manager .dump_css(f_css.fs);
height_manager .dump_css(f_css.fs); height_manager .dump_css(f_css.fs);
width_manager .dump_css(f_css.fs); width_manager .dump_css(f_css.fs);
rise_manager .dump_css(f_css.fs);
left_manager .dump_css(f_css.fs); left_manager .dump_css(f_css.fs);
bgimage_size_manager .dump_css(f_css.fs); bgimage_size_manager .dump_css(f_css.fs);
@ -463,6 +463,7 @@ void HTMLRenderer::dump_css (void)
double ps = print_scale(); double ps = print_scale();
f_css.fs << CSS::PRINT_ONLY << "{" << endl; f_css.fs << CSS::PRINT_ONLY << "{" << endl;
transform_matrix_manager.dump_print_css(f_css.fs, ps); transform_matrix_manager.dump_print_css(f_css.fs, ps);
vertical_align_manager .dump_print_css(f_css.fs, ps);
letter_space_manager .dump_print_css(f_css.fs, ps); letter_space_manager .dump_print_css(f_css.fs, ps);
stroke_color_manager .dump_print_css(f_css.fs, ps); stroke_color_manager .dump_print_css(f_css.fs, ps);
word_space_manager .dump_print_css(f_css.fs, ps); word_space_manager .dump_print_css(f_css.fs, ps);
@ -472,7 +473,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_print_css(f_css.fs, ps); bottom_manager .dump_print_css(f_css.fs, ps);
height_manager .dump_print_css(f_css.fs, ps); height_manager .dump_print_css(f_css.fs, ps);
width_manager .dump_print_css(f_css.fs, ps); width_manager .dump_print_css(f_css.fs, ps);
rise_manager .dump_print_css(f_css.fs, ps);
left_manager .dump_print_css(f_css.fs, ps); left_manager .dump_print_css(f_css.fs, ps);
bgimage_size_manager .dump_print_css(f_css.fs, ps); bgimage_size_manager .dump_print_css(f_css.fs, ps);
f_css.fs << "}" << endl; f_css.fs << "}" << endl;

View File

@ -101,7 +101,7 @@ void HTMLRenderer::reset_state()
cur_html_state.stroke_color.transparent = true; cur_html_state.stroke_color.transparent = true;
cur_html_state.letter_space = 0; cur_html_state.letter_space = 0;
cur_html_state.word_space = 0; cur_html_state.word_space = 0;
cur_html_state.rise = 0; cur_html_state.vertical_align = 0;
cur_html_state.x = 0; cur_html_state.x = 0;
cur_html_state.y = 0; cur_html_state.y = 0;
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix)); memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
@ -148,6 +148,9 @@ void HTMLRenderer::check_state_change(GfxState * state)
need_recheck_position = true; need_recheck_position = true;
} }
// save current info for later use
auto old_font_info = cur_html_state.font_info;
double old_font_size = cur_html_state.font_size;
// font name & size // font name & size
if(all_changed || font_changed) if(all_changed || font_changed)
{ {
@ -306,11 +309,28 @@ void HTMLRenderer::check_state_change(GfxState * state)
inverted[3] = old_tm[0] / det; inverted[3] = old_tm[0] / det;
dx = inverted[0] * lhs1 + inverted[2] * lhs2; dx = inverted[0] * lhs1 + inverted[2] * lhs2;
dy = inverted[1] * lhs1 + inverted[3] * lhs2; dy = inverted[1] * lhs1 + inverted[3] * lhs2;
// currently we merge only text on a same horizontal line
if(equal(dy, 0)) if(equal(dy, 0))
{ {
// text on the same horizontal line, we can insert positive or negative x-offsets
merged = true; merged = true;
} }
else
{
// otherwise we merge the lines only when
// - text are not shifted to the left too much
// - text are not moved too high or too low
if((dx * draw_text_scale) >= -(old_font_info->ascent - old_font_info->descent) * old_font_size - EPS)
{
double oldymin = old_font_info->descent * old_font_size;
double oldymax = old_font_info->ascent * old_font_size;
double ymin = dy * draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size;
double ymax = dy * draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size;
if((ymin <= oldymax + EPS) && (ymax >= oldymin - EPS))
{
merged = true;
}
}
}
} }
//else no solution //else no solution
} }
@ -319,6 +339,15 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(merged) if(merged)
{ {
text_line_buf->append_offset(dx * draw_text_scale); text_line_buf->append_offset(dx * draw_text_scale);
if(equal(dy, 0))
{
cur_html_state.vertical_align = 0;
}
else
{
cur_html_state.vertical_align = (dy * draw_text_scale);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
draw_tx = cur_tx; draw_tx = cur_tx;
draw_ty = cur_ty; draw_ty = cur_ty;
} }
@ -419,6 +448,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
// update position such that they will be recorded by text_line_buf // update position such that they will be recorded by text_line_buf
state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y); state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
cur_html_state.vertical_align = 0;
//resync position //resync position
draw_ty = cur_ty; draw_ty = cur_ty;

View File

@ -6,29 +6,29 @@
set(CSS_INVALID_ID "_") set(CSS_INVALID_ID "_")
set(CSS_LINE_CN "t") # text set(CSS_LINE_CN "t") # Text
set(CSS_TRANSFORM_MATRIX_CN "m") # matrix set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
set(CSS_PAGE_DECORATION_CN "pd") # page decoration set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
set(CSS_PAGE_FRAME_CN "pf") # page frame set(CSS_PAGE_FRAME_CN "pf") # Page Frame
set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content set(CSS_PAGE_CONTENT_BOX_CN "pc") # Page Content
set(CSS_PAGE_DATA_CN "pi") # page info set(CSS_PAGE_DATA_CN "pi") # Page Info
set(CSS_BACKGROUND_IMAGE_CN "bi") # background image set(CSS_BACKGROUND_IMAGE_CN "bi") # Background Image
set(CSS_FONT_FAMILY_CN "ff") # font family set(CSS_FONT_FAMILY_CN "ff") # Font Family
set(CSS_FONT_SIZE_CN "fs") # font size set(CSS_FONT_SIZE_CN "fs") # Font Size
set(CSS_FILL_COLOR_CN "fc") # fill color set(CSS_FILL_COLOR_CN "fc") # Fill Color
set(CSS_STROKE_COLOR_CN "sc") # stroke color set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
set(CSS_LETTER_SPACE_CN "ls") # letter space set(CSS_LETTER_SPACE_CN "ls") # Letter Space
set(CSS_WORD_SPACE_CN "ws") # word space set(CSS_WORD_SPACE_CN "ws") # Word Space
set(CSS_RISE_CN "r") # rise set(CSS_VERTICAL_ALIGN_CN "v") # Vertical Align
set(CSS_WHITESPACE_CN "_") # whitespace set(CSS_WHITESPACE_CN "_") # whitespace
set(CSS_LEFT_CN "x") # x set(CSS_LEFT_CN "x") # X
set(CSS_HEIGHT_CN "h") # height set(CSS_HEIGHT_CN "h") # Height
set(CSS_WIDTH_CN "w") # width set(CSS_WIDTH_CN "w") # Width
set(CSS_BOTTTOM_CN "y") # y set(CSS_BOTTTOM_CN "y") # Y
set(CSS_CSS_DRAW_CN "d") # draw set(CSS_CSS_DRAW_CN "d") # Draw
set(CSS_LINK_CN "l") # link set(CSS_LINK_CN "l") # Link

View File

@ -231,10 +231,10 @@ public:
void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; } void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; }
}; };
class RiseManager : public StateManager<double, RiseManager> class VerticalAlignManager : public StateManager<double, VerticalAlignManager>
{ {
public: public:
static const char * get_css_class_name (void) { return CSS::RISE_CN; } static const char * get_css_class_name (void) { return CSS::VERTICAL_ALIGN_CN; }
double default_value(void) { return 0; } double default_value(void) { return 0; }
void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; } void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; }
void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; } void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; }

View File

@ -43,7 +43,7 @@ const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@";
const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@"; const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@";
const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@"; const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@";
const char * const WORD_SPACE_CN = "@CSS_WORD_SPACE_CN@"; const char * const WORD_SPACE_CN = "@CSS_WORD_SPACE_CN@";
const char * const RISE_CN = "@CSS_RISE_CN@"; const char * const VERTICAL_ALIGN_CN = "@CSS_VERTICAL_ALIGN_CN@";
const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@"; const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@";
const char * const LEFT_CN = "@CSS_LEFT_CN@"; const char * const LEFT_CN = "@CSS_LEFT_CN@";
const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@"; const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@";