1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

merge sub/sup into one line

This commit is contained in:
Lu Wang 2013-04-05 21:53:34 +08:00
parent 623de175f5
commit ab2588a5fd
8 changed files with 121 additions and 54 deletions

View File

@ -50,7 +50,9 @@ struct HTMLState
Color stroke_color;
double letter_space;
double word_space;
double rise;
// relative to the previous state
double vertical_align;
double x,y;
double transform_matrix[4];
@ -337,6 +339,7 @@ protected:
// managers store values actually used in HTML (i.e. scaled)
////////////////////////////////////////////////
TransformMatrixManager transform_matrix_manager;
VerticalAlignManager vertical_align_manager;
StrokeColorManager stroke_color_manager;
LetterSpaceManager letter_space_manager;
WhitespaceManager whitespace_manager;
@ -346,7 +349,6 @@ protected:
BottomManager bottom_manager;
HeightManager height_manager;
WidthManager width_manager;
RiseManager rise_manager;
LeftManager left_manager;
////////////////////////////////////////////////
BGImageSizeManager bgimage_size_manager;

View File

@ -91,22 +91,14 @@ void HTMLRenderer::TextLineBuffer::flush(void)
ostream & out = renderer->f_pages.fs;
{
// max_ascent determines the height of the div
double accum_vertical_align = 0; // accumulated
double max_ascent = 0;
for(auto iter = states.begin(); iter != states.end(); ++iter)
{
double cur_ascent = iter->rise + iter->font_info->ascent * iter->font_size;
accum_vertical_align += iter->vertical_align;
double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
if(cur_ascent > max_ascent)
max_ascent = cur_ascent;
// set id
iter->ids[State::FONT_ID] = iter->font_info->id;
iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(iter->font_size);
iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(iter->fill_color);
iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(iter->stroke_color);
iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(iter->letter_space);
iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(iter->word_space);
iter->ids[State::RISE_ID] = renderer->rise_manager .install(iter->rise);
iter->hash();
}
// open <div> for the current text line
@ -138,20 +130,36 @@ void HTMLRenderer::TextLineBuffer::flush(void)
{
// export current state, find a closest parent
{
// set id
state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size);
state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color);
state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color);
state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space);
state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space);
state_iter1->hash();
// greedy
int best_cost = State::ID_COUNT;
double vertical_align = state_iter1->vertical_align;
int best_cost = State::HASH_ID_COUNT + 1;
// we have a nullptr at the beginning, so no need to check for rend
for(auto iter = stack.rbegin(); *iter; ++iter)
{
int cost = state_iter1->diff(**iter);
if(!equal(vertical_align,0))
++cost;
if(cost < best_cost)
{
while(stack.back() != *iter)
{
state_iter1->vertical_align += stack.back()->vertical_align;
stack.back()->end(out);
stack.pop_back();
}
best_cost = cost;
state_iter1->vertical_align = vertical_align;
if(best_cost == 0)
break;
@ -160,7 +168,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
// cannot go further
if((*iter)->start_idx <= last_text_pos_with_negative_offset)
break;
vertical_align += (*iter)->vertical_align;
}
//
state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align);
// export the diff between *state_iter1 and stack.back()
state_iter1->begin(out, stack.back());
stack.push_back(&*state_iter1);
@ -338,9 +350,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
}
}
// now we would like to adjust letter space to most_used width
// we shall apply the optimization only when it can significantly reduce the number of elements
if(max_count <= text_count / 2)
// negative letter space may cause problems
if(!is_positive(state_iter1->letter_space + most_used_width))
{
// the old value is the best
// just copy old offsets
@ -348,6 +359,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
}
else
{
// now we would like to adjust letter space to most_used width
// install new letter space
const double old_ls = state_iter1->letter_space;
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space));
@ -439,7 +452,7 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
{
long long cur_mask = 0xff;
bool first = true;
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{
if(hash_umask & cur_mask) // we don't care about this ID
{
@ -463,10 +476,8 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
case WORD_SPACE_ID:
word_space = prev_state->word_space;
break;
case RISE_ID:
rise = prev_state->rise;
break;
default:
cerr << "unexpected state mask" << endl;
break;
}
}
@ -495,6 +506,28 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
else
out << ids[i];
}
// vertical align
if(!equal(vertical_align, 0))
{
// so we have to dump it
if(first)
{
out << "<span class=\"";
first = false;
}
else
{
out << ' ';
}
// out should have hex set
out << CSS::VERTICAL_ALIGN_CN;
auto id = ids[VERTICAL_ALIGN_ID];
if (id == -1)
out << CSS::INVALID_ID;
else
out << id;
}
if(first) // we actually just inherit the whole prev_state
{
@ -511,8 +544,9 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
// prev_state == nullptr
// which means this is the first state of the line
// there should be a open pending <div> left there
// it is not necessary to output vertical align
long long cur_mask = 0xff;
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
{
if(hash_umask & cur_mask) // we don't care about this ID
continue;
@ -591,7 +625,7 @@ const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
CSS::STROKE_COLOR_CN,
CSS::LETTER_SPACE_CN,
CSS::WORD_SPACE_CN,
CSS::RISE_CN
CSS::VERTICAL_ALIGN_CN,
};
} //namespace pdf2htmlEX

View File

@ -41,8 +41,9 @@ public:
STROKE_COLOR_ID,
LETTER_SPACE_ID,
WORD_SPACE_ID,
RISE_ID,
HASH_ID_COUNT,
VERTICAL_ALIGN_ID = HASH_ID_COUNT,
ID_COUNT
};

View File

@ -61,9 +61,9 @@ HTMLRenderer::HTMLRenderer(const Param * param)
* or may be handled well (whitespace_manager)
* So we can set a large eps here
*/
rise_manager .set_eps(param->v_eps);
whitespace_manager .set_eps(param->h_eps);
left_manager .set_eps(param->h_eps);
vertical_align_manager.set_eps(param->v_eps);
whitespace_manager .set_eps(param->h_eps);
left_manager .set_eps(param->h_eps);
/*
* For other states, we need accurate values
* optimization will be done separately
@ -446,6 +446,7 @@ void HTMLRenderer::set_stream_flags(std::ostream & out)
void HTMLRenderer::dump_css (void)
{
transform_matrix_manager.dump_css(f_css.fs);
vertical_align_manager .dump_css(f_css.fs);
letter_space_manager .dump_css(f_css.fs);
stroke_color_manager .dump_css(f_css.fs);
word_space_manager .dump_css(f_css.fs);
@ -455,7 +456,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_css(f_css.fs);
height_manager .dump_css(f_css.fs);
width_manager .dump_css(f_css.fs);
rise_manager .dump_css(f_css.fs);
left_manager .dump_css(f_css.fs);
bgimage_size_manager .dump_css(f_css.fs);
@ -463,6 +463,7 @@ void HTMLRenderer::dump_css (void)
double ps = print_scale();
f_css.fs << CSS::PRINT_ONLY << "{" << endl;
transform_matrix_manager.dump_print_css(f_css.fs, ps);
vertical_align_manager .dump_print_css(f_css.fs, ps);
letter_space_manager .dump_print_css(f_css.fs, ps);
stroke_color_manager .dump_print_css(f_css.fs, ps);
word_space_manager .dump_print_css(f_css.fs, ps);
@ -472,7 +473,6 @@ void HTMLRenderer::dump_css (void)
bottom_manager .dump_print_css(f_css.fs, ps);
height_manager .dump_print_css(f_css.fs, ps);
width_manager .dump_print_css(f_css.fs, ps);
rise_manager .dump_print_css(f_css.fs, ps);
left_manager .dump_print_css(f_css.fs, ps);
bgimage_size_manager .dump_print_css(f_css.fs, ps);
f_css.fs << "}" << endl;

View File

@ -101,7 +101,7 @@ void HTMLRenderer::reset_state()
cur_html_state.stroke_color.transparent = true;
cur_html_state.letter_space = 0;
cur_html_state.word_space = 0;
cur_html_state.rise = 0;
cur_html_state.vertical_align = 0;
cur_html_state.x = 0;
cur_html_state.y = 0;
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
@ -148,6 +148,9 @@ void HTMLRenderer::check_state_change(GfxState * state)
need_recheck_position = true;
}
// save current info for later use
auto old_font_info = cur_html_state.font_info;
double old_font_size = cur_html_state.font_size;
// font name & size
if(all_changed || font_changed)
{
@ -306,11 +309,28 @@ void HTMLRenderer::check_state_change(GfxState * state)
inverted[3] = old_tm[0] / det;
dx = inverted[0] * lhs1 + inverted[2] * lhs2;
dy = inverted[1] * lhs1 + inverted[3] * lhs2;
// currently we merge only text on a same horizontal line
if(equal(dy, 0))
{
// text on a same horizontal line, we can insert positive or negative x-offsets
merged = true;
}
else
{
// otherwise we merge the lines only when
// - text are not shifted to the left too much
// - text are not moved too high or too low
if((dx * draw_text_scale) >= -(old_font_info->ascent - old_font_info->descent) * old_font_size - EPS)
{
double oldymin = old_font_info->descent * old_font_size;
double oldymax = old_font_info->ascent * old_font_size;
double ymin = dy * draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size;
double ymax = dy * draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size;
if((ymin <= oldymax + EPS) && (ymax >= oldymin - EPS))
{
merged = true;
}
}
}
}
//else no solution
}
@ -319,6 +339,15 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(merged)
{
text_line_buf->append_offset(dx * draw_text_scale);
if(equal(dy, 0))
{
cur_html_state.vertical_align = 0;
}
else
{
cur_html_state.vertical_align = (dy * draw_text_scale);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
draw_tx = cur_tx;
draw_ty = cur_ty;
}
@ -419,6 +448,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
// update position such that they will be recorded by text_line_buf
state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
cur_html_state.vertical_align = 0;
//resync position
draw_ty = cur_ty;

View File

@ -6,29 +6,29 @@
set(CSS_INVALID_ID "_")
set(CSS_LINE_CN "t") # text
set(CSS_TRANSFORM_MATRIX_CN "m") # matrix
set(CSS_LINE_CN "t") # Text
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
set(CSS_PAGE_DECORATION_CN "pd") # page decoration
set(CSS_PAGE_FRAME_CN "pf") # page frame
set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content
set(CSS_PAGE_DATA_CN "pi") # page info
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
set(CSS_PAGE_FRAME_CN "pf") # Page Frame
set(CSS_PAGE_CONTENT_BOX_CN "pc") # Page Content
set(CSS_PAGE_DATA_CN "pi") # Page Info
set(CSS_BACKGROUND_IMAGE_CN "bi") # background image
set(CSS_BACKGROUND_IMAGE_CN "bi") # Background Image
set(CSS_FONT_FAMILY_CN "ff") # font family
set(CSS_FONT_SIZE_CN "fs") # font size
set(CSS_FONT_FAMILY_CN "ff") # Font Family
set(CSS_FONT_SIZE_CN "fs") # Font Size
set(CSS_FILL_COLOR_CN "fc") # fill color
set(CSS_STROKE_COLOR_CN "sc") # stroke color
set(CSS_FILL_COLOR_CN "fc") # Fill Color
set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
set(CSS_LETTER_SPACE_CN "ls") # letter space
set(CSS_WORD_SPACE_CN "ws") # word space
set(CSS_RISE_CN "r") # rise
set(CSS_LETTER_SPACE_CN "ls") # Letter Space
set(CSS_WORD_SPACE_CN "ws") # Word Space
set(CSS_VERTICAL_ALIGN_CN "v") # Vertical align
set(CSS_WHITESPACE_CN "_") # whitespace
set(CSS_LEFT_CN "x") # x
set(CSS_HEIGHT_CN "h") # height
set(CSS_WIDTH_CN "w") # width
set(CSS_BOTTTOM_CN "y") # y
set(CSS_CSS_DRAW_CN "d") # draw
set(CSS_LINK_CN "l") # link
set(CSS_LEFT_CN "x") # X
set(CSS_HEIGHT_CN "h") # Height
set(CSS_WIDTH_CN "w") # Width
set(CSS_BOTTTOM_CN "y") # Y
set(CSS_CSS_DRAW_CN "d") # Draw
set(CSS_LINK_CN "l") # Link

View File

@ -231,10 +231,10 @@ public:
void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; }
};
class RiseManager : public StateManager<double, RiseManager>
class VerticalAlignManager : public StateManager<double, VerticalAlignManager>
{
public:
static const char * get_css_class_name (void) { return CSS::RISE_CN; }
static const char * get_css_class_name (void) { return CSS::VERTICAL_ALIGN_CN; }
double default_value(void) { return 0; }
void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; }
void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; }

View File

@ -43,7 +43,7 @@ const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@";
const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@";
const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@";
const char * const WORD_SPACE_CN = "@CSS_WORD_SPACE_CN@";
const char * const RISE_CN = "@CSS_RISE_CN@";
const char * const VERTICAL_ALIGN_CN = "@CSS_VERTICAL_ALIGN_CN@";
const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@";
const char * const LEFT_CN = "@CSS_LEFT_CN@";
const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@";