mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
merge sub/sup into one line
This commit is contained in:
parent
623de175f5
commit
ab2588a5fd
@ -50,7 +50,9 @@ struct HTMLState
|
||||
Color stroke_color;
|
||||
double letter_space;
|
||||
double word_space;
|
||||
double rise;
|
||||
|
||||
// relative to the previous state
|
||||
double vertical_align;
|
||||
|
||||
double x,y;
|
||||
double transform_matrix[4];
|
||||
@ -337,6 +339,7 @@ protected:
|
||||
// managers store values actually used in HTML (i.e. scaled)
|
||||
////////////////////////////////////////////////
|
||||
TransformMatrixManager transform_matrix_manager;
|
||||
VerticalAlignManager vertical_align_manager;
|
||||
StrokeColorManager stroke_color_manager;
|
||||
LetterSpaceManager letter_space_manager;
|
||||
WhitespaceManager whitespace_manager;
|
||||
@ -346,7 +349,6 @@ protected:
|
||||
BottomManager bottom_manager;
|
||||
HeightManager height_manager;
|
||||
WidthManager width_manager;
|
||||
RiseManager rise_manager;
|
||||
LeftManager left_manager;
|
||||
////////////////////////////////////////////////
|
||||
BGImageSizeManager bgimage_size_manager;
|
||||
|
@ -91,22 +91,14 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
ostream & out = renderer->f_pages.fs;
|
||||
{
|
||||
// max_ascent determines the height of the div
|
||||
double accum_vertical_align = 0; // accumulated
|
||||
double max_ascent = 0;
|
||||
for(auto iter = states.begin(); iter != states.end(); ++iter)
|
||||
{
|
||||
double cur_ascent = iter->rise + iter->font_info->ascent * iter->font_size;
|
||||
accum_vertical_align += iter->vertical_align;
|
||||
double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
|
||||
if(cur_ascent > max_ascent)
|
||||
max_ascent = cur_ascent;
|
||||
|
||||
// set id
|
||||
iter->ids[State::FONT_ID] = iter->font_info->id;
|
||||
iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(iter->font_size);
|
||||
iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(iter->fill_color);
|
||||
iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(iter->stroke_color);
|
||||
iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(iter->letter_space);
|
||||
iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(iter->word_space);
|
||||
iter->ids[State::RISE_ID] = renderer->rise_manager .install(iter->rise);
|
||||
iter->hash();
|
||||
}
|
||||
|
||||
// open <div> for the current text line
|
||||
@ -138,20 +130,36 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
{
|
||||
// export current state, find a closest parent
|
||||
{
|
||||
// set id
|
||||
state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
|
||||
state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size);
|
||||
state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color);
|
||||
state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color);
|
||||
state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space);
|
||||
state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space);
|
||||
state_iter1->hash();
|
||||
|
||||
// greedy
|
||||
int best_cost = State::ID_COUNT;
|
||||
double vertical_align = state_iter1->vertical_align;
|
||||
int best_cost = State::HASH_ID_COUNT + 1;
|
||||
// we have a nullptr at the beginning, so no need to check for rend
|
||||
for(auto iter = stack.rbegin(); *iter; ++iter)
|
||||
{
|
||||
int cost = state_iter1->diff(**iter);
|
||||
if(!equal(vertical_align,0))
|
||||
++cost;
|
||||
|
||||
if(cost < best_cost)
|
||||
{
|
||||
while(stack.back() != *iter)
|
||||
{
|
||||
state_iter1->vertical_align += stack.back()->vertical_align;
|
||||
|
||||
stack.back()->end(out);
|
||||
stack.pop_back();
|
||||
}
|
||||
best_cost = cost;
|
||||
state_iter1->vertical_align = vertical_align;
|
||||
|
||||
if(best_cost == 0)
|
||||
break;
|
||||
@ -160,7 +168,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
// cannot go further
|
||||
if((*iter)->start_idx <= last_text_pos_with_negative_offset)
|
||||
break;
|
||||
|
||||
vertical_align += (*iter)->vertical_align;
|
||||
}
|
||||
//
|
||||
state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align);
|
||||
// export the diff between *state_iter1 and stack.back()
|
||||
state_iter1->begin(out, stack.back());
|
||||
stack.push_back(&*state_iter1);
|
||||
@ -338,9 +350,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
|
||||
}
|
||||
}
|
||||
|
||||
// now we would like to adjust letter space to most_used width
|
||||
// we shall apply the optimization only when it can significantly reduce the number of elements
|
||||
if(max_count <= text_count / 2)
|
||||
// negative letter space may cause problems
|
||||
if(!is_positive(state_iter1->letter_space + most_used_width))
|
||||
{
|
||||
// the old value is the best
|
||||
// just copy old offsets
|
||||
@ -348,6 +359,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
|
||||
}
|
||||
else
|
||||
{
|
||||
// now we would like to adjust letter space to most_used width
|
||||
|
||||
// install new letter space
|
||||
const double old_ls = state_iter1->letter_space;
|
||||
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space));
|
||||
@ -439,7 +452,7 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||
{
|
||||
long long cur_mask = 0xff;
|
||||
bool first = true;
|
||||
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
|
||||
for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
|
||||
{
|
||||
if(hash_umask & cur_mask) // we don't care about this ID
|
||||
{
|
||||
@ -463,10 +476,8 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||
case WORD_SPACE_ID:
|
||||
word_space = prev_state->word_space;
|
||||
break;
|
||||
case RISE_ID:
|
||||
rise = prev_state->rise;
|
||||
break;
|
||||
default:
|
||||
cerr << "unexpected state mask" << endl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -495,6 +506,28 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||
else
|
||||
out << ids[i];
|
||||
}
|
||||
// vertical align
|
||||
if(!equal(vertical_align, 0))
|
||||
{
|
||||
// so we have to dump it
|
||||
if(first)
|
||||
{
|
||||
out << "<span class=\"";
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
out << ' ';
|
||||
}
|
||||
|
||||
// out should have hex set
|
||||
out << CSS::VERTICAL_ALIGN_CN;
|
||||
auto id = ids[VERTICAL_ALIGN_ID];
|
||||
if (id == -1)
|
||||
out << CSS::INVALID_ID;
|
||||
else
|
||||
out << id;
|
||||
}
|
||||
|
||||
if(first) // we actually just inherit the whole prev_state
|
||||
{
|
||||
@ -511,8 +544,9 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||
// prev_state == nullptr
|
||||
// which means this is the first state of the line
|
||||
// there should be an open pending <div> left there
|
||||
// it is not necessary to output vertical align
|
||||
long long cur_mask = 0xff;
|
||||
for(int i = 0; i < ID_COUNT; ++i, cur_mask<<=8)
|
||||
for(int i = 0; i < HASH_ID_COUNT; ++i, cur_mask<<=8)
|
||||
{
|
||||
if(hash_umask & cur_mask) // we don't care about this ID
|
||||
continue;
|
||||
@ -591,7 +625,7 @@ const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
|
||||
CSS::STROKE_COLOR_CN,
|
||||
CSS::LETTER_SPACE_CN,
|
||||
CSS::WORD_SPACE_CN,
|
||||
CSS::RISE_CN
|
||||
CSS::VERTICAL_ALIGN_CN,
|
||||
};
|
||||
|
||||
} //namespace pdf2htmlEX
|
||||
|
@ -41,8 +41,9 @@ public:
|
||||
STROKE_COLOR_ID,
|
||||
LETTER_SPACE_ID,
|
||||
WORD_SPACE_ID,
|
||||
RISE_ID,
|
||||
HASH_ID_COUNT,
|
||||
|
||||
VERTICAL_ALIGN_ID = HASH_ID_COUNT,
|
||||
ID_COUNT
|
||||
};
|
||||
|
||||
|
@ -61,9 +61,9 @@ HTMLRenderer::HTMLRenderer(const Param * param)
|
||||
* or may be handled well (whitespace_manager)
|
||||
* So we can set a large eps here
|
||||
*/
|
||||
rise_manager .set_eps(param->v_eps);
|
||||
whitespace_manager .set_eps(param->h_eps);
|
||||
left_manager .set_eps(param->h_eps);
|
||||
vertical_align_manager.set_eps(param->v_eps);
|
||||
whitespace_manager .set_eps(param->h_eps);
|
||||
left_manager .set_eps(param->h_eps);
|
||||
/*
|
||||
* For other states, we need accurate values
|
||||
* optimization will be done separately
|
||||
@ -446,6 +446,7 @@ void HTMLRenderer::set_stream_flags(std::ostream & out)
|
||||
void HTMLRenderer::dump_css (void)
|
||||
{
|
||||
transform_matrix_manager.dump_css(f_css.fs);
|
||||
vertical_align_manager .dump_css(f_css.fs);
|
||||
letter_space_manager .dump_css(f_css.fs);
|
||||
stroke_color_manager .dump_css(f_css.fs);
|
||||
word_space_manager .dump_css(f_css.fs);
|
||||
@ -455,7 +456,6 @@ void HTMLRenderer::dump_css (void)
|
||||
bottom_manager .dump_css(f_css.fs);
|
||||
height_manager .dump_css(f_css.fs);
|
||||
width_manager .dump_css(f_css.fs);
|
||||
rise_manager .dump_css(f_css.fs);
|
||||
left_manager .dump_css(f_css.fs);
|
||||
bgimage_size_manager .dump_css(f_css.fs);
|
||||
|
||||
@ -463,6 +463,7 @@ void HTMLRenderer::dump_css (void)
|
||||
double ps = print_scale();
|
||||
f_css.fs << CSS::PRINT_ONLY << "{" << endl;
|
||||
transform_matrix_manager.dump_print_css(f_css.fs, ps);
|
||||
vertical_align_manager .dump_print_css(f_css.fs, ps);
|
||||
letter_space_manager .dump_print_css(f_css.fs, ps);
|
||||
stroke_color_manager .dump_print_css(f_css.fs, ps);
|
||||
word_space_manager .dump_print_css(f_css.fs, ps);
|
||||
@ -472,7 +473,6 @@ void HTMLRenderer::dump_css (void)
|
||||
bottom_manager .dump_print_css(f_css.fs, ps);
|
||||
height_manager .dump_print_css(f_css.fs, ps);
|
||||
width_manager .dump_print_css(f_css.fs, ps);
|
||||
rise_manager .dump_print_css(f_css.fs, ps);
|
||||
left_manager .dump_print_css(f_css.fs, ps);
|
||||
bgimage_size_manager .dump_print_css(f_css.fs, ps);
|
||||
f_css.fs << "}" << endl;
|
||||
|
@ -101,7 +101,7 @@ void HTMLRenderer::reset_state()
|
||||
cur_html_state.stroke_color.transparent = true;
|
||||
cur_html_state.letter_space = 0;
|
||||
cur_html_state.word_space = 0;
|
||||
cur_html_state.rise = 0;
|
||||
cur_html_state.vertical_align = 0;
|
||||
cur_html_state.x = 0;
|
||||
cur_html_state.y = 0;
|
||||
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
|
||||
@ -148,6 +148,9 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
need_recheck_position = true;
|
||||
}
|
||||
|
||||
// save current info for later use
|
||||
auto old_font_info = cur_html_state.font_info;
|
||||
double old_font_size = cur_html_state.font_size;
|
||||
// font name & size
|
||||
if(all_changed || font_changed)
|
||||
{
|
||||
@ -306,11 +309,28 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
inverted[3] = old_tm[0] / det;
|
||||
dx = inverted[0] * lhs1 + inverted[2] * lhs2;
|
||||
dy = inverted[1] * lhs1 + inverted[3] * lhs2;
|
||||
// currently we merge only text on a same horizontal line
|
||||
if(equal(dy, 0))
|
||||
{
|
||||
// text on the same horizontal line, we can insert positive or negative x-offsets
|
||||
merged = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// otherwise we merge the lines only when
|
||||
// - text is not shifted to the left too much
|
||||
// - text is not moved too high or too low
|
||||
if((dx * draw_text_scale) >= -(old_font_info->ascent - old_font_info->descent) * old_font_size - EPS)
|
||||
{
|
||||
double oldymin = old_font_info->descent * old_font_size;
|
||||
double oldymax = old_font_info->ascent * old_font_size;
|
||||
double ymin = dy * draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size;
|
||||
double ymax = dy * draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size;
|
||||
if((ymin <= oldymax + EPS) && (ymax >= oldymin - EPS))
|
||||
{
|
||||
merged = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//else no solution
|
||||
}
|
||||
@ -319,6 +339,15 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
if(merged)
|
||||
{
|
||||
text_line_buf->append_offset(dx * draw_text_scale);
|
||||
if(equal(dy, 0))
|
||||
{
|
||||
cur_html_state.vertical_align = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cur_html_state.vertical_align = (dy * draw_text_scale);
|
||||
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
||||
}
|
||||
draw_tx = cur_tx;
|
||||
draw_ty = cur_ty;
|
||||
}
|
||||
@ -419,6 +448,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
|
||||
|
||||
// update position such that they will be recorded by text_line_buf
|
||||
state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
|
||||
cur_html_state.vertical_align = 0;
|
||||
|
||||
//resync position
|
||||
draw_ty = cur_ty;
|
||||
|
@ -6,29 +6,29 @@
|
||||
|
||||
set(CSS_INVALID_ID "_")
|
||||
|
||||
set(CSS_LINE_CN "t") # text
|
||||
set(CSS_TRANSFORM_MATRIX_CN "m") # matrix
|
||||
set(CSS_LINE_CN "t") # Text
|
||||
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
|
||||
|
||||
set(CSS_PAGE_DECORATION_CN "pd") # page decoration
|
||||
set(CSS_PAGE_FRAME_CN "pf") # page frame
|
||||
set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content
|
||||
set(CSS_PAGE_DATA_CN "pi") # page info
|
||||
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
|
||||
set(CSS_PAGE_FRAME_CN "pf") # Page Frame
|
||||
set(CSS_PAGE_CONTENT_BOX_CN "pc") # Page Content
|
||||
set(CSS_PAGE_DATA_CN "pi") # Page Info
|
||||
|
||||
set(CSS_BACKGROUND_IMAGE_CN "bi") # background image
|
||||
set(CSS_BACKGROUND_IMAGE_CN "bi") # Background Image
|
||||
|
||||
set(CSS_FONT_FAMILY_CN "ff") # font family
|
||||
set(CSS_FONT_SIZE_CN "fs") # font size
|
||||
set(CSS_FONT_FAMILY_CN "ff") # Font Family
|
||||
set(CSS_FONT_SIZE_CN "fs") # Font Size
|
||||
|
||||
set(CSS_FILL_COLOR_CN "fc") # fill color
|
||||
set(CSS_STROKE_COLOR_CN "sc") # stroke color
|
||||
set(CSS_FILL_COLOR_CN "fc") # Fill Color
|
||||
set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
|
||||
|
||||
set(CSS_LETTER_SPACE_CN "ls") # letter space
|
||||
set(CSS_WORD_SPACE_CN "ws") # word space
|
||||
set(CSS_RISE_CN "r") # rise
|
||||
set(CSS_LETTER_SPACE_CN "ls") # Letter Space
|
||||
set(CSS_WORD_SPACE_CN "ws") # Word Space
|
||||
set(CSS_VERTICAL_ALIGN_CN "v") # Vertical align
|
||||
set(CSS_WHITESPACE_CN "_") # whitespace
|
||||
set(CSS_LEFT_CN "x") # x
|
||||
set(CSS_HEIGHT_CN "h") # height
|
||||
set(CSS_WIDTH_CN "w") # width
|
||||
set(CSS_BOTTTOM_CN "y") # y
|
||||
set(CSS_CSS_DRAW_CN "d") # draw
|
||||
set(CSS_LINK_CN "l") # link
|
||||
set(CSS_LEFT_CN "x") # X
|
||||
set(CSS_HEIGHT_CN "h") # Height
|
||||
set(CSS_WIDTH_CN "w") # Width
|
||||
set(CSS_BOTTTOM_CN "y") # Y
|
||||
set(CSS_CSS_DRAW_CN "d") # Draw
|
||||
set(CSS_LINK_CN "l") # Link
|
||||
|
@ -231,10 +231,10 @@ public:
|
||||
void dump_print_value(std::ostream & out, double value, double scale) { out << "word-spacing:" << round(value*scale) << "pt;"; }
|
||||
};
|
||||
|
||||
class RiseManager : public StateManager<double, RiseManager>
|
||||
class VerticalAlignManager : public StateManager<double, VerticalAlignManager>
|
||||
{
|
||||
public:
|
||||
static const char * get_css_class_name (void) { return CSS::RISE_CN; }
|
||||
static const char * get_css_class_name (void) { return CSS::VERTICAL_ALIGN_CN; }
|
||||
double default_value(void) { return 0; }
|
||||
void dump_value(std::ostream & out, double value) { out << "vertical-align:" << round(value) << "px;"; }
|
||||
void dump_print_value(std::ostream & out, double value, double scale) { out << "vertical-align:" << round(value*scale) << "pt;"; }
|
||||
|
@ -43,7 +43,7 @@ const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@";
|
||||
const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@";
|
||||
const char * const LETTER_SPACE_CN = "@CSS_LETTER_SPACE_CN@";
|
||||
const char * const WORD_SPACE_CN = "@CSS_WORD_SPACE_CN@";
|
||||
const char * const RISE_CN = "@CSS_RISE_CN@";
|
||||
const char * const VERTICAL_ALIGN_CN = "@CSS_VERTICAL_ALIGN_CN@";
|
||||
const char * const WHITESPACE_CN = "@CSS_WHITESPACE_CN@";
|
||||
const char * const LEFT_CN = "@CSS_LEFT_CN@";
|
||||
const char * const HEIGHT_CN = "@CSS_HEIGHT_CN@";
|
||||
|
Loading…
Reference in New Issue
Block a user