1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 01:28:39 +00:00

check clip changes

This commit is contained in:
Lu Wang 2013-05-04 21:17:35 +08:00
parent c30bc8a353
commit a82996eeb7
6 changed files with 95 additions and 72 deletions

View File

@ -289,6 +289,7 @@ protected:
AllStateManager all_manager; AllStateManager all_manager;
HTMLTextState cur_text_state; HTMLTextState cur_text_state;
HTMLLineState cur_line_state; HTMLLineState cur_line_state;
HTMLClipState cur_clip_state;
HTMLTextPage html_text_page; HTMLTextPage html_text_page;
@ -296,7 +297,8 @@ protected:
{ {
NLS_NONE, NLS_NONE,
NLS_NEWSTATE, NLS_NEWSTATE,
NLS_NEWLINE NLS_NEWLINE,
NLS_NEWCLIP
} new_line_state; } new_line_state;
// for font reencoding // for font reencoding

View File

@ -119,6 +119,11 @@ void HTMLRenderer::reset_state()
cur_line_state.y = 0; cur_line_state.y = 0;
memcpy(cur_line_state.transform_matrix, ID_MATRIX, sizeof(cur_line_state.transform_matrix)); memcpy(cur_line_state.transform_matrix, ID_MATRIX, sizeof(cur_line_state.transform_matrix));
cur_clip_state.xmin = 0;
cur_clip_state.xmax = 0;
cur_clip_state.ymin = 0;
cur_clip_state.ymax = 0;
cur_tx = cur_ty = 0; cur_tx = cur_ty = 0;
draw_tx = draw_ty = 0; draw_tx = draw_ty = 0;
@ -145,6 +150,14 @@ void HTMLRenderer::reset_state_change()
clip_changed = false; clip_changed = false;
} }
/*
 * Raise cur_ls to new_ls, never lower it.
 *
 * cur_ls is overwritten only when new_ls compares strictly greater
 * (via operator>); otherwise it is left untouched.
 */
template<class NewLineState>
void set_line_state(NewLineState & cur_ls, NewLineState new_ls)
{
    // nothing to do unless the requested state is strictly greater
    if(!(new_ls > cur_ls))
        return;

    cur_ls = new_ls;
}
void HTMLRenderer::check_state_change(GfxState * state) void HTMLRenderer::check_state_change(GfxState * state)
{ {
// DEPENDENCY WARNING // DEPENDENCY WARNING
@ -154,11 +167,16 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(all_changed || clip_changed) if(all_changed || clip_changed)
{ {
//TODO: compare with current clip box HTMLClipState new_clip_state;
double x1, x2, y1, y2; state->getClipBBox(&new_clip_state.xmin, &new_clip_state.ymin, &new_clip_state.xmax, &new_clip_state.ymax);
state->getClipBBox(&x1, &y1, &x2, &y2); if(!(equal(cur_clip_state.xmin, new_clip_state.xmin)
html_text_page.clip(x1, y1, x2, y2); && equal(cur_clip_state.xmax, new_clip_state.xmax)
new_line_state = NLS_NEWLINE; && equal(cur_clip_state.ymin, new_clip_state.ymin)
&& equal(cur_clip_state.ymax, new_clip_state.ymax)))
{
cur_clip_state = new_clip_state;
set_line_state(new_line_state, NLS_NEWCLIP);
}
} }
bool need_recheck_position = false; bool need_recheck_position = false;
@ -191,11 +209,11 @@ void HTMLRenderer::check_state_change(GfxState * state)
// TODO: consider the font matrix and estimate the metrics // TODO: consider the font matrix and estimate the metrics
if(new_font_info->is_type3 || cur_text_state.font_info->is_type3) if(new_font_info->is_type3 || cur_text_state.font_info->is_type3)
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_NEWLINE); set_line_state(new_line_state, NLS_NEWLINE);
} }
else else
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
cur_text_state.font_info = new_font_info; cur_text_state.font_info = new_font_info;
} }
@ -279,13 +297,13 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!equal(new_draw_font_size, cur_text_state.font_size)) if(!equal(new_draw_font_size, cur_text_state.font_size))
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
cur_text_state.font_size = new_draw_font_size; cur_text_state.font_size = new_draw_font_size;
} }
if(!tm_equal(new_draw_text_tm, cur_line_state.transform_matrix, 4)) if(!tm_equal(new_draw_text_tm, cur_line_state.transform_matrix, 4))
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_NEWLINE); set_line_state(new_line_state, NLS_NEWLINE);
memcpy(cur_line_state.transform_matrix, new_draw_text_tm, sizeof(cur_line_state.transform_matrix)); memcpy(cur_line_state.transform_matrix, new_draw_text_tm, sizeof(cur_line_state.transform_matrix));
} }
} }
@ -368,14 +386,14 @@ void HTMLRenderer::check_state_change(GfxState * state)
else else
{ {
cur_text_state.vertical_align = (dy * old_draw_text_scale); cur_text_state.vertical_align = (dy * old_draw_text_scale);
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
draw_tx = cur_tx; draw_tx = cur_tx;
draw_ty = cur_ty; draw_ty = cur_ty;
} }
else else
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_NEWLINE); set_line_state(new_line_state, NLS_NEWLINE);
} }
} }
else else
@ -392,7 +410,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!equal(new_letter_space, cur_text_state.letter_space)) if(!equal(new_letter_space, cur_text_state.letter_space))
{ {
cur_text_state.letter_space = new_letter_space; cur_text_state.letter_space = new_letter_space;
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
} }
@ -404,7 +422,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!equal(new_word_space, cur_text_state.word_space)) if(!equal(new_word_space, cur_text_state.word_space))
{ {
cur_text_state.word_space = new_word_space; cur_text_state.word_space = new_word_space;
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
} }
@ -429,7 +447,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!(new_fill_color == cur_text_state.fill_color)) if(!(new_fill_color == cur_text_state.fill_color))
{ {
cur_text_state.fill_color = new_fill_color; cur_text_state.fill_color = new_fill_color;
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
} }
@ -455,7 +473,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!(new_stroke_color == cur_text_state.stroke_color)) if(!(new_stroke_color == cur_text_state.stroke_color))
{ {
cur_text_state.stroke_color = new_stroke_color; cur_text_state.stroke_color = new_stroke_color;
new_line_state = max<NewLineState>(new_line_state, NLS_NEWSTATE); set_line_state(new_line_state, NLS_NEWSTATE);
} }
} }
@ -465,9 +483,14 @@ void HTMLRenderer::check_state_change(GfxState * state)
void HTMLRenderer::prepare_text_line(GfxState * state) void HTMLRenderer::prepare_text_line(GfxState * state)
{ {
if(!(html_text_page.get_cur_line())) if(!(html_text_page.get_cur_line()))
new_line_state = NLS_NEWLINE; new_line_state = NLS_NEWCLIP;
if(new_line_state == NLS_NEWLINE) if(new_line_state >= NLS_NEWCLIP)
{
html_text_page.clip(cur_clip_state);
}
if(new_line_state >= NLS_NEWLINE)
{ {
// update position such that they will be recorded by text_line_buf // update position such that they will be recorded by text_line_buf
state->transform(state->getCurX(), state->getCurY(), &cur_line_state.x, &cur_line_state.y); state->transform(state->getCurX(), state->getCurY(), &cur_line_state.x, &cur_line_state.y);

View File

@ -233,10 +233,10 @@ void HTMLTextLine::clear(void)
text.clear(); text.clear();
} }
void HTMLTextLine::clip(double x1, double y1, double x2, double y2) void HTMLTextLine::clip(const HTMLClipState & clip_state)
{ {
clip_x1 = x1; clip_x1 = clip_state.xmin;
clip_y1 = y1; clip_y1 = clip_state.ymin;
} }
void HTMLTextLine::prepare(void) void HTMLTextLine::prepare(void)

View File

@ -81,7 +81,7 @@ public:
bool text_empty(void) const { return text.empty(); } bool text_empty(void) const { return text.empty(); }
void clear(void); void clear(void);
void clip(double x1, double y1, double x2, double y2); void clip(const HTMLClipState &);
/* /*
* Optimize and calculate necessary values * Optimize and calculate necessary values

View File

@ -30,39 +30,49 @@ void HTMLTextPage::dump_text(ostream & out)
optimize(); optimize();
//push a dummy entry for convenience //push a dummy entry for convenience
clip_boxes.emplace_back(0, 0, page_width, page_height, text_lines.size()); clips.emplace_back(HTMLClipState{0, 0, page_width, page_height}, text_lines.size());
ClipBox cur_cb(0, 0, page_width, page_height, 0); Clip cur_clip(HTMLClipState{0, 0, page_width, page_height}, 0);
bool has_clip = false; bool has_clip = false;
auto text_line_iter = text_lines.begin(); auto text_line_iter = text_lines.begin();
for(auto clip_iter = clip_boxes.begin(); clip_iter != clip_boxes.end(); ++clip_iter) for(auto clip_iter = clips.begin(); clip_iter != clips.end(); ++clip_iter)
{ {
if(has_clip)
{
out << "<div class=\"" << CSS::CLIP_CN
<< " " << CSS::LEFT_CN << all_manager.left.install(cur_cb.x1)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(cur_cb.y1)
<< " " << CSS::WIDTH_CN << all_manager.width.install(cur_cb.x2 - cur_cb.x1)
<< " " << CSS::HEIGHT_CN << all_manager.height.install(cur_cb.y2 - cur_cb.y1)
<< "\">";
}
auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx; auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx;
while(text_line_iter != next_text_line_iter) if(text_line_iter != next_text_line_iter)
{ {
(*text_line_iter)->clip(cur_cb.x1, cur_cb.y1, cur_cb.x2, cur_cb.y2); const auto & cs = cur_clip.clip_state;
(*text_line_iter)->dump_text(out); if(has_clip)
++text_line_iter; {
} out << "<div class=\"" << CSS::CLIP_CN
if(has_clip) << " " << CSS::LEFT_CN << all_manager.left.install(cs.xmin)
{ << " " << CSS::BOTTOM_CN << all_manager.bottom.install(cs.ymin)
out << "</div>"; << " " << CSS::WIDTH_CN << all_manager.width.install(cs.xmax - cs.xmin)
<< " " << CSS::HEIGHT_CN << all_manager.height.install(cs.ymax - cs.ymin)
<< "\">";
}
while(text_line_iter != next_text_line_iter)
{
if(has_clip)
{
(*text_line_iter)->clip(cs);
}
(*text_line_iter)->dump_text(out);
++text_line_iter;
}
if(has_clip)
{
out << "</div>";
}
} }
cur_cb = *clip_iter; {
has_clip = !(equal(0, cur_cb.x1) && equal(0, cur_cb.y1) cur_clip = *clip_iter;
&& equal(page_width, cur_cb.x2) && equal(page_height, cur_cb.y2)); const auto & cs = cur_clip.clip_state;
has_clip = !(equal(0, cs.xmin) && equal(0, cs.ymin)
&& equal(page_width, cs.xmax) && equal(page_height, cs.ymax));
}
} }
} }
@ -74,7 +84,7 @@ void HTMLTextPage::dump_css(ostream & out)
void HTMLTextPage::clear(void) void HTMLTextPage::clear(void)
{ {
text_lines.clear(); text_lines.clear();
clip_boxes.clear(); clips.clear();
cur_line = nullptr; cur_line = nullptr;
} }
@ -94,32 +104,21 @@ void HTMLTextPage::set_page_size(double width, double height)
page_height = height; page_height = height;
} }
void HTMLTextPage::clip(double x1, double y1, double x2, double y2) void HTMLTextPage::clip(const HTMLClipState & clip_state)
{ {
if(!clip_boxes.empty()) if(!clips.empty())
{ {
auto & cb = clip_boxes.back(); auto & clip = clips.back();
if(cb.start_idx == text_lines.size()) if(clip.start_idx == text_lines.size())
{ {
/* /*
* Previous ClipBox is not used * Previous ClipBox is not used
*/ */
cb.x1 = x1; clip.clip_state = clip_state;
cb.y1 = y1;
cb.x2 = x2;
cb.y2 = y2;
return;
}
if(equal(cb.x1, x1) && equal(cb.y1, y1)
&& equal(cb.x2, x2) && equal(cb.y2, y2))
{
/*
* same as previous ClipBox
*/
return; return;
} }
} }
clip_boxes.emplace_back(x1, y1, x2, y2, text_lines.size()); clips.emplace_back(clip_state, text_lines.size());
} }
void HTMLTextPage::optimize(void) void HTMLTextPage::optimize(void)

View File

@ -22,7 +22,6 @@ namespace pdf2htmlEX {
* *
* contains a series of HTMLTextLine * contains a series of HTMLTextLine
*/ */
class HTMLTextPage class HTMLTextPage
{ {
public: public:
@ -38,7 +37,7 @@ public:
/* for clipping */ /* for clipping */
void set_page_size(double width, double height); void set_page_size(double width, double height);
void clip(double x1, double y1, double x2, double y2); void clip(const HTMLClipState & clip_state);
private: private:
void optimize(void); void optimize(void);
@ -50,14 +49,14 @@ private:
std::vector<std::unique_ptr<HTMLTextLine>> text_lines; std::vector<std::unique_ptr<HTMLTextLine>> text_lines;
struct ClipBox { struct Clip {
ClipBox(double x1, double y1, double x2, double y2, size_t start_idx) HTMLClipState clip_state;
:x1(x1),y1(y1),x2(x2),y2(y2),start_idx(start_idx)
{ }
double x1, y1, x2, y2;
size_t start_idx; size_t start_idx;
Clip(const HTMLClipState & clip_state, size_t start_idx)
:clip_state(clip_state),start_idx(start_idx)
{ }
}; };
std::vector<ClipBox> clip_boxes; std::vector<Clip> clips;
}; };
} //namespace pdf2htmlEX } //namespace pdf2htmlEX