1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

refactor state managers

This commit is contained in:
Lu Wang 2013-04-04 21:19:28 +08:00
parent 09ae5415a3
commit 7275ebde8b
9 changed files with 160 additions and 221 deletions

View File

@ -171,6 +171,8 @@ add_executable(pdf2htmlEX
src/util/ArgParser.cc
src/util/base64stream.h
src/util/base64stream.cc
src/util/color.h
src/util/color.cc
src/util/const.h
src/util/const.cc
src/util/css_const.h

View File

@ -26,13 +26,14 @@
#include "util/StringFormatter.h"
#include "util/TmpFiles.h"
#include "util/misc.h"
#include "util/color.h"
#include "util/StateManager.h"
namespace pdf2htmlEX {
class FontInfo
struct FontInfo
{
public:
long long id;
bool use_tounicode;
int em_size;
@ -41,6 +42,20 @@ public:
bool is_type3;
};
// Plain bundle of the text-related values the renderer tracks while walking a page.
// HTMLRenderer keeps the current one in cur_html_state, updates its fields in
// check_state_change()/prepare_text_line(), and passes it to
// TextLineBuffer::append_state(), which stores a copy per text state.
struct HTMLState
{
// font of the text; consumers dereference it directly (id, ascent, is_type3, ...)
const FontInfo * font_info;
// font size as drawn (check_state_change stores new_draw_font_size here)
double font_size;
// fill color; its `transparent` flag is set when the render mode does not fill
Color fill_color;
// stroke color; its `transparent` flag is set when the render mode does not stroke
Color stroke_color;
// GfxState char space multiplied by draw_text_scale
double letter_space;
// GfxState word space multiplied by draw_text_scale
double word_space;
// GfxState rise multiplied by draw_text_scale
double rise;
// current text position after GfxState::transform (written in prepare_text_line)
double x,y;
// first four components of the draw text matrix; compared with tm_equal(..., 4)
double transform_matrix[4];
};
class HTMLRenderer : public OutputDev
{
public:
@ -316,7 +331,7 @@ protected:
// styles & resources
////////////////////////////////////////////////////
const FontInfo * cur_font_info;
HTMLState cur_html_state;
std::unordered_map<long long, FontInfo> font_info_map;
// managers store values actually used in HTML (i.e. scaled)

View File

@ -32,7 +32,6 @@ using std::abs;
void HTMLRenderer::TextLineBuffer::set_pos(GfxState * state)
{
state->transform(state->getCurX(), state->getCurY(), &x, &y);
tm_id = renderer->transform_matrix_manager.get_id();
}
void HTMLRenderer::TextLineBuffer::append_unicodes(const Unicode * u, int l)
@ -53,8 +52,15 @@ void HTMLRenderer::TextLineBuffer::append_offset(double width)
offsets.emplace_back(text.size(), width);
}
void HTMLRenderer::TextLineBuffer::append_state(void)
void HTMLRenderer::TextLineBuffer::append_state(const HTMLState & html_state)
{
if(states.empty())
{
// if this is the first state, update position and matrix
x = html_state.x;
y = html_state.y;
tm_id = renderer->transform_matrix_manager.install(html_state.transform_matrix);
}
if(states.empty() || (states.back().start_idx != text.size()))
{
states.emplace_back();
@ -62,7 +68,7 @@ void HTMLRenderer::TextLineBuffer::append_state(void)
states.back().hash_umask = 0;
}
set_state(states.back());
states.back().html_state = html_state;
}
void HTMLRenderer::TextLineBuffer::flush(void)
@ -100,9 +106,19 @@ void HTMLRenderer::TextLineBuffer::flush(void)
double max_ascent = 0;
for(auto iter = states.begin(); iter != states.end(); ++iter)
{
double cur_ascent = iter->rise + iter->font_info->ascent * iter->draw_font_size;
const auto & hs = iter->html_state;
double cur_ascent = hs.rise + hs.font_info->ascent * hs.font_size;
if(cur_ascent > max_ascent)
max_ascent = cur_ascent;
// set id
iter->ids[State::FONT_ID] = hs.font_info->id;
iter->ids[State::FONT_SIZE_ID] = renderer->font_size_manager.install(hs.font_size);
iter->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager.install(hs.fill_color);
iter->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.install(hs.stroke_color);
iter->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.install(hs.letter_space);
iter->ids[State::WORD_SPACE_ID] = renderer->word_space_manager.install(hs.word_space);
iter->ids[State::RISE_ID] = renderer->rise_manager.install(hs.rise);
iter->hash();
}
@ -250,25 +266,6 @@ void HTMLRenderer::TextLineBuffer::flush(void)
text.clear();
}
void HTMLRenderer::TextLineBuffer::set_state (State & state)
{
// TODO: as letter_space and word_space may be modified (optimization)
// we should not install them so early
state.ids[State::FONT_ID] = renderer->cur_font_info->id;
state.ids[State::FONT_SIZE_ID] = renderer->font_size_manager.get_id();
state.ids[State::FILL_COLOR_ID] = renderer->fill_color_manager.get_id();
state.ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager.get_id();
state.ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager.get_id();
state.ids[State::WORD_SPACE_ID] = renderer->word_space_manager.get_id();
state.ids[State::RISE_ID] = renderer->rise_manager.get_id();
state.font_info = renderer->cur_font_info;
state.draw_font_size = renderer->font_size_manager.get_actual_value();
state.letter_space = renderer->letter_space_manager.get_actual_value();
state.word_space = renderer->word_space_manager.get_actual_value();
state.rise = renderer->rise_manager.get_actual_value();
}
/*
* Adjust letter space and word space in order to reduce the number of HTML elements
* May also unmask word space
@ -368,9 +365,9 @@ void HTMLRenderer::TextLineBuffer::optimize()
else
{
// install new letter space
const double old_ls = state_iter1->letter_space;
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->letter_space));
letter_space_diff = old_ls - state_iter1->letter_space;
const double old_ls = state_iter1->html_state.letter_space;
state_iter1->ids[State::LETTER_SPACE_ID] = ls_manager.install(old_ls + most_used_width, &(state_iter1->html_state.letter_space));
letter_space_diff = old_ls - state_iter1->html_state.letter_space;
// update offsets
auto off_iter = offset_iter1;
// re-count number of offsets
@ -432,9 +429,9 @@ void HTMLRenderer::TextLineBuffer::optimize()
}
}
state_iter1->word_space = 0; // clear word_space for single_space_offset
state_iter1->html_state.word_space = 0; // clear word_space for single_space_offset
double new_word_space = most_used_width - state_iter1->single_space_offset();
state_iter1->ids[State::WORD_SPACE_ID] = ws_manager.install(new_word_space, &(state_iter1->word_space)); // install new word_space
state_iter1->ids[State::WORD_SPACE_ID] = ws_manager.install(new_word_space, &(state_iter1->html_state.word_space)); // install new word_space
state_iter1->hash_umask &= (~word_space_umask); // mark that the word_space is not free
}
else // there is no offset at all
@ -474,16 +471,16 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
switch(i)
{
case FONT_SIZE_ID:
draw_font_size = prev_state->draw_font_size;
html_state.font_size = prev_state->html_state.font_size;
break;
case LETTER_SPACE_ID:
letter_space = prev_state->letter_space;
html_state.letter_space = prev_state->html_state.letter_space;
break;
case WORD_SPACE_ID:
word_space = prev_state->word_space;
html_state.word_space = prev_state->html_state.word_space;
break;
case RISE_ID:
rise = prev_state->rise;
html_state.rise = prev_state->html_state.rise;
break;
default:
break;
@ -589,12 +586,12 @@ int HTMLRenderer::TextLineBuffer::State::diff(const State & s) const
double HTMLRenderer::TextLineBuffer::State::single_space_offset(void) const
{
return word_space + letter_space + font_info->space_width * draw_font_size;
return html_state.word_space + html_state.letter_space + html_state.font_info->space_width * html_state.font_size;
}
double HTMLRenderer::TextLineBuffer::State::em_size(void) const
{
return draw_font_size * (font_info->ascent - font_info->descent);
return html_state.font_size * (html_state.font_info->ascent - html_state.font_info->descent);
}
long long HTMLRenderer::TextLineBuffer::State::umask_by_id(int id)

View File

@ -50,11 +50,7 @@ public:
long long ids[ID_COUNT];
const FontInfo * font_info;
double draw_font_size;
double letter_space;
double word_space;
double rise;
HTMLState html_state;
size_t start_idx; // index of the first Text using this state
// for optimization
@ -78,13 +74,10 @@ public:
void set_pos(GfxState * state);
void append_unicodes(const Unicode * u, int l);
void append_offset(double width);
void append_state(void);
void append_state(const HTMLState & html_state);
void flush(void);
private:
// retrieve state from renderer
void set_state(State & state);
void optimize(void);
HTMLRenderer * renderer;

View File

@ -372,9 +372,8 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
}
}
transform_matrix_manager.update(new_tm);
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN
<< ' ' << CSS::TRANSFORM_MATRIX_CN << transform_matrix_manager.get_id()
<< ' ' << CSS::TRANSFORM_MATRIX_CN << transform_matrix_manager.install(new_tm)
<< "\" style=\"";
if(line_color)

View File

@ -205,9 +205,8 @@ void HTMLRenderer::processLink(AnnotLink * al)
f_pages.fs << ">";
}
transform_matrix_manager.update(default_ctm);
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN << ' ' << CSS::TRANSFORM_MATRIX_CN
<< transform_matrix_manager.get_id()
<< transform_matrix_manager.install(default_ctm)
<< "\" style=\"";
double x,y,w,h;

View File

@ -90,24 +90,23 @@ void HTMLRenderer::reset_state()
{
draw_text_scale = 1.0;
cur_font_info = install_font(nullptr);
cur_html_state.font_info = install_font(nullptr);
cur_font_size = 0.0;
memcpy(cur_text_tm, ID_MATRIX, sizeof(cur_text_tm));
transform_matrix_manager.reset();
letter_space_manager .reset();
stroke_color_manager .reset();
word_space_manager .reset();
whitespace_manager .reset();
fill_color_manager .reset();
font_size_manager .reset();
bottom_manager .reset();
height_manager .reset();
width_manager .reset();
rise_manager .reset();
left_manager .reset();
// reset html_state
cur_html_state.font_info = nullptr;
cur_html_state.font_size = 0;
cur_html_state.fill_color.transparent = true;
cur_html_state.stroke_color.transparent = true;
cur_html_state.letter_space = 0;
cur_html_state.word_space = 0;
cur_html_state.rise = 0;
cur_html_state.x = 0;
cur_html_state.y = 0;
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
cur_tx = cur_ty = 0;
draw_tx = draw_ty = 0;
@ -156,12 +155,12 @@ void HTMLRenderer::check_state_change(GfxState * state)
{
const FontInfo * new_font_info = install_font(state->getFont());
if(!(new_font_info->id == cur_font_info->id))
if(!(new_font_info->id == cur_html_state.font_info->id))
{
// The width of the type 3 font text, if shown, is likely to be wrong
// So we will create separate (absolute positioned) blocks for them, such that it won't affect other text
// TODO: consider the font matrix and estimate the metrics
if(new_font_info->is_type3 || cur_font_info->is_type3)
if(new_font_info->is_type3 || cur_html_state.font_info->is_type3)
{
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
}
@ -169,7 +168,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
{
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
cur_font_info = new_font_info;
cur_html_state.font_info = new_font_info;
}
double new_font_size = state->getFontSize();
@ -253,13 +252,16 @@ void HTMLRenderer::check_state_change(GfxState * state)
draw_text_scale = new_draw_text_scale;
}
if(font_size_manager.update(new_draw_font_size))
if(!equal(new_draw_font_size, cur_html_state.font_size))
{
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_html_state.font_size = new_draw_font_size;
}
if(transform_matrix_manager.update(new_draw_text_tm))
if(!tm_equal(new_draw_text_tm, cur_html_state.transform_matrix, 4))
{
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
memcpy(cur_html_state.transform_matrix, new_draw_text_tm, sizeof(cur_html_state.transform_matrix));
}
}
@ -333,18 +335,26 @@ void HTMLRenderer::check_state_change(GfxState * state)
// letter space
// depends: draw_text_scale
if((all_changed || letter_space_changed || draw_text_scale_changed)
&& (letter_space_manager.update(state->getCharSpace() * draw_text_scale)))
if(all_changed || letter_space_changed || draw_text_scale_changed)
{
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
double new_letter_space = state->getCharSpace() * draw_text_scale;
if(!equal(new_letter_space, cur_html_state.letter_space))
{
cur_html_state.letter_space = new_letter_space;
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
}
// word space
// depends draw_text_scale
if((all_changed || word_space_changed || draw_text_scale_changed)
&& (word_space_manager.update(state->getWordSpace() * draw_text_scale)))
if(all_changed || word_space_changed || draw_text_scale_changed)
{
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
double new_word_space = state->getWordSpace() * draw_text_scale;
if(!equal(new_word_space, cur_html_state.word_space))
{
cur_html_state.word_space = new_word_space;
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
}
// fill color
@ -355,19 +365,21 @@ void HTMLRenderer::check_state_change(GfxState * state)
int idx = state->getRender();
assert((idx >= 0) && (idx < 8));
bool changed = true;
Color new_fill_color;
if(FILL[idx])
{
GfxRGB new_color;
state->getFillRGB(&new_color);
changed = fill_color_manager.update(new_color);
new_fill_color.transparent = false;
state->getFillRGB(&new_fill_color.rgb);
}
else
{
changed = fill_color_manager.update_transparent();
new_fill_color.transparent = true;
}
if(changed)
if(!(new_fill_color == cur_html_state.fill_color))
{
cur_html_state.fill_color = new_fill_color;
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
}
// stroke color
@ -378,28 +390,34 @@ void HTMLRenderer::check_state_change(GfxState * state)
int idx = state->getRender();
assert((idx >= 0) && (idx < 8));
bool changed = true;
Color new_stroke_color;
// stroke
if(STROKE[idx])
{
GfxRGB new_color;
state->getStrokeRGB(&new_color);
changed = stroke_color_manager.update(new_color);
new_stroke_color.transparent = false;
state->getStrokeRGB(&new_stroke_color.rgb);
}
else
{
changed = stroke_color_manager.update_transparent();
new_stroke_color.transparent = true;
}
if(changed)
if(!(new_stroke_color == cur_html_state.stroke_color))
{
cur_html_state.stroke_color = new_stroke_color;
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
}
// rise
// depends draw_text_scale
if((all_changed || rise_changed || draw_text_scale_changed)
&& (rise_manager.update(state->getRise() * draw_text_scale)))
if(all_changed || rise_changed || draw_text_scale_changed)
{
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
double new_rise = state->getRise() * draw_text_scale;
if(!equal(new_rise, cur_html_state.rise))
{
cur_html_state.rise = new_rise;
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
}
}
reset_state_change();
@ -416,7 +434,8 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
{
close_text_line();
text_line_buf->set_pos(state);
// update position such that they will be recorded by text_line_buf
state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
//resync position
draw_ty = cur_ty;
@ -436,7 +455,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
if(new_line_state != NLS_NONE)
{
text_line_buf->append_state();
text_line_buf->append_state(cur_html_state);
}
line_opened = true;

View File

@ -100,7 +100,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
}
else
{
if(cur_font_info->use_tounicode)
if(cur_html_state.font_info->use_tounicode)
{
Unicode uu = check_unicode(u, uLen, code, font);
text_line_buf->append_unicodes(&uu, 1);

View File

@ -15,6 +15,7 @@
#include "util/math.h"
#include "util/css_const.h"
#include "util/color.h"
namespace pdf2htmlEX {
@ -27,9 +28,7 @@ public:
StateManager()
: eps(0)
, imp(static_cast<Imp*>(this))
{
reset();
}
{ }
// values no farther than eps are treated as equal
void set_eps (double eps) {
@ -40,26 +39,8 @@ public:
return eps;
}
// usually called at the beginning of a page
void reset(void) {
cur_value = imp->default_value();
cur_id = install(cur_value, &cur_actual_value);
}
/*
* update the current state, which will be installed automatically
* return if the state has been indeed changed
*/
bool update(double new_value) {
if(equal(new_value, cur_value))
return false;
cur_value = new_value;
cur_id = install(cur_value, &cur_actual_value);
return true;
}
// install new_value into the map, but do not update the state
// return the corresponding id, and set
// install new_value into the map
// return the corresponding id
long long install(double new_value, double * actual_value_ptr = nullptr) {
auto iter = value_map.lower_bound(new_value - eps);
if((iter != value_map.end()) && (abs(iter->first - new_value) <= eps))
@ -76,11 +57,6 @@ public:
return id;
}
// get current state
long long get_id (void) const { return cur_id; }
double get_value (void) const { return cur_value; }
double get_actual_value (void) const { return cur_actual_value; }
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
{
@ -102,17 +78,12 @@ public:
protected:
double eps;
Imp * imp;
long long cur_id;
double cur_value; // the value we are tracking
double cur_actual_value; // the value we actually exported to HTML
std::map<double, long long> value_map;
};
// Be careful about the mixed usage of Matrix and const double *
// the input is usually double *, which might be changed, so we have to copy the content out
// in the map we use Matrix instead of double * so that the array is released automatically on destruction
// since the address of cur_value.m cannot be changed, we can export double * instead of Matrix
template <class Imp>
class StateManager<Matrix, Imp>
{
@ -121,26 +92,21 @@ public:
: imp(static_cast<Imp*>(this))
{ }
void reset(void) {
memcpy(cur_value.m, imp->default_value(), sizeof(cur_value.m));
cur_id = install(cur_value);
// return id
long long install(const double * new_value) {
Matrix m;
memcpy(m.m, new_value, sizeof(m.m));
auto iter = value_map.lower_bound(m);
if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4)))
{
return iter->second;
}
long long id = value_map.size();
value_map.insert(iter, std::make_pair(m, id));
return id;
}
// return if changed
bool update(const double * new_value) {
// For a transform matrix m
// m[4] & m[5] have been taken care of
if(tm_equal(new_value, cur_value.m, 4))
return false;
memcpy(cur_value.m, new_value, sizeof(cur_value.m));
cur_id = install(cur_value);
return true;
}
long long get_id (void) const { return cur_id; }
const double * get_value (void) const { return cur_value.m; }
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
{
@ -153,24 +119,8 @@ public:
void dump_print_css(std::ostream & out, double scale) {}
protected:
// return id
long long install(const Matrix & new_value) {
auto iter = value_map.lower_bound(new_value);
if((iter != value_map.end()) && (tm_equal(new_value.m, iter->first.m, 4)))
{
return iter->second;
}
long long id = value_map.size();
value_map.insert(iter, std::make_pair(new_value, id));
return id;
}
Imp * imp;
long long cur_id;
Matrix cur_value;
class Matrix_less
{
public:
@ -192,40 +142,25 @@ protected:
};
template <class Imp>
class StateManager<GfxRGB, Imp>
class StateManager<Color, Imp>
{
public:
StateManager()
: imp(static_cast<Imp*>(this))
{ }
void reset(void) {
cur_is_transparent = true;
cur_id = -1;
// Register a color and return its id.
// A color already present in value_map keeps its id; a new one is stored
// under the next sequential id (the current size of the map).
// NOTE(review): a Color with `transparent` set is inserted like any other
// value here -- confirm this agrees with how dump_css emits the transparent class.
long long install(const Color & new_value) {
    auto hit = value_map.find(new_value);
    if(hit == value_map.end())
    {
        long long fresh_id = value_map.size();
        value_map.insert(std::make_pair(new_value, fresh_id));
        return fresh_id;
    }
    return hit->second;
}
bool update(const GfxRGB & new_value) {
if((!cur_is_transparent) && gfxrgb_equal_obj(new_value, cur_value))
return false;
cur_value = new_value;
cur_is_transparent = false;
cur_id = install(cur_value);
return true;
}
bool update_transparent (void) {
if(cur_is_transparent)
return false;
cur_is_transparent = true;
cur_id = -1;
return true;
}
long long get_id (void) const { return cur_id; }
const GfxRGB & get_value (void) const { return cur_value; }
bool get_is_transparent (void) const { return cur_is_transparent; }
void dump_css(std::ostream & out) {
out << "." << imp->get_css_class_name() << CSS::INVALID_ID << "{";
imp->dump_transparent(out);
@ -242,47 +177,28 @@ public:
void dump_print_css(std::ostream & out, double scale) {}
protected:
long long install(const GfxRGB & new_value) {
auto iter = value_map.find(new_value);
if(iter != value_map.end())
{
return iter->second;
}
long long id = value_map.size();
value_map.insert(std::make_pair(new_value, id));
return id;
}
Imp * imp;
long long cur_id;
GfxRGB cur_value;
bool cur_is_transparent;
class GfxRGB_hash
class Color_hash
{
public:
size_t operator () (const GfxRGB & rgb) const
size_t operator () (const Color & color) const
{
return ( (((size_t)colToByte(rgb.r)) << 16)
| (((size_t)colToByte(rgb.g)) << 8)
| ((size_t)colToByte(rgb.b))
);
if(color.transparent)
{
return (~((size_t)0));
}
else
{
return ( ((((size_t)colToByte(color.rgb.r)) & 0xff) << 16)
| ((((size_t)colToByte(color.rgb.g)) & 0xff) << 8)
| (((size_t)colToByte(color.rgb.b)) & 0xff)
);
}
}
};
class GfxRGB_equal
{
public:
bool operator ()(const GfxRGB & rgb1, const GfxRGB & rgb2) const
{
return ((rgb1.r == rgb2.r) && (rgb1.g == rgb2.g) && (rgb1.b == rgb2.b));
}
};
GfxRGB_equal gfxrgb_equal_obj;
std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> value_map;
std::unordered_map<Color, long long, Color_hash> value_map;
};
/////////////////////////////////////
@ -412,13 +328,12 @@ public:
}
};
class FillColorManager : public StateManager<GfxRGB, FillColorManager>
class FillColorManager : public StateManager<Color, FillColorManager>
{
public:
static const char * get_css_class_name (void) { return CSS::FILL_COLOR_CN; }
/* override base's method, as we need some workaround in CSS */
void dump_css(std::ostream & out) {
out << "." << get_css_class_name() << CSS::INVALID_ID << "{color:transparent;}" << std::endl;
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
{
out << "." << get_css_class_name() << iter->second
@ -427,7 +342,7 @@ public:
}
};
class StrokeColorManager : public StateManager<GfxRGB, StrokeColorManager>
class StrokeColorManager : public StateManager<Color, StrokeColorManager>
{
public:
static const char * get_css_class_name (void) { return CSS::STROKE_COLOR_CN; }