1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-07 18:30:34 +00:00
This commit is contained in:
Lu Wang 2013-02-05 20:45:56 +08:00
parent 820f06d940
commit 225aa23dca

View File

@ -43,317 +43,314 @@ public:
class HTMLRenderer : public OutputDev class HTMLRenderer : public OutputDev
{ {
public: public:
HTMLRenderer(const Param * param); HTMLRenderer(const Param * param);
virtual ~HTMLRenderer(); virtual ~HTMLRenderer();
void process(PDFDoc * doc); void process(PDFDoc * doc);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// OutputDev interface // OutputDev interface
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// Does this device use upside-down coordinates? // Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.) // (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gFalse; } virtual GBool upsideDown() { return gFalse; }
// Does this device use drawChar() or drawString()? // Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; } virtual GBool useDrawChar() { return gFalse; }
// Does this device use functionShadedFill(), axialShadedFill(), and // Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills // radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations. // will be reduced to a series of other drawing operations.
virtual GBool useShadedFills(int type) { return (type == 2) ? gTrue: gFalse; } virtual GBool useShadedFills(int type) { return (type == 2) ? gTrue: gFalse; }
// Does this device use beginType3Char/endType3Char? Otherwise, // Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString. // text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; } virtual GBool interpretType3Chars() { return gFalse; }
// Does this device need non-text content? // Does this device need non-text content?
virtual GBool needNonText() { return (param->process_nontext) ? gTrue: gFalse; } virtual GBool needNonText() { return (param->process_nontext) ? gTrue: gFalse; }
virtual void setDefaultCTM(double *ctm); virtual void setDefaultCTM(double *ctm);
// Start a page. // Start a page.
// UGLY: These 2 versions are for different versions of poppler // UGLY: These 2 versions are for different versions of poppler
virtual void startPage(int pageNum, GfxState *state); virtual void startPage(int pageNum, GfxState *state);
virtual void startPage(int pageNum, GfxState *state, XRef * xref); virtual void startPage(int pageNum, GfxState *state, XRef * xref);
// End a page. // End a page.
virtual void endPage(); virtual void endPage();
/* /*
* To optimize false alarms * To optimize false alarms
* We just mark as changed, and recheck if they have been changed when we are about to output a new string * We just mark as changed, and recheck if they have been changed when we are about to output a new string
*/ */
/* /*
* Ugly implementation of save/restore * Ugly implementation of save/restore
*/ */
virtual void saveState(GfxState * state) { updateAll(state); } virtual void saveState(GfxState * state) { updateAll(state); }
virtual void restoreState(GfxState * state) { updateAll(state); } virtual void restoreState(GfxState * state) { updateAll(state); }
virtual void updateAll(GfxState * state); virtual void updateAll(GfxState * state);
virtual void updateRise(GfxState * state); virtual void updateRise(GfxState * state);
virtual void updateTextPos(GfxState * state); virtual void updateTextPos(GfxState * state);
virtual void updateTextShift(GfxState * state, double shift); virtual void updateTextShift(GfxState * state, double shift);
virtual void updateFont(GfxState * state); virtual void updateFont(GfxState * state);
virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32); virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32);
virtual void updateTextMat(GfxState * state); virtual void updateTextMat(GfxState * state);
virtual void updateHorizScaling(GfxState * state); virtual void updateHorizScaling(GfxState * state);
virtual void updateCharSpace(GfxState * state); virtual void updateCharSpace(GfxState * state);
virtual void updateWordSpace(GfxState * state); virtual void updateWordSpace(GfxState * state);
virtual void updateRender(GfxState * state); virtual void updateRender(GfxState * state);
virtual void updateFillColorSpace(GfxState * state); virtual void updateFillColorSpace(GfxState * state);
virtual void updateStrokeColorSpace(GfxState * state); virtual void updateStrokeColorSpace(GfxState * state);
virtual void updateFillColor(GfxState * state); virtual void updateFillColor(GfxState * state);
virtual void updateStrokeColor(GfxState * state); virtual void updateStrokeColor(GfxState * state);
/* /*
* Rendering * Rendering
*/ */
virtual void drawString(GfxState * state, GooString * s); virtual void drawString(GfxState * state, GooString * s);
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
virtual void stroke(GfxState *state) { css_do_path(state, false); } virtual void stroke(GfxState *state) { css_do_path(state, false); }
virtual void fill(GfxState *state) { css_do_path(state, true); } virtual void fill(GfxState *state) { css_do_path(state, true); }
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void processLink(AnnotLink * al); virtual void processLink(AnnotLink * al);
/* capacity test */ /* capacity test */
bool can_stroke(GfxState *state) { return css_do_path(state, false, true); } bool can_stroke(GfxState *state) { return css_do_path(state, false, true); }
bool can_fill(GfxState *state) { return css_do_path(state, true, true); } bool can_fill(GfxState *state) { return css_do_path(state, true, true); }
protected: protected:
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// misc // misc
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void pre_process(PDFDoc * doc); void pre_process(PDFDoc * doc);
void post_process(); void post_process();
void process_outline(); void process_outline();
void process_outline_items(GooList * items); void process_outline_items(GooList * items);
void set_stream_flags (std::ostream & out); void set_stream_flags (std::ostream & out);
std::string dump_embedded_font (GfxFont * font, long long fn_id); std::string dump_embedded_font (GfxFont * font, long long fn_id);
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false); void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
// convert a LinkAction to a string that our Javascript code can understand // convert a LinkAction to a string that our Javascript code can understand
std::string get_linkaction_str(LinkAction *, std::string & detail); std::string get_linkaction_str(LinkAction *, std::string & detail);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// manage styles // manage styles
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
const FontInfo * install_font(GfxFont * font); const FontInfo * install_font(GfxFont * font);
void install_embedded_font(GfxFont * font, FontInfo & info); void install_embedded_font(GfxFont * font, FontInfo & info);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info); void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info);
void install_external_font (GfxFont * font, FontInfo & info); void install_external_font (GfxFont * font, FontInfo & info);
long long install_fill_color(const GfxRGB * rgb); long long install_fill_color(const GfxRGB * rgb);
long long install_stroke_color(const GfxRGB * rgb); long long install_stroke_color(const GfxRGB * rgb);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// export css styles // export css styles
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
/* /*
* remote font: to be retrieved from the web server * remote font: to be retrieved from the web server
* local font: to be substituted with a local (client side) font * local font: to be substituted with a local (client side) font
*/ */
void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font); void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font);
void export_remote_default_font(long long fn_id); void export_remote_default_font(long long fn_id);
void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont); void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont);
void export_fill_color(long long color_id, const GfxRGB * rgb); void export_fill_color(long long color_id, const GfxRGB * rgb);
void export_stroke_color(long long color_id, const GfxRGB * rgb); void export_stroke_color(long long color_id, const GfxRGB * rgb);
// depending on single-html, to embed the content or add a link to it // depending on single-html, to embed the content or add a link to it
// "type": specify the file type, usually it's the suffix, in which case this parameter could be "" // "type": specify the file type, usually it's the suffix, in which case this parameter could be ""
// "copy": indicates whether to copy the file into dest_dir, if not embedded // "copy": indicates whether to copy the file into dest_dir, if not embedded
void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy); void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// state tracking // state tracking
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// reset all states // reset all states
void reset_state(); void reset_state();
// reset all ***_changed flags // reset all ***_changed flags
void reset_state_change(); void reset_state_change();
// check updated states, and determine new_line_state // check updated states, and determine new_line_state
// make sure this function can be called several times consecutively without problem // make sure this function can be called several times consecutively without problem
void check_state_change(GfxState * state); void check_state_change(GfxState * state);
// prepare the line context, (close old tags, open new tags) // prepare the line context, (close old tags, open new tags)
// make sure the current HTML style is consistent with PDF // make sure the current HTML style is consistent with PDF
void prepare_text_line(GfxState * state); void prepare_text_line(GfxState * state);
void close_text_line(); void close_text_line();
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// CSS drawing // CSS drawing
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
/* /*
* test_only is for capacity check * test_only is for capacity check
*/ */
bool css_do_path(GfxState *state, bool fill, bool test_only = false); bool css_do_path(GfxState *state, bool fill, bool test_only = false);
/* /*
* coordinates are to be transformed by state->getCTM() * coordinates are to be transformed by state->getCTM()
* (x,y) should be the bottom-left corner INCLUDING border * (x,y) should be the bottom-left corner INCLUDING border
* w,h should be the metrics WITHOUT border * w,h should be the metrics WITHOUT border
* *
* line_color & fill_color may be specified as nullptr to indicate none * line_color & fill_color may be specified as nullptr to indicate none
* style_function & style_function_data may be provided to provide more styles * style_function & style_function_data may be provided to provide more styles
*/ */
void css_draw_rectangle(double x, double y, double w, double h, const double * tm, void css_draw_rectangle(double x, double y, double w, double h, const double * tm,
double * line_width_array, int line_width_count, double * line_width_array, int line_width_count,
const GfxRGB * line_color, const GfxRGB * fill_color, const GfxRGB * line_color, const GfxRGB * fill_color,
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// PDF stuffs // PDF stuffs
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
XRef * xref; XRef * xref;
PDFDoc * cur_doc; PDFDoc * cur_doc;
Catalog * cur_catalog; Catalog * cur_catalog;
double default_ctm[6]; double default_ctm[6];
// page info // page info
int pageNum; int pageNum;
double pageWidth ; double pageWidth ;
double pageHeight ; double pageHeight ;
/* /*
* The content of each page is first scaled with factor1 (>=1), then scaled back with factor2 (<=1) * The content of each page is first scaled with factor1 (>=1), then scaled back with factor2 (<=1)
* *
* factor1 is multiplied with all metrics (height/width/font-size...), in order to improve accuracy * factor1 is multiplied with all metrics (height/width/font-size...), in order to improve accuracy
* factor2 is applied with css transform, and is exposed to Javascript * factor2 is applied with css transform, and is exposed to Javascript
* *
* factor1 & factor 2 are determined according to zoom and font-size-multiplier * factor1 & factor 2 are determined according to zoom and font-size-multiplier
* *
*/ */
double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; } double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; }
double text_scale_factor1; double text_scale_factor1;
double text_scale_factor2; double text_scale_factor2;
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// states // states
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
bool line_opened; bool line_opened;
enum NewLineState enum NewLineState
{ {
NLS_NONE, // stay with the same style NLS_NONE, // stay with the same style
NLS_SPAN, // open a new <span> if possible, otherwise a new <div> NLS_SPAN, // open a new <span> if possible, otherwise a new <div>
NLS_DIV // has to open a new <div> NLS_DIV // has to open a new <div>
} new_line_state; } new_line_state;
// The order is according to the appearance in check_state_change // The order is according to the appearance in check_state_change
// any state changed // any state changed
bool all_changed; bool all_changed;
// current position // current position
double cur_tx, cur_ty; // real text position, in text coords double cur_tx, cur_ty; // real text position, in text coords
bool text_pos_changed;
// font & size // cur_font_size and cur_text_tm are unscaled (the same as in PDF)
const FontInfo * cur_font_info; double cur_font_size;
bool font_changed; // this is CTM * TextMAT in PDF
// cur_font_size is as in GfxState, // [4] and [5] are ignored,
// font_size_manager saves the final font size used in HTML // as we'll calculate the position of the origin separately
double cur_font_size; double cur_text_tm[6]; // unscaled
// transform matrix bool text_pos_changed;
bool ctm_changed; bool font_changed;
bool text_mat_changed; bool ctm_changed;
// horizontal scaling bool text_mat_changed;
bool hori_scale_changed; bool hori_scale_changed;
// this is CTM * TextMAT in PDF bool letter_space_changed;
// [4] and [5] are ignored, bool word_space_changed;
// as we'll calculate the position of the origin separately bool rise_changed;
double cur_text_tm[6]; // unscaled
// letter spacing // font & size
bool letter_space_changed; const FontInfo * cur_font_info;
bool word_space_changed;
// fill color // fill color
long long cur_fill_color_id; long long cur_fill_color_id;
GfxRGB cur_fill_color; GfxRGB cur_fill_color;
bool cur_has_fill; bool cur_has_fill;
bool fill_color_changed; bool fill_color_changed;
// stroke color // stroke color
long long cur_stroke_color_id; long long cur_stroke_color_id;
GfxRGB cur_stroke_color; GfxRGB cur_stroke_color;
bool cur_has_stroke; bool cur_has_stroke;
bool stroke_color_changed; bool stroke_color_changed;
bool rise_changed;
// managers store values actually used in HTML (i.e. scaled)
FontSizeManager font_size_manager;
LetterSpaceManager letter_space_manager;
WordSpaceManager word_space_manager;
RiseManager rise_manager;
WhitespaceManager whitespace_manager;
HeightManager height_manager;
LeftManager left_manager;
TransformMatrixManager transform_matrix_manager;
// optimize for web // managers store values actually used in HTML (i.e. scaled)
// we try to render the final font size directly FontSizeManager font_size_manager;
// to reduce the effect of ctm as much as possible LetterSpaceManager letter_space_manager;
WordSpaceManager word_space_manager;
// the actual tm used is `real tm in PDF` scaled by 1/draw_text_scale, RiseManager rise_manager;
// so everything rendered should be multiplied by draw_text_scale WhitespaceManager whitespace_manager;
double draw_text_scale; HeightManager height_manager;
LeftManager left_manager;
TransformMatrixManager transform_matrix_manager;
// the position of next char, in text coords // optimize for web
// this is actual position (in HTML), which might be different from cur_tx/ty (in PDF) // we try to render the final font size directly
// also keep in mind that they are not the final position, as they will be transformed by CTM (also true for cur_tx/ty) // to reduce the effect of ctm as much as possible
double draw_tx, draw_ty;
// the actual tm used is `real tm in PDF` scaled by 1/draw_text_scale,
// so everything rendered should be multiplied by draw_text_scale
double draw_text_scale;
// some metrics have to be determined after all elements in the lines have been seen // the position of next char, in text coords
// see TextLineBuffer.h // this is actual position (in HTML), which might be different from cur_tx/ty (in PDF)
class TextLineBuffer; // also keep in mind that they are not the final position, as they will be transformed by CTM (also true for cur_tx/ty)
friend class TextLineBuffer; double draw_tx, draw_ty;
TextLineBuffer * text_line_buf;
// for font reencoding // some metrics have to be determined after all elements in the lines have been seen
int32_t * cur_mapping; // see TextLineBuffer.h
char ** cur_mapping2; class TextLineBuffer;
int * width_list; friend class TextLineBuffer;
TextLineBuffer * text_line_buf;
Preprocessor preprocessor; // for font reencoding
TmpFiles tmp_files; int32_t * cur_mapping;
char ** cur_mapping2;
int * width_list;
// for string formatting Preprocessor preprocessor;
StringFormatter str_fmt; TmpFiles tmp_files;
//////////////////////////////////////////////////// // for string formatting
// styles & resources StringFormatter str_fmt;
////////////////////////////////////////////////////
std::unordered_map<long long, FontInfo> font_name_map; ////////////////////////////////////////////////////
std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> fill_color_map, stroke_color_map; // styles & resources
////////////////////////////////////////////////////
const Param * param; std::unordered_map<long long, FontInfo> font_name_map;
std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> fill_color_map, stroke_color_map;
struct { const Param * param;
std::ofstream fs;
std::string path;
} f_outline, f_pages, f_css;
static const std::string MANIFEST_FILENAME; struct {
std::ofstream fs;
std::string path;
} f_outline, f_pages, f_css;
static const std::string MANIFEST_FILENAME;
}; };
} //namespace pdf2htmlEX } //namespace pdf2htmlEX