diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 236ed2b..c0e5c1b 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -43,317 +43,314 @@ public: class HTMLRenderer : public OutputDev { - public: - HTMLRenderer(const Param * param); - virtual ~HTMLRenderer(); +public: + HTMLRenderer(const Param * param); + virtual ~HTMLRenderer(); - void process(PDFDoc * doc); + void process(PDFDoc * doc); - //////////////////////////////////////////////////// - // OutputDev interface - //////////////////////////////////////////////////// - - // Does this device use upside-down coordinates? - // (Upside-down means (0,0) is the top left corner of the page.) - virtual GBool upsideDown() { return gFalse; } + //////////////////////////////////////////////////// + // OutputDev interface + //////////////////////////////////////////////////// + + // Does this device use upside-down coordinates? + // (Upside-down means (0,0) is the top left corner of the page.) + virtual GBool upsideDown() { return gFalse; } - // Does this device use drawChar() or drawString()? - virtual GBool useDrawChar() { return gFalse; } + // Does this device use drawChar() or drawString()? + virtual GBool useDrawChar() { return gFalse; } - // Does this device use functionShadedFill(), axialShadedFill(), and - // radialShadedFill()? If this returns false, these shaded fills - // will be reduced to a series of other drawing operations. - virtual GBool useShadedFills(int type) { return (type == 2) ? gTrue: gFalse; } + // Does this device use functionShadedFill(), axialShadedFill(), and + // radialShadedFill()? If this returns false, these shaded fills + // will be reduced to a series of other drawing operations. + virtual GBool useShadedFills(int type) { return (type == 2) ? gTrue: gFalse; } - // Does this device use beginType3Char/endType3Char? Otherwise, - // text in Type 3 fonts will be drawn with drawChar/drawString. 
- virtual GBool interpretType3Chars() { return gFalse; } + // Does this device use beginType3Char/endType3Char? Otherwise, + // text in Type 3 fonts will be drawn with drawChar/drawString. + virtual GBool interpretType3Chars() { return gFalse; } - // Does this device need non-text content? - virtual GBool needNonText() { return (param->process_nontext) ? gTrue: gFalse; } + // Does this device need non-text content? + virtual GBool needNonText() { return (param->process_nontext) ? gTrue: gFalse; } - virtual void setDefaultCTM(double *ctm); + virtual void setDefaultCTM(double *ctm); - // Start a page. - // UGLY: These 2 versions are for different versions of poppler - virtual void startPage(int pageNum, GfxState *state); - virtual void startPage(int pageNum, GfxState *state, XRef * xref); + // Start a page. + // UGLY: These 2 versions are for different versions of poppler + virtual void startPage(int pageNum, GfxState *state); + virtual void startPage(int pageNum, GfxState *state, XRef * xref); - // End a page. - virtual void endPage(); + // End a page. 
+ virtual void endPage(); - /* - * To optimize false alarms - * We just mark as changed, and recheck if they have been changed when we are about to output a new string - */ + /* + * To optimize false alarms + * We just mark as changed, and recheck if they have been changed when we are about to output a new string + */ - /* - * Ugly implementation of save/restore - */ - virtual void saveState(GfxState * state) { updateAll(state); } - virtual void restoreState(GfxState * state) { updateAll(state); } + /* + * Ugly implementation of save/restore + */ + virtual void saveState(GfxState * state) { updateAll(state); } + virtual void restoreState(GfxState * state) { updateAll(state); } - virtual void updateAll(GfxState * state); + virtual void updateAll(GfxState * state); - virtual void updateRise(GfxState * state); - virtual void updateTextPos(GfxState * state); - virtual void updateTextShift(GfxState * state, double shift); + virtual void updateRise(GfxState * state); + virtual void updateTextPos(GfxState * state); + virtual void updateTextShift(GfxState * state, double shift); - virtual void updateFont(GfxState * state); - virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32); - virtual void updateTextMat(GfxState * state); - virtual void updateHorizScaling(GfxState * state); + virtual void updateFont(GfxState * state); + virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32); + virtual void updateTextMat(GfxState * state); + virtual void updateHorizScaling(GfxState * state); - virtual void updateCharSpace(GfxState * state); - virtual void updateWordSpace(GfxState * state); + virtual void updateCharSpace(GfxState * state); + virtual void updateWordSpace(GfxState * state); - virtual void updateRender(GfxState * state); + virtual void updateRender(GfxState * state); - virtual void updateFillColorSpace(GfxState * state); - virtual void 
updateStrokeColorSpace(GfxState * state); - virtual void updateFillColor(GfxState * state); - virtual void updateStrokeColor(GfxState * state); + virtual void updateFillColorSpace(GfxState * state); + virtual void updateStrokeColorSpace(GfxState * state); + virtual void updateFillColor(GfxState * state); + virtual void updateStrokeColor(GfxState * state); - /* - * Rendering - */ - - virtual void drawString(GfxState * state, GooString * s); + /* + * Rendering + */ + + virtual void drawString(GfxState * state, GooString * s); - virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); + virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); - virtual void stroke(GfxState *state) { css_do_path(state, false); } - virtual void fill(GfxState *state) { css_do_path(state, true); } - virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); + virtual void stroke(GfxState *state) { css_do_path(state, false); } + virtual void fill(GfxState *state) { css_do_path(state, true); } + virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); - virtual void processLink(AnnotLink * al); + virtual void processLink(AnnotLink * al); - /* capacity test */ - bool can_stroke(GfxState *state) { return css_do_path(state, false, true); } - bool can_fill(GfxState *state) { return css_do_path(state, true, true); } + /* capacity test */ + bool can_stroke(GfxState *state) { return css_do_path(state, false, true); } + bool can_fill(GfxState *state) { return css_do_path(state, true, true); } - protected: - //////////////////////////////////////////////////// - // misc - //////////////////////////////////////////////////// - void pre_process(PDFDoc * doc); - void post_process(); 
+protected: + //////////////////////////////////////////////////// + // misc + //////////////////////////////////////////////////// + void pre_process(PDFDoc * doc); + void post_process(); - void process_outline(); - void process_outline_items(GooList * items); + void process_outline(); + void process_outline_items(GooList * items); - void set_stream_flags (std::ostream & out); + void set_stream_flags (std::ostream & out); - std::string dump_embedded_font (GfxFont * font, long long fn_id); - void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false); + std::string dump_embedded_font (GfxFont * font, long long fn_id); + void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false); - // convert a LinkAction to a string that our Javascript code can understand - std::string get_linkaction_str(LinkAction *, std::string & detail); + // convert a LinkAction to a string that our Javascript code can understand + std::string get_linkaction_str(LinkAction *, std::string & detail); - //////////////////////////////////////////////////// - // manage styles - //////////////////////////////////////////////////// - const FontInfo * install_font(GfxFont * font); - void install_embedded_font(GfxFont * font, FontInfo & info); - void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info); - void install_external_font (GfxFont * font, FontInfo & info); + //////////////////////////////////////////////////// + // manage styles + //////////////////////////////////////////////////// + const FontInfo * install_font(GfxFont * font); + void install_embedded_font(GfxFont * font, FontInfo & info); + void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info); + void install_external_font (GfxFont * font, FontInfo & info); - long long install_fill_color(const GfxRGB * rgb); - long long install_stroke_color(const GfxRGB * rgb); + long long install_fill_color(const 
GfxRGB * rgb); + long long install_stroke_color(const GfxRGB * rgb); - //////////////////////////////////////////////////// - // export css styles - //////////////////////////////////////////////////// - /* - * remote font: to be retrieved from the web server - * local font: to be substituted with a local (client side) font - */ - void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font); - void export_remote_default_font(long long fn_id); - void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont); + //////////////////////////////////////////////////// + // export css styles + //////////////////////////////////////////////////// + /* + * remote font: to be retrieved from the web server + * local font: to be substituted with a local (client side) font + */ + void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font); + void export_remote_default_font(long long fn_id); + void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont); - void export_fill_color(long long color_id, const GfxRGB * rgb); - void export_stroke_color(long long color_id, const GfxRGB * rgb); + void export_fill_color(long long color_id, const GfxRGB * rgb); + void export_stroke_color(long long color_id, const GfxRGB * rgb); - // depending on single-html, to embed the content or add a link to it - // "type": specify the file type, usually it's the suffix, in which case this parameter could be "" - // "copy": indicates whether to copy the file into dest_dir, if not embedded - void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy); + // depending on single-html, to embed the content or add a link to it + // "type": specify the file type, usually it's the suffix, in which case this parameter could be "" + // "copy": indicates whether to copy the file 
into dest_dir, if not embedded + void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy); - //////////////////////////////////////////////////// - // state tracking - //////////////////////////////////////////////////// - // reset all states - void reset_state(); - // reset all ***_changed flags - void reset_state_change(); - // check updated states, and determine new_line_stauts - // make sure this function can be called several times consecutively without problem - void check_state_change(GfxState * state); - // prepare the line context, (close old tags, open new tags) - // make sure the current HTML style consistent with PDF - void prepare_text_line(GfxState * state); - void close_text_line(); + //////////////////////////////////////////////////// + // state tracking + //////////////////////////////////////////////////// + // reset all states + void reset_state(); + // reset all ***_changed flags + void reset_state_change(); + // check updated states, and determine new_line_stauts + // make sure this function can be called several times consecutively without problem + void check_state_change(GfxState * state); + // prepare the line context, (close old tags, open new tags) + // make sure the current HTML style consistent with PDF + void prepare_text_line(GfxState * state); + void close_text_line(); - //////////////////////////////////////////////////// - // CSS drawing - //////////////////////////////////////////////////// - /* - * test_only is for capacity check - */ - bool css_do_path(GfxState *state, bool fill, bool test_only = false); - /* - * coordinates are to transformed by state->getCTM() - * (x,y) should be the bottom-left corner INCLUDING border - * w,h should be the metrics WITHOUT border - * - * line_color & fill_color may be specified as nullptr to indicate none - * style_function & style_function_data may be provided to provide more styles - */ - void css_draw_rectangle(double x, double y, double w, double 
h, const double * tm, - double * line_width_array, int line_width_count, - const GfxRGB * line_color, const GfxRGB * fill_color, - void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); + //////////////////////////////////////////////////// + // CSS drawing + //////////////////////////////////////////////////// + /* + * test_only is for capacity check + */ + bool css_do_path(GfxState *state, bool fill, bool test_only = false); + /* + * coordinates are to transformed by state->getCTM() + * (x,y) should be the bottom-left corner INCLUDING border + * w,h should be the metrics WITHOUT border + * + * line_color & fill_color may be specified as nullptr to indicate none + * style_function & style_function_data may be provided to provide more styles + */ + void css_draw_rectangle(double x, double y, double w, double h, const double * tm, + double * line_width_array, int line_width_count, + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); - //////////////////////////////////////////////////// - // PDF stuffs - //////////////////////////////////////////////////// - - XRef * xref; - PDFDoc * cur_doc; - Catalog * cur_catalog; + //////////////////////////////////////////////////// + // PDF stuffs + //////////////////////////////////////////////////// + + XRef * xref; + PDFDoc * cur_doc; + Catalog * cur_catalog; - double default_ctm[6]; + double default_ctm[6]; - // page info - int pageNum; - double pageWidth ; - double pageHeight ; + // page info + int pageNum; + double pageWidth ; + double pageHeight ; - /* - * The content of each page is first scaled with factor1 (>=1), then scale back with factor2(<=1) - * - * factor1 is use to multiplied with all metrics (height/width/font-size...), in order to improve accuracy - * factor2 is applied with css transform, and is exposed to Javascript - * - * factor1 & factor 2 are determined 
according to zoom and font-size-multiplier - * - */ - double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; } - double text_scale_factor1; - double text_scale_factor2; + /* + * The content of each page is first scaled with factor1 (>=1), then scale back with factor2(<=1) + * + * factor1 is use to multiplied with all metrics (height/width/font-size...), in order to improve accuracy + * factor2 is applied with css transform, and is exposed to Javascript + * + * factor1 & factor 2 are determined according to zoom and font-size-multiplier + * + */ + double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; } + double text_scale_factor1; + double text_scale_factor2; - //////////////////////////////////////////////////// - // states - //////////////////////////////////////////////////// - bool line_opened; - enum NewLineState - { - NLS_NONE, // stay with the same style - NLS_SPAN, // open a new <span> if possible, otherwise a new <div>
- NLS_DIV // has to open a new <div>
- } new_line_state; - - // The order is according to the appearance in check_state_change - // any state changed - bool all_changed; - // current position - double cur_tx, cur_ty; // real text position, in text coords - bool text_pos_changed; + //////////////////////////////////////////////////// + // states + //////////////////////////////////////////////////// + bool line_opened; + enum NewLineState + { + NLS_NONE, // stay with the same style + NLS_SPAN, // open a new <span> if possible, otherwise a new <div>
+ NLS_DIV // has to open a new <div>
+ } new_line_state; + + // The order is according to the appearance in check_state_change + // any state changed + bool all_changed; + // current position + double cur_tx, cur_ty; // real text position, in text coords - // font & size - const FontInfo * cur_font_info; - bool font_changed; - // cur_font_size is as in GfxState, - // font_size_manager saves the final font size used in HTML - double cur_font_size; + // cur_font_size and cur_text_tm are unscaled (the same as in PDF) + double cur_font_size; + // this is CTM * TextMAT in PDF + // [4] and [5] are ignored, + // as we'll calculate the position of the origin separately + double cur_text_tm[6]; // unscaled - // transform matrix - bool ctm_changed; - bool text_mat_changed; - // horizontal scaling - bool hori_scale_changed; - // this is CTM * TextMAT in PDF - // [4] and [5] are ignored, - // as we'll calculate the position of the origin separately - double cur_text_tm[6]; // unscaled + bool text_pos_changed; + bool font_changed; + bool ctm_changed; + bool text_mat_changed; + bool hori_scale_changed; + bool letter_space_changed; + bool word_space_changed; + bool rise_changed; - // letter spacing - bool letter_space_changed; - bool word_space_changed; + // font & size + const FontInfo * cur_font_info; - // fill color - long long cur_fill_color_id; - GfxRGB cur_fill_color; - bool cur_has_fill; - bool fill_color_changed; + // fill color + long long cur_fill_color_id; + GfxRGB cur_fill_color; + bool cur_has_fill; + bool fill_color_changed; - // stroke color - long long cur_stroke_color_id; - GfxRGB cur_stroke_color; - bool cur_has_stroke; - bool stroke_color_changed; - bool rise_changed; + // stroke color + long long cur_stroke_color_id; + GfxRGB cur_stroke_color; + bool cur_has_stroke; + bool stroke_color_changed; - // managers store values actually used in HTML (i.e. 
scaled) - FontSizeManager font_size_manager; - LetterSpaceManager letter_space_manager; - WordSpaceManager word_space_manager; - RiseManager rise_manager; - WhitespaceManager whitespace_manager; - HeightManager height_manager; - LeftManager left_manager; - TransformMatrixManager transform_matrix_manager; - // optimize for web - // we try to render the final font size directly - // to reduce the effect of ctm as much as possible - - // the actual tm used is `real tm in PDF` scaled by 1/draw_text_scale, - // so everything redenered should be multiplied by draw_text_scale - double draw_text_scale; + // managers store values actually used in HTML (i.e. scaled) + FontSizeManager font_size_manager; + LetterSpaceManager letter_space_manager; + WordSpaceManager word_space_manager; + RiseManager rise_manager; + WhitespaceManager whitespace_manager; + HeightManager height_manager; + LeftManager left_manager; + TransformMatrixManager transform_matrix_manager; - // the position of next char, in text coords - // this is actual position (in HTML), which might be different from cur_tx/ty (in PDF) - // also keep in mind that they are not the final position, as they will be transform by CTM (also true for cur_tx/ty) - double draw_tx, draw_ty; + // optimize for web + // we try to render the final font size directly + // to reduce the effect of ctm as much as possible + + // the actual tm used is `real tm in PDF` scaled by 1/draw_text_scale, + // so everything redenered should be multiplied by draw_text_scale + double draw_text_scale; - // some metrics have to be determined after all elements in the lines have been seen - // see TextLineBuffer.h - class TextLineBuffer; - friend class TextLineBuffer; - TextLineBuffer * text_line_buf; + // the position of next char, in text coords + // this is actual position (in HTML), which might be different from cur_tx/ty (in PDF) + // also keep in mind that they are not the final position, as they will be transform by CTM (also true for cur_tx/ty) 
+ double draw_tx, draw_ty; - // for font reencoding - int32_t * cur_mapping; - char ** cur_mapping2; - int * width_list; + // some metrics have to be determined after all elements in the lines have been seen + // see TextLineBuffer.h + class TextLineBuffer; + friend class TextLineBuffer; + TextLineBuffer * text_line_buf; - Preprocessor preprocessor; - TmpFiles tmp_files; + // for font reencoding + int32_t * cur_mapping; + char ** cur_mapping2; + int * width_list; - // for string formatting - StringFormatter str_fmt; + Preprocessor preprocessor; + TmpFiles tmp_files; - //////////////////////////////////////////////////// - // styles & resources - //////////////////////////////////////////////////// + // for string formatting + StringFormatter str_fmt; - std::unordered_map font_name_map; - std::unordered_map fill_color_map, stroke_color_map; + //////////////////////////////////////////////////// + // styles & resources + //////////////////////////////////////////////////// - const Param * param; + std::unordered_map font_name_map; + std::unordered_map fill_color_map, stroke_color_map; - struct { - std::ofstream fs; - std::string path; - } f_outline, f_pages, f_css; + const Param * param; - static const std::string MANIFEST_FILENAME; + struct { + std::ofstream fs; + std::string path; + } f_outline, f_pages, f_css; + + static const std::string MANIFEST_FILENAME; }; } //namespace pdf2htmlEX