// Source: pdf2htmlEX/src/HTMLRenderer.h
// Mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git (synced 2024-07-08 19:00:33 +00:00)
/*
* HTMLRenderer.h
*
* Created on: Mar 15, 2011
* Author: tian
*/
#ifndef HTMLRENDERER_H_
#define HTMLRENDERER_H_
#include <algorithm>
#include <cmath>
#include <fstream>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include <OutputDev.h>
#include <GfxState.h>
#include <CharTypes.h>
#include <Stream.h>
#include <Array.h>
#include <Dict.h>
#include <XRef.h>
#include <Catalog.h>
#include <Page.h>
#include <PDFDoc.h>
#include <goo/gtypes.h>
#include <Object.h>
#include <GfxFont.h>
#include "Param.h"
using namespace std;
// Absolute tolerance shared by the floating-point comparisons in this header.
static const double EPS = 1e-6;

// True when x and y differ by strictly less than EPS.
// std::fabs (from <cmath>) keeps the comparison in floating point; it cannot
// silently resolve to an integer abs() overload when <cmath> is missing.
inline bool _equal(double x, double y) { return std::fabs(x - y) < EPS; }

// True when x exceeds EPS, i.e. x is positive beyond the tolerance.
inline bool _is_positive(double x) { return x > EPS; }
2012-08-05 11:39:37 +00:00
inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
2012-08-04 18:03:53 +00:00
{
2012-08-05 11:39:37 +00:00
for(int i = 0; i < size; ++i)
2012-08-04 18:03:53 +00:00
if(!_equal(tm1[i], tm2[i]))
return false;
return true;
}
class HTMLRenderer : public OutputDev
{
public:
HTMLRenderer(const Param * param);
virtual ~HTMLRenderer();
void process(PDFDoc * doc);
//---- get info about output device
// Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gFalse; }
// Does this device use drawChar() or drawString()?
2012-08-07 07:03:06 +00:00
virtual GBool useDrawChar() { return gFalse; }
2012-08-04 18:03:53 +00:00
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
// Does this device need non-text content?
virtual GBool needNonText() { return gFalse; }
//----- initialization and control
virtual GBool checkPageSlice(Page *page, double hDPI, double vDPI,
int rotate, GBool useMediaBox, GBool crop,
int sliceX, int sliceY, int sliceW, int sliceH,
GBool printing, Catalog * catalogA,
GBool (* abortCheckCbk)(void *data) = NULL,
void * abortCheckCbkData = NULL)
{
docPage = page;
catalog = catalogA;
return gTrue;
}
// Start a page.
virtual void startPage(int pageNum, GfxState *state);
// End a page.
virtual void endPage();
//----- update state
2012-08-07 07:03:06 +00:00
/*
* To optmize false alarms
* We just mark as changed, and recheck if they have been changed when we are about to output a new string
*/
virtual void updateAll(GfxState * state) { all_changed = true; }
virtual void updateFont(GfxState * state) { font_changed = true; }
virtual void updateTextMat(GfxState * state) { text_mat_changed = true; }
virtual void updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32) { ctm_changed = true; }
2012-08-07 08:44:49 +00:00
virtual void updateTextPos(GfxState * state) { line_pos_changed = true; }
2012-08-07 09:59:24 +00:00
virtual void updateTextShift(GfxState * state, double shift) {
double off = shift * 0.001 * state->getFontSize() * state->getHorizScaling();
cur_line_x_offset += off;
cur_tx -= off;
}
2012-08-07 07:03:06 +00:00
virtual void updateFillColor(GfxState * state) { color_changed = true; }
2012-08-04 18:03:53 +00:00
//----- text drawing
virtual void drawString(GfxState * state, GooString * s);
private:
void close_cur_line();
2012-08-07 07:03:06 +00:00
void outputUnicodes(const Unicode * u, int uLen);
2012-08-04 18:03:53 +00:00
// return the mapped font name
long long install_font(GfxFont * font);
static void output_to_file(void * outf, const char * data, int len);
2012-08-06 10:10:06 +00:00
void install_embedded_type1_font (Ref * id, GfxFont * font, long long fn_id);
2012-08-04 18:03:53 +00:00
void install_embedded_type1c_font (GfxFont * font, long long fn_id);
void install_embedded_opentypet1c_font (GfxFont * font, long long fn_id);
void install_embedded_truetype_font (GfxFont * font, long long fn_id);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id);
long long install_font_size(double font_size);
long long install_whitespace(double ws_width, double & actual_width);
2012-08-06 10:10:06 +00:00
long long install_transform_matrix(const double * tm);
2012-08-06 16:48:33 +00:00
long long install_color(const GfxRGB * rgb);
2012-08-04 18:03:53 +00:00
/*
* remote font: to be retrieved from the web server
* local font: to be substituted with a local (client side) font
*/
2012-08-06 10:10:06 +00:00
void export_remote_font(long long fn_id, const string & suffix, GfxFont * font);
2012-08-04 18:03:53 +00:00
void export_remote_default_font(long long fn_id);
void export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont);
std::string general_font_family(GfxFont * font);
void export_font_size(long long fs_id, double font_size);
void export_whitespace(long long ws_id, double ws_width);
2012-08-06 10:10:06 +00:00
void export_transform_matrix(long long tm_id, const double * tm);
2012-08-06 16:48:33 +00:00
void export_color(long long color_id, const GfxRGB * rgb);
2012-08-04 18:03:53 +00:00
2012-08-06 16:48:33 +00:00
XRef * xref;
2012-08-04 18:03:53 +00:00
Catalog *catalog;
Page *docPage;
// page info
int pageNum ;
2012-08-05 11:39:37 +00:00
double pageWidth ;
double pageHeight ;
2012-08-04 18:03:53 +00:00
2012-08-05 11:39:37 +00:00
2012-08-07 07:03:06 +00:00
// state tracking when processing pdf
2012-08-05 11:39:37 +00:00
void check_state_change(GfxState * state);
2012-08-06 16:48:33 +00:00
void reset_state_track();
2012-08-05 11:39:37 +00:00
2012-08-07 07:03:06 +00:00
bool all_changed;
2012-08-05 11:39:37 +00:00
2012-08-07 07:03:06 +00:00
// if we have a pending opened line
bool line_opened;
2012-08-05 11:39:37 +00:00
2012-08-06 16:48:33 +00:00
// current position
2012-08-07 01:59:10 +00:00
double cur_tx, cur_ty; // in text coords
2012-08-07 08:44:49 +00:00
double cur_line_x_offset; // in text coords, our position - real position
bool line_pos_changed;
2012-08-05 11:39:37 +00:00
long long cur_fn_id;
double cur_font_size;
long long cur_fs_id;
bool font_changed;
2012-08-04 18:03:53 +00:00
2012-08-06 10:10:06 +00:00
long long cur_tm_id;
bool ctm_changed;
bool text_mat_changed;
2012-08-07 07:03:06 +00:00
// this is CTM * TextMAT in PDF, not only CTM
// [4] and [5] are ignored, we'll calculate the position of the origin separately
double cur_ctm[6]; // unscaled
2012-08-06 10:10:06 +00:00
2012-08-06 16:48:33 +00:00
long long cur_color_id;
GfxRGB cur_color;
bool color_changed;
2012-08-06 10:10:06 +00:00
2012-08-06 14:46:50 +00:00
// optmize for web
// we try to render the final font size directly
// to reduce the effect of ctm as much as possible
2012-08-07 07:03:06 +00:00
// draw_ctm is cur_ctem scaled by 1/draw_scale,
// so everything redenered should be scaled by draw_scale
2012-08-06 10:10:06 +00:00
double draw_ctm[6];
double draw_font_size;
2012-08-07 07:03:06 +00:00
double draw_scale;
2012-08-04 18:03:53 +00:00
ofstream html_fout, allcss_fout;
class FontInfo{
public:
long long fn_id;
};
unordered_map<long long, FontInfo> font_name_map;
map<double, long long> font_size_map;
map<double, long long> whitespace_map;
// transform matrix
class TM{
public:
TM() {}
2012-08-06 10:10:06 +00:00
TM(const double * m) {memcpy(_, m, sizeof(_));}
2012-08-04 18:03:53 +00:00
bool operator < (const TM & m) const {
for(int i = 0; i < 6; ++i)
{
if(_[i] < m._[i] - EPS)
return true;
if(_[i] > m._[i] + EPS)
return false;
}
return false;
}
bool operator == (const TM & m) const {
return _tm_equal(_, m._);
}
double _[6];
};
map<TM, long long> transform_matrix_map;
2012-08-06 16:48:33 +00:00
class Color{
public:
Color() {}
Color(const GfxRGB * rgb) {
_[0] = rgb->r;
_[1] = rgb->g;
_[2] = rgb->b;
}
bool operator < (const Color & c) const {
for(int i = 0; i < 3; ++i)
{
if(_[i] < c._[i])
return true;
if(_[i] > c._[i])
return false;
}
return false;
}
bool operator == (const Color & c) const {
for(int i = 0; i < 3; ++i)
if(_[i] != c._[i])
return false;
return true;
}
int _[3];
};
map<Color, long long> color_map;
2012-08-04 18:03:53 +00:00
const Param * param;
};
#endif /* HTMLRENDERER_H_ */