2012-08-10 13:30:41 +00:00
|
|
|
/*
|
2012-09-10 05:03:25 +00:00
|
|
|
* Constants & Misc functions
|
2012-08-10 13:30:41 +00:00
|
|
|
*
|
|
|
|
*
|
|
|
|
* by WangLu
|
|
|
|
* 2012.08.10
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef UTIL_H__
|
|
|
|
#define UTIL_H__
|
|
|
|
|
2012-09-10 16:45:00 +00:00
|
|
|
#include <cstdio>
|
2012-08-27 10:37:00 +00:00
|
|
|
#include <iostream>
|
2012-09-09 17:18:09 +00:00
|
|
|
#include <algorithm>
|
2012-09-09 19:03:21 +00:00
|
|
|
#include <cmath>
|
|
|
|
#include <vector>
|
2012-09-10 05:03:25 +00:00
|
|
|
#include <string>
|
|
|
|
#include <map>
|
2012-08-14 13:23:33 +00:00
|
|
|
|
2012-09-10 05:03:25 +00:00
|
|
|
#ifndef nullptr
|
|
|
|
#define nullptr (NULL)
|
|
|
|
#endif
|
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
namespace pdf2htmlEX {
|
|
|
|
|
2012-09-10 05:03:25 +00:00
|
|
|
static const double EPS = 1e-6;
|
|
|
|
extern const double id_matrix[6];
|
|
|
|
|
|
|
|
static const double DEFAULT_DPI = 72.0;
|
|
|
|
|
|
|
|
extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
|
|
|
|
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
|
2012-09-12 15:26:14 +00:00
|
|
|
// map to embed files into html
|
|
|
|
// key: (suffix, if_embed_content)
|
|
|
|
// value: (prefix string, suffix string)
|
|
|
|
extern const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string> > EMBED_STRING_MAP;
|
2012-09-10 05:03:25 +00:00
|
|
|
|
2012-09-14 07:18:29 +00:00
|
|
|
/*
 * Snap values whose magnitude does not exceed EPS to exactly 0.0;
 * any other value is returned unchanged.
 */
static inline double _round(double x)
{
    if(std::abs(x) > EPS)
        return x;
    return 0.0;
}
|
2012-08-10 13:30:41 +00:00
|
|
|
/*
 * Approximate equality: true when x and y differ by less than EPS.
 */
static inline bool _equal(double x, double y)
{
    const double diff = x - y;
    return std::abs(diff) < EPS;
}
|
|
|
|
/*
 * True only when x is strictly greater than the tolerance EPS.
 */
static inline bool _is_positive(double x)
{
    return EPS < x;
}
|
|
|
|
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
|
|
|
|
{
|
|
|
|
for(int i = 0; i < size; ++i)
|
|
|
|
if(!_equal(tm1[i], tm2[i]))
|
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2012-10-01 17:59:04 +00:00
|
|
|
void _transform(const double * ctm, double & x, double & y, bool is_delta = false);
|
|
|
|
|
2012-09-06 16:58:23 +00:00
|
|
|
static inline long long hash_ref(const Ref * id)
|
|
|
|
{
|
|
|
|
return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen);
|
|
|
|
}
|
|
|
|
|
2012-08-17 07:25:10 +00:00
|
|
|
/*
|
|
|
|
* http://en.wikipedia.org/wiki/HTML_decimal_character_rendering
|
|
|
|
*/
|
2012-09-09 17:18:09 +00:00
|
|
|
bool isLegalUnicode(Unicode u);
|
2012-08-17 07:25:10 +00:00
|
|
|
|
2012-09-09 17:18:09 +00:00
|
|
|
Unicode map_to_private(CharCode code);
|
2012-08-27 10:37:00 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
/*
|
2012-08-27 16:06:09 +00:00
|
|
|
* Try to determine the Unicode value directly from the information in the font
|
2012-08-27 15:09:01 +00:00
|
|
|
*/
|
2012-09-09 17:18:09 +00:00
|
|
|
Unicode unicode_from_font (CharCode code, GfxFont * font);
|
2012-08-26 23:26:30 +00:00
|
|
|
|
2012-08-27 16:06:09 +00:00
|
|
|
/*
|
|
|
|
* We have to use a single Unicode value to reencode fonts
|
|
|
|
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
|
|
|
* if we cannot figure it out at the end, use a private mapping
|
|
|
|
*/
|
2012-09-09 17:18:09 +00:00
|
|
|
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font);
|
2012-08-27 16:06:09 +00:00
|
|
|
|
2012-09-09 17:18:09 +00:00
|
|
|
void outputUnicodes(std::ostream & out, const Unicode * u, int uLen);
|
2012-08-10 13:30:41 +00:00
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
class GfxRGB_hash
|
2012-08-14 09:13:29 +00:00
|
|
|
{
|
2012-09-11 13:52:46 +00:00
|
|
|
public:
|
|
|
|
size_t operator () (const GfxRGB & rgb) const
|
|
|
|
{
|
|
|
|
return (colToByte(rgb.r) << 16) | (colToByte(rgb.g) << 8) | (colToByte(rgb.b));
|
|
|
|
}
|
|
|
|
};
|
2012-08-14 09:13:29 +00:00
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
class GfxRGB_equal
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
bool operator ()(const GfxRGB & rgb1, const GfxRGB & rgb2) const
|
|
|
|
{
|
|
|
|
return ((rgb1.r == rgb2.r) && (rgb1.g == rgb2.g) && (rgb1.b == rgb1.b));
|
|
|
|
}
|
|
|
|
};
|
2012-08-10 13:30:41 +00:00
|
|
|
|
2012-08-14 09:24:54 +00:00
|
|
|
// we may need more info of a font in the future
|
|
|
|
/*
 * Plain record of per-font data; all members are public and are not
 * initialized by the class itself.
 */
class FontInfo
{
public:
    // numeric identifier of the font
    long long id;
    // flag related to the font's ToUnicode information -- TODO confirm exact meaning with the users of this field
    bool use_tounicode;
    int em_size;
    double ascent;
    double descent;
    // whether space is included in the font
    bool has_space;
};
|
|
|
|
|
|
|
|
// wrapper of the transform matrix double[6]
|
|
|
|
// Transform Matrix
|
|
|
|
class TM
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
TM() {}
|
|
|
|
TM(const double * m) {memcpy(_, m, sizeof(_));}
|
|
|
|
bool operator < (const TM & m) const {
|
2012-08-16 06:30:12 +00:00
|
|
|
// Note that we only care about the first 4 elements
|
|
|
|
for(int i = 0; i < 4; ++i)
|
2012-08-14 09:24:54 +00:00
|
|
|
{
|
|
|
|
if(_[i] < m._[i] - EPS)
|
|
|
|
return true;
|
|
|
|
if(_[i] > m._[i] + EPS)
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
bool operator == (const TM & m) const {
|
2012-08-16 06:30:12 +00:00
|
|
|
return _tm_equal(_, m._, 4);
|
2012-08-14 09:24:54 +00:00
|
|
|
}
|
|
|
|
double _[6];
|
|
|
|
};
|
|
|
|
|
2012-08-15 03:15:33 +00:00
|
|
|
/*
 * Reads bytes from an input stream and writes their base64 text to an
 * output stream.
 * Only the address of the input stream is stored, so the stream has to
 * stay alive until dumpto() has finished.
 */
class base64stream
{
public:
    base64stream(std::istream & in) : in(&in) { }
    base64stream(std::istream && in) : in(&in) { }

    /*
     * Encode everything that can still be read from the input stream,
     * append the result to `out` and return `out`.
     */
    std::ostream & dumpto(std::ostream & out)
    {
        unsigned char chunk[3];

        // complete 3-byte groups become 4 output characters each
        while(in->read((char*)chunk, 3))
        {
            const int s0 = (chunk[0] & 0xfc)>>2;
            const int s1 = ((chunk[0] & 0x03)<<4) | ((chunk[1] & 0xf0)>>4);
            const int s2 = ((chunk[1] & 0x0f)<<2) | ((chunk[2] & 0xc0)>>6);
            const int s3 = (chunk[2] & 0x3f);

            out << base64_encoding[s0]
                << base64_encoding[s1]
                << base64_encoding[s2]
                << base64_encoding[s3];
        }

        // bytes delivered by the last, failed read
        auto leftover = in->gcount();
        if(leftover <= 0)
            return out;

        // zero the unread part of the group
        for(int pos = leftover; pos < 3; ++pos)
            chunk[pos] = 0;

        out << base64_encoding[(chunk[0] & 0xfc)>>2]
            << base64_encoding[((chunk[0] & 0x03)<<4) | ((chunk[1] & 0xf0)>>4)];

        // third character: data when 2 bytes were read, padding otherwise
        if(leftover > 1)
            out << base64_encoding[(chunk[1] & 0x0f)<<2];
        else
            out << '=';

        // the fourth character of a partial group is always padding
        return out << '=';
    }

private:
    std::istream * in;
    static const char * base64_encoding;
};
|
|
|
|
|
2012-09-10 16:45:00 +00:00
|
|
|
/*
 * Stream insertion for a named base64stream: forwards to dumpto().
 */
static inline std::ostream & operator << (std::ostream & out, base64stream & bf)
{
    return bf.dumpto(out);
}
|
|
|
|
/*
 * Stream insertion for a temporary base64stream: forwards to dumpto().
 */
static inline std::ostream & operator << (std::ostream & out, base64stream && bf)
{
    return bf.dumpto(out);
}
|
2012-08-10 13:30:41 +00:00
|
|
|
|
2012-09-07 16:38:41 +00:00
|
|
|
/*
 * printf-style formatter that writes into one internal, growable buffer.
 *
 * operator() formats its arguments and returns a guarded_pointer that
 * converts to char*. While any guarded_pointer is alive the buffer counts
 * as in use, and calling operator() again trips an assertion.
 */
class string_formatter
{
public:
    /*
     * Handle to the formatted text.
     * Every live handle (copies included) is counted in buf_cnt.
     */
    class guarded_pointer
    {
    public:
        guarded_pointer(string_formatter * sf) : sf(sf) { ++(sf->buf_cnt); }
        // a copy refers to the same buffer, so it has to be counted as well;
        // otherwise its destructor would drive buf_cnt below zero
        guarded_pointer(const guarded_pointer & gp) : sf(gp.sf) { ++(sf->buf_cnt); }
        guarded_pointer & operator = (const guarded_pointer & gp)
        {
            // acquire the new buffer before releasing the old one,
            // which keeps self-assignment harmless
            ++(gp.sf->buf_cnt);
            --(sf->buf_cnt);
            sf = gp.sf;
            return *this;
        }
        ~guarded_pointer(void) { --(sf->buf_cnt); }
        operator char* () { return &(sf->buf.front()); }
    private:
        string_formatter * sf;
    };

    // the buffer is given a real size (not just capacity) so that
    // buf.front() and every byte written by vsnprintf are valid elements
    string_formatter() : buf(L_tmpnam), buf_cnt(0) { }

    /*
     * Important:
     * there is only one buffer, so new strings will replace old ones
     *
     * format, ...: printf-style format string and its arguments
     * returns: a guarded_pointer to the NUL-terminated result
     */
    guarded_pointer operator () (const char * format, ...) {
        assert((buf_cnt == 0) && "string_formatter: buffer is reused!");

        va_list vlist;
        va_start(vlist, format);
        int l = vsnprintf(&buf.front(), buf.size(), format, vlist);
        va_end(vlist);
        if(l >= (int)buf.size())
        {
            // output was truncated: grow to fit (at least doubling) and format again
            buf.resize(std::max<std::size_t>(static_cast<std::size_t>(l) + 1, buf.size() * 2));
            va_start(vlist, format);
            l = vsnprintf(&buf.front(), buf.size(), format, vlist);
            va_end(vlist);
        }
        assert(l >= 0); // we should fail when vsnprintf fail
        assert(l < (int)buf.size());
        return guarded_pointer(this);
    }
private:
    friend class guarded_pointer;
    std::vector<char> buf;
    int buf_cnt;
};
|
|
|
|
|
2012-09-09 17:18:09 +00:00
|
|
|
void create_directories(std::string path);
|
|
|
|
|
2012-09-09 18:07:35 +00:00
|
|
|
bool is_truetype_suffix(const std::string & suffix);
|
|
|
|
|
|
|
|
std::string get_filename(const std::string & path);
|
|
|
|
std::string get_suffix(const std::string & path);
|
|
|
|
|
2012-10-01 17:59:04 +00:00
|
|
|
/*
|
|
|
|
* In PDF, edges of the rectangle are in the middle of the borders
|
|
|
|
* In HTML, edges are completely outside the rectangle
|
|
|
|
*/
|
|
|
|
void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2,
|
|
|
|
double border_width,
|
|
|
|
double & x, double & y, double & w, double & h,
|
|
|
|
double & border_top_bottom_width,
|
|
|
|
double & border_left_right_width);
|
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
} // namespace pdf2htmlEX
|
2012-08-10 13:30:41 +00:00
|
|
|
#endif //UTIL_H__
|