// Mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git
// (synced 2024-10-06 03:51:40 +00:00; 229 lines, 5.2 KiB, C++)
/*
 * Misc functions
 *
 *
 * by WangLu
 * 2012.08.10
 */
|
|
|
|
|
|
#ifndef UTIL_H__
|
|
#define UTIL_H__
|
|
|
|
#include <algorithm>
#include <cmath>
#include <istream>
#include <ostream>
|
|
|
|
#include <GfxState.h>
|
|
#include <GfxFont.h>
|
|
#include <CharTypes.h>
|
|
#include <UTF8.h>
|
|
#include <GlobalParams.h>
|
|
|
|
#include "Consts.h"
|
|
|
|
using std::istream;
|
|
using std::ostream;
|
|
using std::noskipws;
|
|
using std::endl;
|
|
using std::flush;
|
|
|
|
// mute gcc warning of unused function
|
|
namespace
|
|
{
|
|
template <class T>
|
|
void dummy(){ auto _ = &mapUCS2; }
|
|
}
|
|
|
|
// Returns true when x and y differ by strictly less than EPS.
// std::fabs from <cmath> is used so that the floating-point absolute value is
// always the one selected, independent of which headers happen to be included
// before this one.
static inline bool _equal(double x, double y) { return std::fabs(x - y) < EPS; }
|
|
// Returns true only when x exceeds EPS, i.e. values in [0, EPS] are not
// considered positive.
static inline bool _is_positive(double x)
{
    return EPS < x;
}
|
|
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
|
|
{
|
|
for(int i = 0; i < size; ++i)
|
|
if(!_equal(tm1[i], tm2[i]))
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* http://en.wikipedia.org/wiki/HTML_decimal_character_rendering
|
|
*/
|
|
static inline bool isLegalUnicode(Unicode u)
|
|
{
|
|
/*
|
|
if((u == 9) || (u == 10) || (u == 13))
|
|
return true;
|
|
*/
|
|
|
|
if(u <= 31)
|
|
return false;
|
|
|
|
if((u >= 127) && (u <= 159))
|
|
return false;
|
|
|
|
if((u >= 0xd800) && (u <= 0xdfff))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
|
|
* We have to use a single Unicode value to reencode fonts
|
|
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
|
* if we cannot figure it out at the end, use a private mapping
|
|
*/
|
|
static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
|
|
{
|
|
Unicode private_mapping = (Unicode)(code + 0xE000);
|
|
|
|
if(len == 0)
|
|
return private_mapping;
|
|
|
|
if(len == 1)
|
|
{
|
|
if(isLegalUnicode(*u))
|
|
return *u;
|
|
}
|
|
|
|
if(!font->isCIDFont())
|
|
{
|
|
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
|
|
// may be untranslated ligature
|
|
if(cname)
|
|
{
|
|
Unicode ou = globalParams->mapNameToUnicode(cname);
|
|
|
|
if(isLegalUnicode(ou))
|
|
return ou;
|
|
}
|
|
}
|
|
|
|
return private_mapping;
|
|
}
|
|
|
|
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
|
|
{
|
|
for(int i = 0; i < uLen; ++i)
|
|
{
|
|
switch(u[i])
|
|
{
|
|
case '&':
|
|
out << "&";
|
|
break;
|
|
case '\"':
|
|
out << """;
|
|
break;
|
|
case '\'':
|
|
out << "'";
|
|
break;
|
|
case '<':
|
|
out << "<";
|
|
break;
|
|
case '>':
|
|
out << ">";
|
|
break;
|
|
default:
|
|
{
|
|
char buf[4];
|
|
auto n = mapUTF8(u[i], buf, 4);
|
|
out.write(buf, n);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static inline bool operator < (const GfxRGB & rgb1, const GfxRGB & rgb2)
|
|
{
|
|
if(rgb1.r < rgb2.r) return true;
|
|
if(rgb1.r > rgb2.r) return false;
|
|
if(rgb1.g < rgb2.g) return true;
|
|
if(rgb1.g > rgb2.g) return false;
|
|
return (rgb1.b < rgb2.b);
|
|
}
|
|
|
|
static inline bool operator == (const GfxRGB & rgb1, const GfxRGB & rgb2)
|
|
{
|
|
return ((rgb1.r == rgb2.r) && (rgb1.g == rgb2.g) && (rgb1.b == rgb1.b));
|
|
}
|
|
|
|
// Per-font record. Only an id is kept for now; more information about a font
// may be added here in the future.
class FontInfo
{
public:
    long long fn_id;    // id value; not initialized by this class
};
|
|
|
|
// wrapper of the transform matrix double[6]
|
|
// Transform Matrix
|
|
class TM
|
|
{
|
|
public:
|
|
TM() {}
|
|
TM(const double * m) {memcpy(_, m, sizeof(_));}
|
|
bool operator < (const TM & m) const {
|
|
// Note that we only care about the first 4 elements
|
|
for(int i = 0; i < 4; ++i)
|
|
{
|
|
if(_[i] < m._[i] - EPS)
|
|
return true;
|
|
if(_[i] > m._[i] + EPS)
|
|
return false;
|
|
}
|
|
return false;
|
|
}
|
|
bool operator == (const TM & m) const {
|
|
return _tm_equal(_, m._, 4);
|
|
}
|
|
double _[6];
|
|
};
|
|
|
|
// Reads bytes from an input stream and writes them base64-encoded
// (alphabet A-Z a-z 0-9 + /, '=' padding) to an output stream.
// Only the address of the input stream is stored, so the stream must still be
// alive when dumpto() is called; this matters in particular for the overload
// that accepts a temporary.
class base64stream
{
public:

    base64stream(istream & in) : in(&in) { }
    base64stream(istream && in) : in(&in) { }

    // Consumes the input until a read fails and writes the encoded text to
    // out. Returns out.
    ostream & dumpto(ostream & out)
    {
        unsigned char chunk[3];

        // complete groups: 3 input bytes become 4 output characters
        while(in->read(reinterpret_cast<char*>(chunk), 3))
        {
            const char c0 = base64_encoding[chunk[0] >> 2];
            const char c1 = base64_encoding[((chunk[0] & 0x03) << 4) | (chunk[1] >> 4)];
            const char c2 = base64_encoding[((chunk[1] & 0x0f) << 2) | (chunk[2] >> 6)];
            const char c3 = base64_encoding[chunk[2] & 0x3f];
            out << c0 << c1 << c2 << c3;
        }

        // the failed read may still have delivered 1 or 2 bytes
        const auto leftover = in->gcount();
        if(leftover <= 0)
            return out;

        const bool two_bytes = (leftover > 1);
        const unsigned char first = chunk[0];
        const unsigned char second = two_bytes ? chunk[1] : 0;   // missing byte counts as zero

        out << base64_encoding[first >> 2]
            << base64_encoding[((first & 0x03) << 4) | (second >> 4)];

        if(two_bytes)
            out << base64_encoding[(second & 0x0f) << 2];
        else
            out << '=';

        // an incomplete group always ends with at least one pad character
        out << '=';

        return out;
    }

private:
    static constexpr const char * base64_encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    istream * in;
};
|
|
|
|
// Stream insertion for base64stream: forwards to dumpto(), so
// `out << encoder` writes the encoded input to out and returns out.
// Two overloads so that both named encoders and temporaries are accepted.
static inline ostream & operator << (ostream & out, base64stream & bf)
{
    return bf.dumpto(out);
}

static inline ostream & operator << (ostream & out, base64stream && bf)
{
    return bf.dumpto(out);
}
|
|
|
|
#endif //UTIL_H__
|