";
if(param->readable) html_fout << endl;
// default CTM
html_fout << "
";
if(param->readable) html_fout << endl;
}
// Finishes the page currently being written: flushes any open text line via
// close_cur_line(), then writes two pieces of closing markup to html_fout
// (one for the CTM container, one for the page container).
// NOTE(review): both string literals below are split across a line break in
// this copy of the file, which suggests their original contents were lost --
// confirm against the upstream source.
void HTMLRenderer::endPage() {
close_cur_line();
// close CTM
html_fout << "
";
// when param->readable is set, each piece of markup is followed by a newline
if(param->readable) html_fout << endl;
// close page
html_fout << "
";
if(param->readable) html_fout << endl;
}
// Negates entries 1, 2 and 5 of the six-element matrix `tm` in place;
// entries 0, 3 and 4 are left as they are.
// Presumably this flips the vertical axis between the PDF and HTML
// coordinate systems -- confirm against the callers.
void HTMLRenderer::convert_transform_matrix(double * tm)
{
    static const int negated[] = {1, 2, 5};
    for(int idx : negated)
        tm[idx] = -tm[idx];
}
bool HTMLRenderer::at_same_line(const TextString * ts1, const TextString * ts2) const
{
if(!(std::abs(ts1->getY() - ts2->getY()) < param->v_eps))
return false;
GfxState * s1 = ts1->getState();
GfxState * s2 = ts2->getState();
if(!(_equal(s1->getCharSpace(), s2->getCharSpace())
&& _equal(s1->getWordSpace(), s2->getWordSpace())
&& _equal(s1->getHorizScaling(), s2->getHorizScaling())))
return false;
/*
no need for this, as we track TM now
if(!(_tm_equal(s1->getCTM(), s2->getCTM()) && _tm_equal(s1->getTextMat(), s2->getTextMat())))
return false;
*/
return true;
}
// Closes the text line that is currently open, if there is one: writes the
// line terminator to html_fout, frees cur_line and resets the accumulated
// horizontal offset. Does nothing when no line is open.
// NOTE(review): the literal written here is empty in this revision; a
// closing tag was presumably intended -- confirm against upstream.
void HTMLRenderer::close_cur_line()
{
    if(cur_line == nullptr)
        return;

    html_fout << "";
    if(param->readable)
        html_fout << endl;

    delete cur_line;
    cur_line = nullptr;
    cur_line_x_offset = 0;
}
// Writes the code points of `str` to html_fout as UTF-8, replacing the five
// characters that are significant in HTML markup and attribute values
// (& " ' < >) with character references so that page text can never be
// parsed as markup.
//
// Code points that mapUTF8() cannot encode (it returns 0) are dropped.
void HTMLRenderer::outputTextString(TextString * str)
{
    for (auto u : str->getUnicodes())
    {
        switch(u)
        {
            case '&':
                html_fout << "&amp;";
                break;
            case '\"':
                html_fout << "&quot;";
                break;
            case '\'':
                // numeric reference: unlike &apos; it is valid in every HTML version
                html_fout << "&#39;";
                break;
            case '<':
                html_fout << "&lt;";
                break;
            case '>':
                html_fout << "&gt;";
                break;
            default:
                {
                    char buf[4];
                    auto n = mapUTF8(u, buf, 4);
                    if(n > 0)
                        html_fout.write(buf, n);
                }
                break;
        }
    }
}
// Reacts to a change of the current transformation matrix.
//
// The state's CTM is copied, passed through convert_transform_matrix() and
// compared with the cached `ctm`. If they differ, the open text line is
// closed, the cache is refreshed and new markup for the matrix is written
// (the matrix is registered through install_transform_matrix()).
// The m11..m32 parameters are not used; the matrix is read from `state`.
//
// NOTE(review): both literals written below are empty in this revision, so
// the markup they were meant to carry appears to be lost. With Boost.Format's
// default exception mask, feeding an argument to a format string that has no
// directives throws boost::io::too_many_args -- confirm against upstream.
void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32)
{
    double candidate[6];
    memcpy(candidate, state->getCTM(), sizeof(candidate));
    convert_transform_matrix(candidate);

    if(_tm_equal(ctm, candidate))
        return;

    close_cur_line();
    memcpy(ctm, candidate, sizeof(ctm));

    // close old CTM div and create a new one
    html_fout << "";
    if(param->readable)
        html_fout << endl;

    html_fout << boost::format("") % install_transform_matrix(ctm);
    if(param->readable)
        html_fout << endl;
}
// Registers the font and font size of `state` (both are installed on every
// call) and, if either id differs from the one currently in effect, closes
// the open text line and records the new ids.
void HTMLRenderer::updateFont(GfxState *state) {
    const long long fn_id = install_font(state->getFont());
    const long long fs_id = install_font_size(state->getFontSize());

    if((fn_id == cur_fn_id) && (fs_id == cur_fs_id))
        return;

    close_cur_line();
    cur_fn_id = fn_id;
    cur_fs_id = fs_id;
}
// Reacts to a change of the text matrix.
//
// The state's text matrix is copied, passed through
// convert_transform_matrix() and compared with the cached `text_mat`. On a
// difference the open text line is closed and the cache is replaced, after
// which the last two entries (indices 4 and 5) of the cache are zeroed.
void HTMLRenderer::updateTextMat(GfxState * state)
{
    double candidate[6];
    memcpy(candidate, state->getTextMat(), sizeof(candidate));
    convert_transform_matrix(candidate);

    if(_tm_equal(text_mat, candidate))
        return;

    close_cur_line();
    memcpy(text_mat, candidate, sizeof(text_mat));

    // debug
    // TODO: why are entries 4 and 5 cleared?
    text_mat[4] = 0.0;
    text_mat[5] = 0.0;
}
// Starts collecting a new string: allocates a TextString built on a copy of
// `state` and stores it in cur_string. The GooString argument is not used.
void HTMLRenderer::beginString(GfxState *state, GooString *s) {
    // TODO: remove this (the copy of the graphics state)
    GfxState * const state_copy = state->copy(gTrue);
    cur_string = new TextString(state_copy);
}
// Finishes the string started by beginString() and writes it to html_fout.
//
// An empty string is discarded. Otherwise the string is either appended to
// the line that is already open (when at_same_line() accepts it and the
// horizontal gap is not too negative), or the open line is closed and a new
// one is started for it. In both cases ownership of cur_string moves to
// cur_line and cur_string is reset to nullptr.
//
// NOTE(review): two format-string literals in this function (the whitespace
// markup in the merge branch and the opening markup of a new line) are split
// across lines / truncated in this copy of the file; their original content
// appears to be lost -- confirm against upstream.
void HTMLRenderer::endString(GfxState *state) {
if (cur_string->getSize() == 0) {
// NOTE(review): cur_string is deleted but not reset to nullptr here, so it
// dangles until the next beginString() overwrites it.
delete cur_string ;
return;
}
// try to merge with last line
if(cur_line != nullptr)
{
if(at_same_line(cur_line, cur_string))
{
// x1: right edge of the open line, x2: left edge of the new string
double x1 = cur_line->getX() + cur_line->getWidth();
double x2 = cur_string->getX();
// gap still to be bridged, corrected by the offset error carried over
// from earlier merges on this line
double target = x2-x1-cur_line_x_offset;
// merge only if the new string does not start more than h_eps to the left
if(target > -param->h_eps)
{
if(target > param->h_eps)
{
// the gap is wide enough to need explicit whitespace; install_whitespace()
// may hand back a slightly different width (w), and the difference is
// remembered in cur_line_x_offset
double w;
auto wid = install_whitespace(target, w);
cur_line_x_offset = w-target;
html_fout << boost::format("
") % wid;
}
else
{
// gap within tolerance: emit nothing, only remember the error
cur_line_x_offset = -target;
}
outputTextString(cur_string);
// the merged string becomes the new reference for the open line
delete cur_line;
cur_line = cur_string;
cur_string = nullptr;
return;
}
}
}
// no merge possible: finish the open line (if any) and start a new one
close_cur_line();
// TODO: optimize text matrix search/install
html_fout << boost::format("
getY()) << "px;"
<< "left:" << cur_string->getX() << "px;"
// << "height:" << cur_string->getHeight() << "px;"
;
// letter & word spacing
// (only written when _is_positive() accepts the value)
GfxState * cur_state = cur_string -> getState();
if(_is_positive(cur_state->getCharSpace()))
html_fout << "letter-spacing:" << cur_state->getCharSpace() << "px;";
if(_is_positive(cur_state->getWordSpace()))
html_fout << "word-spacing:" << cur_state->getWordSpace() << "px;";
//debug
// writes the transformed current point of the string's state as
// data-x / data-y attributes
{
html_fout << "\"";
double x,y;
cur_state->transform(cur_state->getCurX(), cur_state->getCurY(), &x, &y);
html_fout << boost::format(" data-x=\"%1%\" data-y=\"%2%")%x%y;
}
html_fout << "\">";
outputTextString(cur_string);
// the new string is now the open line; it stays open so that following
// strings can be merged into it
cur_line = cur_string;
cur_string = nullptr;
cur_line_x_offset = 0;
// HERE
//debug
// close_cur_line();
}
// Adds the Unicode code points of one glyph to cur_string.
//
// Nothing is added when the render mode has both low bits set (hidden text).
// The glyph advance (dx, dy), with the character-spacing contribution
// removed from dx, is divided evenly among the uLen code points, and each
// code point is placed one such step after the previous one starting at
// (x, y). originX, originY and code are not used.
void HTMLRenderer::drawChar(GfxState *state, double x, double y,
        double dx, double dy,
        double originX, double originY,
        CharCode code, int /*nBytes*/, Unicode *u, int uLen)
{
    // if it is hidden, then return
    if ((state->getRender() & 3) == 3)
        return;

    // TODO: a glyph that is not on the string's line is only reported
    if (!_equal(cur_string->getY(), y))
        std::cerr << "TODO: line break in a string" << std::endl;

    double step_w = dx - state->getCharSpace() * state->getHorizScaling();
    double step_h = dy;
    if (uLen != 0)
    {
        step_w /= uLen;
        step_h /= uLen;
    }

    for (int k = 0; k < uLen; ++k)
        cur_string->addChar(state, x + k*step_w, y + k*step_h, step_w, step_h, u[k]);
}
// TODO
// Walks the bytes of `s` with the current font and accumulates the total
// advance of the string (adapted from poppler).
//
// Reports fonts with a non-zero writing mode as unsupported. The computed
// advance is currently not used for anything after it has been calculated.
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
    auto font = state->getFont();
    if(font->getWMode())
        std::cerr << "TODO: writing mode" << std::endl;

    // stolen from poppler
    double total_dx = 0;
    double total_dy = 0;
    int char_count = 0;
    int space_count = 0;

    // outputs of getNextChar()
    double glyph_dx, glyph_dy;
    double origin_x, origin_y;
    int uLen;
    CharCode code;
    Unicode * u = nullptr;

    char * cursor = s->getCString();
    int remaining = s->getLength();
    while (remaining > 0)
    {
        auto consumed = font->getNextChar(cursor, remaining, &code, &u, &uLen,
                                          &glyph_dx, &glyph_dy, &origin_x, &origin_y);
        total_dx += glyph_dx;
        total_dy += glyph_dy;

        // a single-byte space counts towards word spacing
        if (consumed == 1 && *cursor == ' ')
            ++space_count;
        ++char_count;

        cursor += consumed;
        remaining -= consumed;
    }

    total_dx = total_dx * state->getFontSize()
        + char_count * state->getCharSpace()
        + space_count * state->getWordSpace();
    total_dx *= state->getHorizScaling();
    total_dy *= state->getFontSize();
}
// The font installation code is stolen from PSOutputDev.cc in poppler
// Registers `font` and returns the renderer-local id assigned to it.
//
// Fonts are keyed by the bytes of the object returned by font->getID(),
// packed into one long long; a null font uses key 0. A font that has been
// seen before returns its existing id. Otherwise the next free id is
// allocated and a CSS rule for it is exported:
//   - null font, Type 3 font, external font, unknown location or unsupported
//     embedded type: the default fallback rule;
//   - embedded Type1 / Type1C / OpenType-CFF / TrueType: the matching
//     install_embedded_* routine;
//   - resident font: install_base_font().
// If the font cannot be located at all, the fallback rule is exported as
// well, so every id handed out has a CSS rule behind it.
//
// NOTE(review): a real font whose id bytes are all zero would share key 0
// with the null font -- confirm this cannot happen.
long long HTMLRenderer::install_font(GfxFont * font)
{
    static_assert(sizeof(long long) == 2*sizeof(int), "the font key packs two ints into one long long");

    // copy the id bytes instead of dereferencing a casted pointer: same value,
    // but without violating strict aliasing
    long long font_key = 0;
    if(font != nullptr)
        memcpy(&font_key, font->getID(), sizeof(font_key));

    auto iter = font_name_map.find(font_key);
    if(iter != font_name_map.end())
        return iter->second.fn_id;

    long long new_fn_id = font_name_map.size();
    font_name_map.insert(std::make_pair(font_key, FontInfo({new_fn_id})));

    if(font == nullptr)
    {
        export_remote_default_font(new_fn_id);
        return new_fn_id;
    }

    if(font->getType() == fontType3) {
        std::cerr << "TODO: Type 3 font unsupported" << std::endl;
        export_remote_default_font(new_fn_id);
        return new_fn_id;
    }

    auto * font_loc = font->locateFont(xref, gTrue);
    if(font_loc == nullptr)
    {
        // nothing to install from; still give the id a CSS rule
        export_remote_default_font(new_fn_id);
        return new_fn_id;
    }

    switch(font_loc -> locType)
    {
        case gfxFontLocEmbedded:
            switch(font_loc -> fontType)
            {
                case fontType1:
                    install_embedded_type1_font(&font_loc->embFontID, new_fn_id);
                    break;
                case fontType1C:
                    install_embedded_type1c_font(font, new_fn_id);
                    break;
                case fontType1COT:
                    install_embedded_opentypet1c_font(font, new_fn_id);
                    break;
                case fontTrueType:
                case fontTrueTypeOT:
                    install_embedded_truetype_font(font, new_fn_id);
                    break;
                default:
                    std::cerr << "TODO: unsuppported embedded font type" << std::endl;
                    export_remote_default_font(new_fn_id);
                    break;
            }
            break;
        case gfxFontLocExternal:
            std::cerr << "TODO: external font" << std::endl;
            export_remote_default_font(new_fn_id);
            break;
        case gfxFontLocResident:
            install_base_font(font, font_loc, new_fn_id);
            break;
        default:
            std::cerr << "TODO: other font loc" << std::endl;
            export_remote_default_font(new_fn_id);
            break;
    }

    delete font_loc;
    return new_fn_id;
}
// Extracts the embedded Type 1 font stream referenced by `id` into the file
// "f<fn_id in hex>.pfa" and exports a remote-font CSS rule for it.
//
// The stream is copied in the three sections described by its Length1,
// Length2 and Length3 dictionary entries:
//   1. Length1 bytes are copied verbatim;
//   2. the next section is copied verbatim if its first four bytes are all
//      hex digits, otherwise every byte is written as two hex digits with a
//      line break every 32 bytes;
//   3. if Length3 > 0 the rest of the stream is copied; otherwise a trailer
//      of 8 lines of 64 '0' characters followed by "cleartomark" is written.
//
// On any failure (not a stream, no dictionary, non-integer lengths, zero
// Length2, stream too short) a message is printed and the default fallback
// rule is exported, so the font id always has a CSS rule.
//
// NOTE(review): the file is written with a .pfa suffix but the CSS rule is
// exported with suffix "otf"; presumably a later conversion step produces
// the .otf -- confirm.
void HTMLRenderer::install_embedded_type1_font (Ref * id, long long fn_id)
{
    // All locals that live until `err` are declared before the first goto.
    Object ref_obj, str_obj, ol1, ol2, ol3;
    Dict * dict;
    int l1, l2, l3;
    int c;
    bool is_bin = false;
    bool installed = false;
    int buf[4];

    ofstream tmpf((boost::format("f%|1$x|.pfa")%fn_id).str().c_str(), ofstream::binary);
    auto output_char = [&tmpf](int ch)->void {
        char tmp = static_cast<char>(ch & 0xff);
        tmpf.write(&tmp, 1);
    };

    ref_obj.initRef(id->num, id->gen);
    ref_obj.fetch(xref, &str_obj);
    ref_obj.free();

    if(!str_obj.isStream())
    {
        std::cerr << "Embedded font is not a stream" << std::endl;
        goto err;
    }

    dict = str_obj.streamGetDict();
    if(dict == nullptr)
    {
        std::cerr << "No dict in the embedded font" << std::endl;
        goto err;
    }

    dict->lookup("Length1", &ol1);
    dict->lookup("Length2", &ol2);
    dict->lookup("Length3", &ol3);

    if(!(ol1.isInt() && ol2.isInt() && ol3.isInt()))
    {
        std::cerr << "Length 1&2&3 are not all integers" << std::endl;
        ol1.free();
        ol2.free();
        ol3.free();
        goto err;
    }

    l1 = ol1.getInt();
    l2 = ol2.getInt();
    l3 = ol3.getInt();
    ol1.free();
    ol2.free();
    ol3.free();

    str_obj.streamReset();

    // section 1: copied as-is
    for(int i = 0; i < l1; ++i)
    {
        if((c = str_obj.streamGetChar()) == EOF)
            break;
        output_char(c);
    }

    if(l2 == 0)
    {
        std::cerr << "Bad Length2" << std::endl;
        goto err;
    }

    // section 2
    {
        // Peek at up to four bytes to decide between hex text and binary.
        // After the loop, i is the number of bytes held in buf.
        int i;
        for(i = 0; i < 4; ++i)
        {
            int j = buf[i] = str_obj.streamGetChar();
            if(buf[i] == EOF)
            {
                std::cerr << "Embedded font stream is too short" << std::endl;
                goto err;
            }

            if(!((j>='0'&&j<='9') || (j>='a'&&j<='f') || (j>='A'&&j<='F')))
            {
                is_bin = true;
                ++i;
                break;
            }
        }

        if(is_bin)
        {
            // binary data: write every byte as two hex digits
            static const char hex_char[] = "0123456789ABCDEF";
            for(int j = 0; j < i; ++j)
            {
                output_char(hex_char[(buf[j]>>4)&0xf]);
                output_char(hex_char[buf[j]&0xf]);
            }
            for(; i < l2; ++i)
            {
                if(i % 32 == 0)
                    output_char('\n');
                c = str_obj.streamGetChar();
                if(c == EOF)
                    break;
                output_char(hex_char[(c>>4)&0xf]);
                output_char(hex_char[c&0xf]);
            }
            if(i % 32 != 0)
                output_char('\n');
        }
        else
        {
            // already hex text: copy the peeked bytes, then the rest
            for(int j = 0; j < i; ++j)
            {
                output_char(buf[j]);
            }
            for(;i < l2; ++i)
            {
                c = str_obj.streamGetChar();
                if(c == EOF)
                    break;
                output_char(c);
            }
        }
    }

    // section 3
    if(l3 > 0)
    {
        while((c = str_obj.streamGetChar()) != EOF)
            output_char(c);
    }
    else
    {
        // no trailer in the stream: write one
        for(int i = 0; i < 8; ++i)
        {
            for(int j = 0; j < 64; ++j)
                output_char('0');
            output_char('\n');
        }
        static const char * CTM = "cleartomark\n";
        tmpf.write(CTM, strlen(CTM));
    }

    export_remote_font(fn_id, "otf");
    installed = true;

err:
    // Reached on success as well as on failure.
    if(!installed)
        export_remote_default_font(fn_id);
    // streamClose() is only meaningful for stream objects; the first failure
    // path gets here with an object that is not one.
    if(str_obj.isStream())
        str_obj.streamClose();
    str_obj.free();
}
void HTMLRenderer::output_to_file(void * outf, const char * data, int len)
{
reinterpret_cast(outf)->write(data, len);
}
// Converts the embedded Type 1C font of `font` to Type 1, writes it to
// "f<fn_id in hex>.pfa" and exports a remote-font CSS rule for it.
//
// If the embedded font file cannot be read or cannot be parsed, a warning is
// printed and the default fallback rule is exported instead, so the font id
// always has a CSS rule.
//
// NOTE(review): the file is written with a .pfa suffix but the rule is
// exported with suffix "otf"; presumably a later conversion step produces
// the .otf -- confirm.
void HTMLRenderer::install_embedded_type1c_font (GfxFont * font, long long fn_id)
{
    int font_len;
    char * font_buf = font->readEmbFontFile(xref, &font_len);
    if(font_buf == nullptr)
    {
        std::cerr << "Warning: cannot read embedded type 1c font: " << fn_id << std::endl;
        export_remote_default_font(fn_id);
        return;
    }

    auto * FFT1C = FoFiType1C::make(font_buf, font_len);
    if(FFT1C != nullptr)
    {
        string fn = (boost::format("f%|1$x|")%fn_id).str();
        ofstream tmpf((fn+".pfa").c_str(), ofstream::binary);
        // the FoFi API takes a non-const name pointer
        FFT1C->convertToType1(const_cast<char *>(fn.c_str()), nullptr, true, &output_to_file , &tmpf);
        export_remote_font(fn_id, "otf");
        delete FFT1C;
    }
    else
    {
        std::cerr << "Warning: cannot process type 1c font: " << fn_id << std::endl;
        export_remote_default_font(fn_id);
    }

    gfree(font_buf);
}
// Embedded OpenType fonts with Type 1C outlines take exactly the same path
// as embedded TrueType fonts; this simply forwards to that installer.
void HTMLRenderer::install_embedded_opentypet1c_font (GfxFont * font, long long fn_id)
{
    this->install_embedded_truetype_font(font, fn_id);
}
// Writes the embedded TrueType (or OpenType/Type 1C) font of `font` to
// "f<fn_id in hex>.ttf" and exports a remote-font CSS rule for it.
//
// If the embedded font file cannot be read or cannot be parsed, a warning is
// printed and the default fallback rule is exported instead, so the font id
// always has a CSS rule.
void HTMLRenderer::install_embedded_truetype_font (GfxFont * font, long long fn_id)
{
    int font_len;
    char * font_buf = font->readEmbFontFile(xref, &font_len);
    if(font_buf == nullptr)
    {
        std::cerr << "Warning: cannot read embedded truetype (or opentype t1c) font: " << fn_id << std::endl;
        export_remote_default_font(fn_id);
        return;
    }

    auto * FFTT = FoFiTrueType::make(font_buf, font_len);
    if(FFTT != nullptr)
    {
        string fn = (boost::format("f%|1$x|")%fn_id).str();
        ofstream tmpf((fn+".ttf").c_str(), ofstream::binary);
        // the FoFi API takes a non-const name pointer
        FFTT->writeTTF(output_to_file, &tmpf, const_cast<char *>(fn.c_str()), nullptr);
        export_remote_font(fn_id, "ttf");
        delete FFTT;
    }
    else
    {
        std::cerr << "Warning: cannot process truetype (or opentype t1c) font: " << fn_id << std::endl;
        export_remote_default_font(fn_id);
    }

    gfree(font_buf);
}
// Exports a CSS rule for a resident (non-embedded) font.
//
// The font name is taken from font_loc->path; the part before the first '-'
// (the whole name if there is none) is looked up in
// BASE_14_FONT_CSS_FONT_MAP. A miss is reported and leaves the CSS font
// empty, in which case export_local_font() decides what to write.
void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
{
    const std::string psname(font_loc->path->getCString());
    const string family_key = psname.substr(0, psname.find('-'));

    string cssfont;
    auto found = BASE_14_FONT_CSS_FONT_MAP.find(family_key);
    if(found != BASE_14_FONT_CSS_FONT_MAP.end())
    {
        cssfont = found->second;
    }
    else
    {
        std::cerr << "PS Font: " << family_key << " not found in the base 14 font map" << std::endl;
        cssfont = "";
    }

    export_local_font(fn_id, font, font_loc, psname, cssfont);
}
// Returns the id of the CSS class for `font_size`, creating it on first use.
//
// The first registered size that is not below font_size - EPS is reused if
// _equal() accepts it as the same size; otherwise a new id (the current
// number of registered sizes) is allocated and its CSS rule is exported.
long long HTMLRenderer::install_font_size(double font_size)
{
    auto candidate = font_size_map.lower_bound(font_size - EPS);
    const bool known = (candidate != font_size_map.end())
                    && _equal(candidate->first, font_size);
    if(known)
        return candidate->second;

    long long fresh_id = font_size_map.size();
    font_size_map.insert(std::make_pair(font_size, fresh_id));
    export_font_size(fresh_id, font_size);
    return fresh_id;
}
// Returns the id of the CSS class for a whitespace of width `ws_width`,
// creating it on first use.
//
// The first registered width that is not below ws_width - h_eps is reused if
// it lies within param->h_eps of the requested width; `actual_width` then
// receives that registered width. Otherwise a new id is allocated, its CSS
// rule is exported and `actual_width` is the requested width.
long long HTMLRenderer::install_whitespace(double ws_width, double & actual_width)
{
    auto candidate = whitespace_map.lower_bound(ws_width - param->h_eps);
    const bool reusable = (candidate != whitespace_map.end())
                       && (std::abs(candidate->first - ws_width) < param->h_eps);
    if(reusable)
    {
        actual_width = candidate->first;
        return candidate->second;
    }

    long long fresh_id = whitespace_map.size();
    actual_width = ws_width;
    whitespace_map.insert(std::make_pair(ws_width, fresh_id));
    export_whitespace(fresh_id, ws_width);
    return fresh_id;
}
// Returns the id of the CSS class for the six-element matrix `tm`, creating
// it on first use.
//
// The matrix is wrapped in a TM key and looked up in transform_matrix_map.
// If the first entry not ordered before the key compares equal to it, that
// entry's id is returned. Otherwise a new id (the current number of
// registered matrices) is allocated and its CSS rule is exported.
long long HTMLRenderer::install_transform_matrix(double * tm){
    TM m(tm);
    auto iter = transform_matrix_map.lower_bound(m);
    // lower_bound() yields end() for an empty map or when the key sorts after
    // every entry; end() must not be dereferenced
    if((iter != transform_matrix_map.end()) && (m == (iter->first)))
    {
        return iter->second;
    }

    long long new_tm_id = transform_matrix_map.size();
    transform_matrix_map.insert(std::make_pair(m, new_tm_id));
    export_transform_matrix(new_tm_id, tm);
    return new_tm_id;
}
void HTMLRenderer::export_remote_font(long long fn_id, const string & suffix)
{
allcss_fout << boost::format("@font-face{font-family:f%|1$x|;src:url(f%|1$x|.%2%);}.f%|1$x|{font-family:f%|1$x|;}") % fn_id % suffix;
if(param->readable) allcss_fout << endl;
}
void HTMLRenderer::export_remote_default_font(long long fn_id)
{
allcss_fout << boost::format(".f%|1$x|{font-family:sans-serif;color:red;}")%fn_id;
if(param->readable) allcss_fout << endl;
}
// Writes the class rule ".f<fn_id in hex>" for a font that is expected to be
// available on the viewer's machine.
//
// font-family is `cssfont` when one is given; otherwise it is the original
// font name followed by the generic family from general_font_family().
// Bold fonts get font-weight:bold. A font whose name contains "oblique" (in
// any letter case) gets font-style:oblique; failing that, italic fonts get
// font-style:italic. `font_loc` is not used.
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont)
{
    allcss_fout << boost::format(".f%|1$x|{") % fn_id;

    allcss_fout << "font-family:";
    if(cssfont == "")
        allcss_fout << (original_font_name + "," + general_font_family(font));
    else
        allcss_fout << cssfont;
    allcss_fout << ";";

    if(font->isBold())
        allcss_fout << "font-weight:bold;";

    const bool named_oblique = boost::algorithm::ifind_first(original_font_name, "oblique");
    if(named_oblique)
        allcss_fout << "font-style:oblique;";
    else if(font->isItalic())
        allcss_fout << "font-style:italic;";

    allcss_fout << "}";

    if(param->readable)
        allcss_fout << endl;
}
// Maps `font` to a generic CSS font family: "monospace" for fixed-width
// fonts, else "serif" for serif fonts, else "sans-serif".
std::string HTMLRenderer::general_font_family(GfxFont * font)
{
    if(font->isFixedWidth())
        return "monospace";

    return font->isSerif() ? "serif" : "sans-serif";
}
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
{
allcss_fout << boost::format(".s%|1$x|{font-size:%2%px;}") % fs_id % font_size;
if(param->readable) allcss_fout << endl;
}
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
{
allcss_fout << boost::format(".w%|1$x|{width:%2%px;}") % ws_id % ws_width;
if(param->readable) allcss_fout << endl;
}
// Writes the class rule ".t<tm_id in hex>" to allcss_fout, declaring
// transform:matrix(...) with the six entries of `tm` once unprefixed and once
// for each of the -ms-, -moz-, -webkit- and -o- vendor prefixes.
// For the -moz- variant the last two entries carry a "px" unit.
void HTMLRenderer::export_transform_matrix (long long tm_id, double * tm)
{
    // TODO: recognize common matrices
    static const char * const vendor_prefixes[] = {"", "-ms-", "-moz-", "-webkit-", "-o-"};

    allcss_fout << boost::format(".t%|1$x|{") % tm_id;

    for(const std::string prefix : vendor_prefixes)
    {
        allcss_fout << prefix << "transform:matrix(";

        // first four entries
        for(int k = 0; k < 4; ++k)
            allcss_fout << tm[k] << ',';

        // last two entries
        const bool is_moz = (prefix == "-moz-");
        if(is_moz)
            allcss_fout << boost::format("%1%px,%2%px);") % tm[4] % tm[5];
        else
            allcss_fout << boost::format("%1%,%2%);") % tm[4] % tm[5];
    }

    allcss_fout << "}";

    if(param->readable)
        allcss_fout << endl;
}