1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-17 21:16:03 +00:00
pdf2htmlEX/src/HTMLRenderer/text.cc

299 lines
7.0 KiB
C++
Raw Normal View History

2012-08-14 08:23:15 +00:00
/*
* text.cc
*
* Handling text and related stuff
*
* by WangLu
* 2012.08.14
*/
#include <iostream>
#include <boost/format.hpp>
#include "HTMLRenderer.h"
/*
 * Dump the font program embedded in the PDF for `font` into TMP_DIR.
 *
 * The font dictionary is fetched through the font's object reference; for
 * fonts with a DescendantFonts array the first descendant's dictionary is
 * used instead. The FontDescriptor is then searched for a FontFile3,
 * FontFile2 or FontFile stream (in that order) and the stream content is
 * written to "<TMP_DIR>/f<fn_id in hex><suffix>".
 *
 * font  : font whose embedded program should be extracted
 * fn_id : id used to build the output file name
 *
 * Returns the suffix of the written file (".cff", ".cid" or ".ttf"), or an
 * empty string when nothing could be dumped (no usable font stream, unknown
 * subtype, or the output file could not be written).
 */
std::string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
{
    // mupdf consulted

    Object ref_obj, font_obj, font_obj2, fontdesc_obj;
    Object obj, obj1, obj2;
    Dict * dict = nullptr;

    // All locals are declared up front so that no `goto err` below jumps
    // over an initialization.
    std::string suffix, subtype, path;

    char buf[1024];
    int len;

    ofstream outf;

    auto * id = font->getID();
    ref_obj.initRef(id->num, id->gen);
    ref_obj.fetch(xref, &font_obj);
    ref_obj.free();

    if(!font_obj.isDict())
    {
        std::cerr << "Font object is not a dictionary" << std::endl;
        goto err;
    }

    dict = font_obj.getDict();
    if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
    {
        if(font_obj2.arrayGetLength() == 0)
        {
            std::cerr << "Warning: empty DescendantFonts array" << std::endl;
        }
        else
        {
            if(font_obj2.arrayGetLength() > 1)
                std::cerr << "TODO: multiple entries in DescendantFonts array" << std::endl;

            // Only the first descendant is considered.
            if(font_obj2.arrayGet(0, &obj2)->isDict())
            {
                dict = obj2.getDict();
            }
        }
    }

    if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
    {
        std::cerr << "Cannot find FontDescriptor " << std::endl;
        goto err;
    }

    dict = fontdesc_obj.getDict();

    if(dict->lookup("FontFile3", &obj)->isStream())
    {
        if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
        {
            subtype = obj1.getName();
            if(subtype == "Type1C")
            {
                suffix = ".cff";
            }
            else if (subtype == "CIDFontType0C")
            {
                suffix = ".cid";
            }
            else
            {
                std::cerr << "Unknown subtype: " << subtype << std::endl;
                goto err;
            }
        }
        else
        {
            std::cerr << "Invalid subtype in font descriptor" << std::endl;
            goto err;
        }
    }
    else
    {
        // Release whatever the failed lookup stored before reusing `obj`,
        // otherwise a non-stream value would be overwritten without being freed.
        obj.free();
        if(dict->lookup("FontFile2", &obj)->isStream())
        {
            suffix = ".ttf";
        }
        else
        {
            obj.free();
            if(dict->lookup("FontFile", &obj)->isStream())
            {
                // NOTE(review): per the PDF specification FontFile holds a
                // Type 1 font program, so ".ttf" looks wrong here. Kept as-is
                // because callers may depend on the returned suffix -- confirm.
                suffix = ".ttf";
            }
            else
            {
                std::cerr << "Cannot find FontFile for dump" << std::endl;
                goto err;
            }
        }
    }

    if(suffix == "")
    {
        std::cerr << "Font type unrecognized" << std::endl;
        goto err;
    }

    // Open the destination first: if that fails there is no point in
    // touching the stream, and the caller must not be told a file exists.
    path = (boost::format("%1%/f%|2$x|%3%")%TMP_DIR%fn_id%suffix).str();
    outf.open(path.c_str(), ofstream::binary);
    if(!outf.is_open())
    {
        std::cerr << "Cannot open font file for writing: " << path << std::endl;
        suffix.clear();
        goto err;
    }

    obj.streamReset();
    while((len = obj.streamGetChars(static_cast<int>(sizeof(buf)), reinterpret_cast<Guchar*>(buf))) > 0)
    {
        outf.write(buf, len);
    }
    outf.close();
    obj.streamClose();

    // A failed write or close leaves the stream in a failed state; report it
    // instead of returning a suffix for a truncated or missing file.
    if(!outf)
    {
        std::cerr << "Failed to write font file: " << path << std::endl;
        suffix.clear();
    }

err:
    obj2.free();
    obj1.free();
    obj.free();

    fontdesc_obj.free();
    font_obj2.free();
    font_obj.free();
    return suffix;
}
/*
 * Emit the HTML for one string of text.
 *
 * Nothing is emitted for an empty string, when the state has no font, when
 * the font reports a non-zero writing mode, or for hidden text.
 * Otherwise the text is appended to the currently open line when the
 * horizontal gap allows it (inserting a whitespace span for a forward gap),
 * or a new "l" div is opened. Finally the current and drawn text positions
 * are advanced by the width of the string.
 *
 * state : graphics state the string is drawn with
 * s     : raw string bytes, decoded through the state's font
 */
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
    if(s->getLength() == 0)
        return;

    auto font = state->getFont();
    const bool usable_font = (font != nullptr) && !(font->getWMode());
    if(!usable_font)
        return;

    // hidden
    const bool hidden = ((state->getRender() & 3) == 3);
    if(hidden)
        return;

    // see if the line has to be closed due to state change
    check_state_change(state);

    // if the line is still open, try to merge with it
    if(line_opened)
    {
        double gap = (cur_tx - draw_tx) * draw_scale;
        if(!(gap > -param->h_eps))
        {
            // or can we shift left using simple tags?
            close_cur_line();
        }
        else if(gap > param->h_eps)
        {
            // forward gap: fill it with a whitespace span
            double filled;
            auto ws_id = install_whitespace(gap, filled);
            html_fout << boost::format("<span class=\"w w%|1$x|\"> </span>") % ws_id;
            draw_tx += filled / draw_scale;
        }
    }

    if(!line_opened)
    {
        // have to open a new line

        // classes
        html_fout << "<div class=\"l ";
        html_fout << boost::format("f%|1$x| s%|2$x| c%|3$x|") % cur_fn_id % cur_fs_id % cur_color_id;

        // "t0" is the id_matrix
        if(cur_tm_id != 0)
            html_fout << boost::format(" t%|1$x|") % cur_tm_id;

        // position of the line, computed from the transformed current point
        double px, py;
        state->transform(state->getCurX(), state->getCurY(), &px, &py);

        // TODO: recheck descent/ascent
        const auto bottom = py + state->getFont()->getDescent() * draw_font_size;
        const auto top = pageHeight - py - state->getFont()->getAscent() * draw_font_size;
        html_fout << "\" style=\"";
        html_fout << "bottom:" << bottom << "px;";
        html_fout << "top:" << top << "px;";
        html_fout << "left:" << px << "px;";

        // TODO: tracking
        // letter & word spacing
        const auto char_space = state->getCharSpace();
        if(_is_positive(char_space))
            html_fout << "letter-spacing:" << char_space << "px;";

        const auto word_space = state->getWordSpace();
        if(_is_positive(word_space))
            html_fout << "word-spacing:" << word_space << "px;";

        // debug: real pos & hori_scale (disabled)
#if 0
        html_fout << "\"";
        double x,y;
        state->transform(state->getCurX(), state->getCurY(), &x, &y);
        html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-drawscale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%")
            %x%y%(state->getHorizScaling())%draw_scale%state->getLineX()%state->getLineY();
#endif

        html_fout << "\">";

        line_opened = true;
        draw_tx = cur_tx;
    }

    // Now ready to output
    // get the unicodes
    char * cursor = s->getCString();
    int remaining = s->getLength();

    double sum_dx = 0;
    double sum_dy = 0;
    double glyph_dx, glyph_dy;
    double origin_x, origin_y;

    int char_count = 0;
    int space_count = 0;

    int unicode_len;
    CharCode code;
    Unicode * unicodes = nullptr;

    while(remaining > 0)
    {
        auto consumed = font->getNextChar(cursor, remaining, &code, &unicodes, &unicode_len,
                                          &glyph_dx, &glyph_dy, &origin_x, &origin_y);

        if(!(_equal(origin_x, 0) && _equal(origin_y, 0)))
        {
            std::cerr << "TODO: non-zero origins" << std::endl;
        }

        if(unicode_len != 0)
        {
            outputUnicodes(html_fout, unicodes, unicode_len);
        }
        else
        {
            // TODO
#if 0
            CharCode c = 0;
            for(int i = 0; i < consumed; ++i)
            {
                c = (c<<8) | (code&0xff);
                code >>= 8;
            }
            for(int i = 0; i < consumed; ++i)
            {
                Unicode u = (c&0xff);
                c >>= 8;
                outputUnicodes(html_fout, &u, 1);
            }
#endif
        }

        sum_dx += glyph_dx;
        sum_dy += glyph_dy;

        // a single-byte space counts towards word spacing
        if(consumed == 1 && *cursor == ' ')
        {
            ++space_count;
        }
        ++char_count;

        cursor += consumed;
        remaining -= consumed;
    }

    // total advance of the string
    const double advance_x = (sum_dx * state->getFontSize()
            + char_count * state->getCharSpace()
            + space_count * state->getWordSpace()) * state->getHorizScaling();
    const double advance_y = sum_dy * state->getFontSize();

    cur_tx += advance_x;
    cur_ty += advance_y;

    draw_tx += advance_x;
    draw_ty += advance_y;
}