pdf2htmlEX/src/HTMLRenderer/text.cc

238 lines
5.1 KiB
C++
Raw Normal View History

2012-08-14 08:23:15 +00:00
/*
 * text.cc
 *
 * Handling text and related stuff
 *
 * by WangLu
 * 2012.08.14
 */
#include <iostream>
2012-08-20 21:48:21 +00:00
#include <algorithm>
2012-08-14 08:23:15 +00:00
#include <boost/format.hpp>
#include "HTMLRenderer.h"
2012-08-14 09:13:29 +00:00
#include "namespace.h"
2012-08-14 08:23:15 +00:00
2012-08-20 21:48:21 +00:00
using std::all_of;
2012-08-14 09:13:29 +00:00
/*
 * Dump the font program embedded in the PDF for `font` into the temporary
 * directory, as a file named "f<fn_id in hex><suffix>".
 *
 * The font dictionary is fetched through xref using the font's object ID.
 * If it carries a non-empty DescendantFonts array whose first entry is a
 * dictionary, that entry is used instead. The FontDescriptor of the chosen
 * dictionary is then searched for FontFile3, FontFile2 and FontFile, in
 * that order, and the first stream found is written out.
 *
 * font  - the font whose embedded program should be dumped
 * fn_id - numeric id used to build the output file name
 *
 * Returns the file suffix (".cff", ".cid", ".ttf" or ".pfa") on success.
 * On any failure a message is written to cerr and an empty string is
 * returned.
 */
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
{
    // mupdf consulted

    // NOTE: everything with a non-trivial constructor that lives at function
    // scope is declared before the first `goto err`, so no jump bypasses an
    // initialization.
    Object ref_obj, font_obj, font_obj2, fontdesc_obj;
    Object obj, obj1, obj2;
    Dict * dict = nullptr;

    string suffix;

    auto * id = font->getID();
    ref_obj.initRef(id->num, id->gen);
    ref_obj.fetch(xref, &font_obj);
    ref_obj.free();

    if(!font_obj.isDict())
    {
        cerr << "Font object is not a dictionary" << endl;
        goto err;
    }

    dict = font_obj.getDict();
    if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
    {
        if(font_obj2.arrayGetLength() == 0)
        {
            cerr << "Warning: empty DescendantFonts array" << endl;
        }
        else
        {
            if(font_obj2.arrayGetLength() > 1)
                cerr << "TODO: multiple entries in DescendantFonts array" << endl;

            // only the first descendant is considered
            if(font_obj2.arrayGet(0, &obj2)->isDict())
            {
                dict = obj2.getDict();
            }
        }
    }

    if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
    {
        cerr << "Cannot find FontDescriptor " << endl;
        goto err;
    }

    dict = fontdesc_obj.getDict();

    if(dict->lookup("FontFile3", &obj)->isStream())
    {
        // FontFile3 needs its Subtype to tell which format is embedded
        if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
        {
            const string subtype = obj1.getName();
            if(subtype == "Type1C")
            {
                suffix = ".cff";
            }
            else if (subtype == "CIDFontType0C")
            {
                suffix = ".cid";
            }
            else
            {
                cerr << "Unknown subtype: " << subtype << endl;
                goto err;
            }
        }
        else
        {
            cerr << "Invalid subtype in font descriptor" << endl;
            goto err;
        }
    }
    else if (dict->lookup("FontFile2", &obj)->isStream())
    {
        suffix = ".ttf";
    }
    else if (dict->lookup("FontFile", &obj)->isStream())
    {
        suffix = ".pfa";
    }
    else
    {
        cerr << "Cannot find FontFile for dump" << endl;
        goto err;
    }

    // suffix is non-empty from here on: every branch above either assigned
    // one or jumped to err.
    {
        const string fn = (format("f%|1$x|%2%")%fn_id%suffix).str();

        ofstream outf;
        outf.open(tmp_dir / fn , ofstream::binary);
        if(!outf)
        {
            // Do not pretend the dump succeeded: callers recognise failure
            // by the empty suffix, as on every other error path.
            cerr << "Cannot open file " << fn << " for writing" << endl;
            suffix.clear();
            goto err;
        }
        add_tmp_file(fn);

        obj.streamReset();

        char buf[1024];
        int len;
        while((len = obj.streamGetChars(static_cast<int>(sizeof(buf)), reinterpret_cast<Guchar*>(buf))) > 0)
        {
            outf.write(buf, len);
        }
        outf.close();
        obj.streamClose();
    }

err:
    // release every Object, whether or not it was ever filled in
    obj2.free();
    obj1.free();
    obj.free();

    fontdesc_obj.free();
    font_obj2.free();
    font_obj.free();
    return suffix;
}
/*
 * Output the characters of `s` to html_fout and advance the tracked text
 * positions (cur_tx/cur_ty and draw_tx/draw_ty).
 *
 * Nothing is emitted when the string is empty, when there is no current
 * font, when the font reports a non-zero writing mode, or when the two low
 * bits of the render mode are both set (hidden text).
 *
 * Each character is decoded with GfxFont::getNextChar. Characters that map
 * to legal Unicode are written as text; anything else is replaced by a
 * whitespace <span> whose class is obtained from install_whitespace.
 */
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
    if(s->getLength() == 0)
        return;

    auto font = state->getFont();
    if((font == nullptr) || (font->getWMode()))
        return;

    //hidden
    if((state->getRender() & 3) == 3)
        return;

    // see if the line has to be closed due to state change
    check_state_change(state);
    prepare_line(state);

    // Now ready to output
    // get the unicodes
    char *p = s->getCString();
    int len = s->getLength();

    double dx = 0;      // accumulated glyph advances, before font-size scaling
    double dy = 0;
    double dxerr = 0;   // accumulated (installed - requested) whitespace width
    double dx1,dy1;
    double ox, oy;

    int nChars = 0;
    int nSpaces = 0;
    int uLen;

    CharCode code;
    Unicode *u = nullptr;

    const double fs = state->getFontSize();
    const double cs = state->getCharSpace();
    const double ws = state->getWordSpace();
    const double hs = state->getHorizScaling();

    while (len > 0) {
        auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy);

        if(!(_equal(ox, 0) && _equal(oy, 0)))
        {
            cerr << "TODO: non-zero origins" << endl;
        }

        // a single-byte space gets word spacing in addition to char spacing
        const bool is_space = (n == 1 && *p == ' ');
        if(is_space)
        {
            ++nSpaces;
        }

        if((uLen > 0) && (all_of(u, u+uLen, isLegalUnicode)))
        {
            outputUnicodes(html_fout, u, uLen);
        }
        else
        {
            // should not consider horizontal scaling here
            // will be handled by draw_ctm
            double target = dx1 * fs + cs;
            if(is_space)
                target += ws;

            double w;
            auto wid = install_whitespace(target * draw_scale, w);
            html_fout << format("<span class=\"_ _%|1$x|\">%2%</span>") % wid % (target > 0 ? " " : "");

            // difference between the width actually installed and the one
            // requested, in the same units as `target`
            dxerr += w/draw_scale - target;
        }

        dx += dx1;
        dy += dy1;

        ++nChars;
        p += n;
        len -= n;
    }

    // horiz_scaling is merged into ctm now,
    // so the coordinate system is ugly
    dx = (dx * fs + nChars * cs + nSpaces * ws) * hs;

    dy *= fs;

    cur_tx += dx;
    cur_ty += dy;

    // `dxerr` is built from `target`, which already includes the font size
    // (dx1 * fs + cs), so it only needs horizontal scaling here, exactly
    // like `dx` above. Multiplying by the font size again would over-scale
    // the correction.
    draw_tx += dx + dxerr * hs;
    draw_ty += dy;
}