1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 09:38:40 +00:00

a little bit better encoding

This commit is contained in:
Lu Wang 2012-08-28 00:06:09 +08:00
parent 1642b4a37e
commit 28eb708339
3 changed files with 81 additions and 67 deletions

View File

@ -136,8 +136,12 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
int code2GID_len = 0;
int maxcode = 0;
Gfx8BitFont * font_8bit = nullptr;
GfxCIDFont * font_cid = nullptr;
if(!font->isCIDFont())
{
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
maxcode = 0xff;
if(suffix == ".ttf")
{
@ -149,7 +153,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
FoFiTrueType *fftt = nullptr;
if((fftt = FoFiTrueType::make(buf, buflen)))
{
code2GID = dynamic_cast<Gfx8BitFont*>(font)->getCodeToGIDMap(fftt);
code2GID = font_8bit->getCodeToGIDMap(fftt);
code2GID_len = 256;
delete fftt;
}
@ -158,11 +162,25 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
}
else
{
// pass
// move the slot such that it's consistent with the encoding seen in PDF
ofstream out(tmp_dir / (fn + "_.encoding"));
add_tmp_file(fn+"_.encoding");
out << format("/%1% [") % fn << endl;
for(int i = 0; i < 256; ++i)
{
auto cn = font_8bit->getCharName(i);
out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
}
out << "] def" << endl;
script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
script_fout << format("Reencode(\"%1%\")") % fn << endl;
}
}
else
{
font_cid = dynamic_cast<GfxCIDFont*>(font);
maxcode = 0xffff;
if(suffix == ".ttf")
@ -180,43 +198,56 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
script_fout << "CIDFlatten()" << endl;
}
}
bool use_tounicode = ((suffix == ".ttf") || (param->always_apply_tounicode));
auto ctu = font->getToUnicode();
ofstream map_fout(tmp_dir / (fn + ".encoding"));
add_tmp_file(fn+".encoding");
for(int i = 0; i <= maxcode; ++i)
if(use_tounicode)
{
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
auto ctu = font->getToUnicode();
Unicode u, *pu=&u;
ofstream map_fout(tmp_dir / (fn + ".encoding"));
add_tmp_file(fn+".encoding");
if(use_tounicode)
int cnt = 0;
for(int i = 0; i <= maxcode; ++i)
{
int n = 0;
if(ctu)
n = ctu->mapToUnicode(i, &pu);
u = check_unicode(pu, n, i, font);
}
else
{
u = isLegalUnicode(i) ? i : map_to_private(i);
if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
continue;
++ cnt;
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
Unicode u, *pu=&u;
if(use_tounicode)
{
int n = 0;
if(ctu)
n = ctu->mapToUnicode(i, &pu);
u = check_unicode(pu, n, i, font);
}
else
{
u = unicode_from_font(i, font);
}
map_fout << format(" 0x%|1$X|") % u;
map_fout << format(" # 0x%|1$X|") % i;
map_fout << endl;
}
map_fout << format(" 0x%|1$X|") % u;
map_fout << format(" # 0x%|1$X|") % i;
if(cnt > 0)
{
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
}
map_fout << endl;
if(ctu)
ctu->decRefCnt();
}
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
if(ctu)
ctu->decRefCnt();
script_fout << format("Generate(%1%)") % ((param->single_html ? tmp_dir : dest_dir) / (fn+".ttf")) << endl;
if(param->single_html)
add_tmp_file(fn+".ttf");

View File

@ -130,28 +130,6 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
}
outf.close();
obj.streamClose();
/*
* Pre re-encode the font such that it's consistent with the encoding used by PDF
*/
auto output_to_file = [](void * stream, const char * data, int len)->void
{
reinterpret_cast<ostream*>(stream)->write(data, len);
};
if(suffix == ".cff")
{
auto f = FoFiType1C::load((char*)((tmp_dir/(fn+suffix)).c_str()));
suffix = ".pfa";
outf.open(tmp_dir / (fn + suffix), ofstream::binary);
add_tmp_file(fn+suffix);
f->convertToType1(nullptr, (const char **)dynamic_cast<Gfx8BitFont*>(font)->getEncoding(), false, output_to_file, &outf);
outf.close();
delete f;
}
}
catch(int)
{
@ -226,10 +204,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
++nSpaces;
}
Unicode uu = (cur_font_info.use_tounicode
? check_unicode(u, uLen, code, font)
: (isLegalUnicode(code) ? code : map_to_private(code))
);
Unicode uu = (cur_font_info.use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
outputUnicodes(html_fout, &uu, 1);
dx += dx1;

View File

@ -87,6 +87,27 @@ static inline Unicode map_to_private(CharCode code)
return private_mapping;
}
/*
 * Try to determine the Unicode value directly from the information in the font.
 *
 * For a non-CID font, the character name of `code` is looked up and translated
 * with globalParams->mapNameToUnicode(); that value is returned when
 * isLegalUnicode() accepts it.
 * In every other case (CID font, no character name, or an illegal result)
 * the value of map_to_private(code) is returned.
 */
static inline Unicode unicode_from_font (CharCode code, GfxFont * font)
{
    if(!font->isCIDFont())
    {
        // dynamic_cast yields nullptr on a type mismatch; check before dereferencing
        auto font_8bit = dynamic_cast<Gfx8BitFont*>(font);
        if(font_8bit != nullptr)
        {
            auto cname = font_8bit->getCharName(code);
            // may be untranslated ligature
            if(cname)
            {
                Unicode ou = globalParams->mapNameToUnicode(cname);
                if(isLegalUnicode(ou))
                    return ou;
            }
        }
    }

    // fall back to a code-derived value when the font gives no usable name
    return map_to_private(code);
}
/*
* We have to use a single Unicode value to reencode fonts.
* If we got multiple Unicode values, it might be an expanded ligature; try to restore it.
@ -103,20 +124,7 @@ static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont
return *u;
}
if(!font->isCIDFont())
{
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
// may be untranslated ligature
if(cname)
{
Unicode ou = globalParams->mapNameToUnicode(cname);
if(isLegalUnicode(ou))
return ou;
}
}
return map_to_private(code);
return unicode_from_font(code, font);
}
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)