mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
a little bit better encoding
This commit is contained in:
parent
1642b4a37e
commit
28eb708339
@ -136,8 +136,12 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
int code2GID_len = 0;
|
||||
int maxcode = 0;
|
||||
|
||||
Gfx8BitFont * font_8bit = nullptr;
|
||||
GfxCIDFont * font_cid = nullptr;
|
||||
|
||||
if(!font->isCIDFont())
|
||||
{
|
||||
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
|
||||
maxcode = 0xff;
|
||||
if(suffix == ".ttf")
|
||||
{
|
||||
@ -149,7 +153,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
FoFiTrueType *fftt = nullptr;
|
||||
if((fftt = FoFiTrueType::make(buf, buflen)))
|
||||
{
|
||||
code2GID = dynamic_cast<Gfx8BitFont*>(font)->getCodeToGIDMap(fftt);
|
||||
code2GID = font_8bit->getCodeToGIDMap(fftt);
|
||||
code2GID_len = 256;
|
||||
delete fftt;
|
||||
}
|
||||
@ -158,11 +162,25 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
}
|
||||
else
|
||||
{
|
||||
// pass
|
||||
// move the slot such that it's consistent with the encoding seen in PDF
|
||||
ofstream out(tmp_dir / (fn + "_.encoding"));
|
||||
add_tmp_file(fn+"_.encoding");
|
||||
|
||||
out << format("/%1% [") % fn << endl;
|
||||
for(int i = 0; i < 256; ++i)
|
||||
{
|
||||
auto cn = font_8bit->getCharName(i);
|
||||
out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
|
||||
}
|
||||
out << "] def" << endl;
|
||||
|
||||
script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
|
||||
script_fout << format("Reencode(\"%1%\")") % fn << endl;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
font_cid = dynamic_cast<GfxCIDFont*>(font);
|
||||
maxcode = 0xffff;
|
||||
|
||||
if(suffix == ".ttf")
|
||||
@ -180,43 +198,56 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
script_fout << "CIDFlatten()" << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool use_tounicode = ((suffix == ".ttf") || (param->always_apply_tounicode));
|
||||
auto ctu = font->getToUnicode();
|
||||
|
||||
ofstream map_fout(tmp_dir / (fn + ".encoding"));
|
||||
add_tmp_file(fn+".encoding");
|
||||
|
||||
for(int i = 0; i <= maxcode; ++i)
|
||||
if(use_tounicode)
|
||||
{
|
||||
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
|
||||
auto ctu = font->getToUnicode();
|
||||
|
||||
Unicode u, *pu=&u;
|
||||
ofstream map_fout(tmp_dir / (fn + ".encoding"));
|
||||
add_tmp_file(fn+".encoding");
|
||||
|
||||
if(use_tounicode)
|
||||
int cnt = 0;
|
||||
for(int i = 0; i <= maxcode; ++i)
|
||||
{
|
||||
int n = 0;
|
||||
if(ctu)
|
||||
n = ctu->mapToUnicode(i, &pu);
|
||||
u = check_unicode(pu, n, i, font);
|
||||
}
|
||||
else
|
||||
{
|
||||
u = isLegalUnicode(i) ? i : map_to_private(i);
|
||||
if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
|
||||
continue;
|
||||
|
||||
++ cnt;
|
||||
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
|
||||
|
||||
Unicode u, *pu=&u;
|
||||
|
||||
if(use_tounicode)
|
||||
{
|
||||
int n = 0;
|
||||
if(ctu)
|
||||
n = ctu->mapToUnicode(i, &pu);
|
||||
u = check_unicode(pu, n, i, font);
|
||||
}
|
||||
else
|
||||
{
|
||||
u = unicode_from_font(i, font);
|
||||
}
|
||||
|
||||
map_fout << format(" 0x%|1$X|") % u;
|
||||
map_fout << format(" # 0x%|1$X|") % i;
|
||||
|
||||
map_fout << endl;
|
||||
}
|
||||
|
||||
map_fout << format(" 0x%|1$X|") % u;
|
||||
map_fout << format(" # 0x%|1$X|") % i;
|
||||
if(cnt > 0)
|
||||
{
|
||||
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
|
||||
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
|
||||
}
|
||||
|
||||
map_fout << endl;
|
||||
if(ctu)
|
||||
ctu->decRefCnt();
|
||||
}
|
||||
|
||||
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
|
||||
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
|
||||
|
||||
if(ctu)
|
||||
ctu->decRefCnt();
|
||||
|
||||
script_fout << format("Generate(%1%)") % ((param->single_html ? tmp_dir : dest_dir) / (fn+".ttf")) << endl;
|
||||
if(param->single_html)
|
||||
add_tmp_file(fn+".ttf");
|
||||
|
@ -130,28 +130,6 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
||||
}
|
||||
outf.close();
|
||||
obj.streamClose();
|
||||
|
||||
/*
|
||||
* Pre re-encode the font such that it's consistent with the encoding used by PDF
|
||||
*/
|
||||
auto output_to_file = [](void * stream, const char * data, int len)->void
|
||||
{
|
||||
reinterpret_cast<ostream*>(stream)->write(data, len);
|
||||
};
|
||||
|
||||
if(suffix == ".cff")
|
||||
{
|
||||
auto f = FoFiType1C::load((char*)((tmp_dir/(fn+suffix)).c_str()));
|
||||
|
||||
suffix = ".pfa";
|
||||
outf.open(tmp_dir / (fn + suffix), ofstream::binary);
|
||||
add_tmp_file(fn+suffix);
|
||||
|
||||
f->convertToType1(nullptr, (const char **)dynamic_cast<Gfx8BitFont*>(font)->getEncoding(), false, output_to_file, &outf);
|
||||
outf.close();
|
||||
|
||||
delete f;
|
||||
}
|
||||
}
|
||||
catch(int)
|
||||
{
|
||||
@ -226,10 +204,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
++nSpaces;
|
||||
}
|
||||
|
||||
Unicode uu = (cur_font_info.use_tounicode
|
||||
? check_unicode(u, uLen, code, font)
|
||||
: (isLegalUnicode(code) ? code : map_to_private(code))
|
||||
);
|
||||
Unicode uu = (cur_font_info.use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
||||
outputUnicodes(html_fout, &uu, 1);
|
||||
|
||||
dx += dx1;
|
||||
|
36
src/util.h
36
src/util.h
@ -87,6 +87,27 @@ static inline Unicode map_to_private(CharCode code)
|
||||
return private_mapping;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to determine the Unicode value directly from the information in the font
|
||||
*/
|
||||
static inline Unicode unicode_from_font (CharCode code, GfxFont * font)
|
||||
{
|
||||
if(!font->isCIDFont())
|
||||
{
|
||||
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
|
||||
// may be untranslated ligature
|
||||
if(cname)
|
||||
{
|
||||
Unicode ou = globalParams->mapNameToUnicode(cname);
|
||||
|
||||
if(isLegalUnicode(ou))
|
||||
return ou;
|
||||
}
|
||||
}
|
||||
|
||||
return map_to_private(code);
|
||||
}
|
||||
|
||||
/*
|
||||
* We have to use a single Unicode value to reencode fonts
|
||||
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
||||
@ -103,20 +124,7 @@ static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont
|
||||
return *u;
|
||||
}
|
||||
|
||||
if(!font->isCIDFont())
|
||||
{
|
||||
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
|
||||
// may be untranslated ligature
|
||||
if(cname)
|
||||
{
|
||||
Unicode ou = globalParams->mapNameToUnicode(cname);
|
||||
|
||||
if(isLegalUnicode(ou))
|
||||
return ou;
|
||||
}
|
||||
}
|
||||
|
||||
return map_to_private(code);
|
||||
return unicode_from_font(code, font);
|
||||
}
|
||||
|
||||
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
|
||||
|
Loading…
Reference in New Issue
Block a user