mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
a little bit better encoding
This commit is contained in:
parent
1642b4a37e
commit
28eb708339
@ -136,8 +136,12 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
|||||||
int code2GID_len = 0;
|
int code2GID_len = 0;
|
||||||
int maxcode = 0;
|
int maxcode = 0;
|
||||||
|
|
||||||
|
Gfx8BitFont * font_8bit = nullptr;
|
||||||
|
GfxCIDFont * font_cid = nullptr;
|
||||||
|
|
||||||
if(!font->isCIDFont())
|
if(!font->isCIDFont())
|
||||||
{
|
{
|
||||||
|
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
|
||||||
maxcode = 0xff;
|
maxcode = 0xff;
|
||||||
if(suffix == ".ttf")
|
if(suffix == ".ttf")
|
||||||
{
|
{
|
||||||
@ -149,7 +153,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
|||||||
FoFiTrueType *fftt = nullptr;
|
FoFiTrueType *fftt = nullptr;
|
||||||
if((fftt = FoFiTrueType::make(buf, buflen)))
|
if((fftt = FoFiTrueType::make(buf, buflen)))
|
||||||
{
|
{
|
||||||
code2GID = dynamic_cast<Gfx8BitFont*>(font)->getCodeToGIDMap(fftt);
|
code2GID = font_8bit->getCodeToGIDMap(fftt);
|
||||||
code2GID_len = 256;
|
code2GID_len = 256;
|
||||||
delete fftt;
|
delete fftt;
|
||||||
}
|
}
|
||||||
@ -158,11 +162,25 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// pass
|
// move the slot such that it's consistent with the encoding seen in PDF
|
||||||
|
ofstream out(tmp_dir / (fn + "_.encoding"));
|
||||||
|
add_tmp_file(fn+"_.encoding");
|
||||||
|
|
||||||
|
out << format("/%1% [") % fn << endl;
|
||||||
|
for(int i = 0; i < 256; ++i)
|
||||||
|
{
|
||||||
|
auto cn = font_8bit->getCharName(i);
|
||||||
|
out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
|
||||||
|
}
|
||||||
|
out << "] def" << endl;
|
||||||
|
|
||||||
|
script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
|
||||||
|
script_fout << format("Reencode(\"%1%\")") % fn << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
font_cid = dynamic_cast<GfxCIDFont*>(font);
|
||||||
maxcode = 0xffff;
|
maxcode = 0xffff;
|
||||||
|
|
||||||
if(suffix == ".ttf")
|
if(suffix == ".ttf")
|
||||||
@ -180,43 +198,56 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
|||||||
script_fout << "CIDFlatten()" << endl;
|
script_fout << "CIDFlatten()" << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool use_tounicode = ((suffix == ".ttf") || (param->always_apply_tounicode));
|
bool use_tounicode = ((suffix == ".ttf") || (param->always_apply_tounicode));
|
||||||
auto ctu = font->getToUnicode();
|
|
||||||
|
|
||||||
ofstream map_fout(tmp_dir / (fn + ".encoding"));
|
if(use_tounicode)
|
||||||
add_tmp_file(fn+".encoding");
|
|
||||||
|
|
||||||
for(int i = 0; i <= maxcode; ++i)
|
|
||||||
{
|
{
|
||||||
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
|
auto ctu = font->getToUnicode();
|
||||||
|
|
||||||
Unicode u, *pu=&u;
|
ofstream map_fout(tmp_dir / (fn + ".encoding"));
|
||||||
|
add_tmp_file(fn+".encoding");
|
||||||
|
|
||||||
if(use_tounicode)
|
int cnt = 0;
|
||||||
|
for(int i = 0; i <= maxcode; ++i)
|
||||||
{
|
{
|
||||||
int n = 0;
|
if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
|
||||||
if(ctu)
|
continue;
|
||||||
n = ctu->mapToUnicode(i, &pu);
|
|
||||||
u = check_unicode(pu, n, i, font);
|
++ cnt;
|
||||||
}
|
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
|
||||||
else
|
|
||||||
{
|
Unicode u, *pu=&u;
|
||||||
u = isLegalUnicode(i) ? i : map_to_private(i);
|
|
||||||
|
if(use_tounicode)
|
||||||
|
{
|
||||||
|
int n = 0;
|
||||||
|
if(ctu)
|
||||||
|
n = ctu->mapToUnicode(i, &pu);
|
||||||
|
u = check_unicode(pu, n, i, font);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
u = unicode_from_font(i, font);
|
||||||
|
}
|
||||||
|
|
||||||
|
map_fout << format(" 0x%|1$X|") % u;
|
||||||
|
map_fout << format(" # 0x%|1$X|") % i;
|
||||||
|
|
||||||
|
map_fout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
map_fout << format(" 0x%|1$X|") % u;
|
if(cnt > 0)
|
||||||
map_fout << format(" # 0x%|1$X|") % i;
|
{
|
||||||
|
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
|
||||||
|
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
|
||||||
|
}
|
||||||
|
|
||||||
map_fout << endl;
|
if(ctu)
|
||||||
|
ctu->decRefCnt();
|
||||||
}
|
}
|
||||||
|
|
||||||
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
|
|
||||||
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
|
|
||||||
|
|
||||||
if(ctu)
|
|
||||||
ctu->decRefCnt();
|
|
||||||
|
|
||||||
script_fout << format("Generate(%1%)") % ((param->single_html ? tmp_dir : dest_dir) / (fn+".ttf")) << endl;
|
script_fout << format("Generate(%1%)") % ((param->single_html ? tmp_dir : dest_dir) / (fn+".ttf")) << endl;
|
||||||
if(param->single_html)
|
if(param->single_html)
|
||||||
add_tmp_file(fn+".ttf");
|
add_tmp_file(fn+".ttf");
|
||||||
|
@ -130,28 +130,6 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
|||||||
}
|
}
|
||||||
outf.close();
|
outf.close();
|
||||||
obj.streamClose();
|
obj.streamClose();
|
||||||
|
|
||||||
/*
|
|
||||||
* Pre re-encode the font such that it's consistent with the encoding used by PDF
|
|
||||||
*/
|
|
||||||
auto output_to_file = [](void * stream, const char * data, int len)->void
|
|
||||||
{
|
|
||||||
reinterpret_cast<ostream*>(stream)->write(data, len);
|
|
||||||
};
|
|
||||||
|
|
||||||
if(suffix == ".cff")
|
|
||||||
{
|
|
||||||
auto f = FoFiType1C::load((char*)((tmp_dir/(fn+suffix)).c_str()));
|
|
||||||
|
|
||||||
suffix = ".pfa";
|
|
||||||
outf.open(tmp_dir / (fn + suffix), ofstream::binary);
|
|
||||||
add_tmp_file(fn+suffix);
|
|
||||||
|
|
||||||
f->convertToType1(nullptr, (const char **)dynamic_cast<Gfx8BitFont*>(font)->getEncoding(), false, output_to_file, &outf);
|
|
||||||
outf.close();
|
|
||||||
|
|
||||||
delete f;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
catch(int)
|
catch(int)
|
||||||
{
|
{
|
||||||
@ -226,10 +204,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
++nSpaces;
|
++nSpaces;
|
||||||
}
|
}
|
||||||
|
|
||||||
Unicode uu = (cur_font_info.use_tounicode
|
Unicode uu = (cur_font_info.use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
||||||
? check_unicode(u, uLen, code, font)
|
|
||||||
: (isLegalUnicode(code) ? code : map_to_private(code))
|
|
||||||
);
|
|
||||||
outputUnicodes(html_fout, &uu, 1);
|
outputUnicodes(html_fout, &uu, 1);
|
||||||
|
|
||||||
dx += dx1;
|
dx += dx1;
|
||||||
|
36
src/util.h
36
src/util.h
@ -87,6 +87,27 @@ static inline Unicode map_to_private(CharCode code)
|
|||||||
return private_mapping;
|
return private_mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Try to determine the Unicode value directly from the information in the font
|
||||||
|
*/
|
||||||
|
static inline Unicode unicode_from_font (CharCode code, GfxFont * font)
|
||||||
|
{
|
||||||
|
if(!font->isCIDFont())
|
||||||
|
{
|
||||||
|
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
|
||||||
|
// may be untranslated ligature
|
||||||
|
if(cname)
|
||||||
|
{
|
||||||
|
Unicode ou = globalParams->mapNameToUnicode(cname);
|
||||||
|
|
||||||
|
if(isLegalUnicode(ou))
|
||||||
|
return ou;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return map_to_private(code);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have to use a single Unicode value to reencode fonts
|
* We have to use a single Unicode value to reencode fonts
|
||||||
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
||||||
@ -103,20 +124,7 @@ static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont
|
|||||||
return *u;
|
return *u;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!font->isCIDFont())
|
return unicode_from_font(code, font);
|
||||||
{
|
|
||||||
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
|
|
||||||
// may be untranslated ligature
|
|
||||||
if(cname)
|
|
||||||
{
|
|
||||||
Unicode ou = globalParams->mapNameToUnicode(cname);
|
|
||||||
|
|
||||||
if(isLegalUnicode(ou))
|
|
||||||
return ou;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return map_to_private(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
|
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
|
||||||
|
Loading…
Reference in New Issue
Block a user