From 28eb7083394a9acecad27e60d07454b84910e547 Mon Sep 17 00:00:00 2001
From: Lu Wang
Date: Tue, 28 Aug 2012 00:06:09 +0800
Subject: [PATCH] a little bit better encoding

---
Notes (below the "---" separator, so not part of the commit message):

* install.cc (install_embedded_font)
  - Keeps the downcast font pointers in font_8bit / font_cid instead of
    casting at each use.
  - For 8-bit (non-CID) fonts whose suffix is not ".ttf", writes
    "<fn>_.encoding" with the 256 glyph names reported by getCharName()
    (".notdef" where the name is null) and emits LoadEncodingFile/Reencode
    script commands for it.
  - The "<fn>.encoding" map is now only written when use_tounicode is true.
    For non-".ttf" 8-bit fonts, codes without a char name are skipped, and
    the LoadEncodingFile/Reencode commands are only emitted when at least
    one entry was written (cnt > 0).
  - NOTE(review): the inner "if(use_tounicode) ... else" sits inside the new
    outer "if(use_tounicode)" block, so its else branch
    (unicode_from_font(i, font)) cannot be reached as written.
  - NOTE(review): font_cid is assigned but not read in the hunks shown here.
* text.cc
  - dump_embedded_font: drops the ".cff" -> ".pfa" pre-re-encoding step
    (FoFiType1C::convertToType1).
  - drawString: uses unicode_from_font() when use_tounicode is off.
* util.h
  - Moves the char-name -> Unicode lookup out of check_unicode() into the
    new unicode_from_font(); check_unicode() now calls it as its fallback.
  - NOTE(review): the dynamic_cast result is dereferenced without a null
    check; this relies on !isCIDFont() implying a Gfx8BitFont.
* Whitespace (indentation, exact position of blank context lines) in this
  copy is normalized and may not match the upstream blobs byte-for-byte.

 src/HTMLRenderer/install.cc | 85 +++++++++++++++++++++++++------------
 src/HTMLRenderer/text.cc    | 27 +-----------
 src/util.h                  | 36 ++++++++++------
 3 files changed, 81 insertions(+), 67 deletions(-)

diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc
index 16569ec..467aec6 100644
--- a/src/HTMLRenderer/install.cc
+++ b/src/HTMLRenderer/install.cc
@@ -136,8 +136,12 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
     int code2GID_len = 0;
     int maxcode = 0;
 
+    Gfx8BitFont * font_8bit = nullptr;
+    GfxCIDFont * font_cid = nullptr;
+
     if(!font->isCIDFont())
     {
+        font_8bit = dynamic_cast<Gfx8BitFont*>(font);
         maxcode = 0xff;
         if(suffix == ".ttf")
         {
@@ -149,7 +153,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
                 FoFiTrueType *fftt = nullptr;
                 if((fftt = FoFiTrueType::make(buf, buflen)))
                 {
-                    code2GID = dynamic_cast<Gfx8BitFont*>(font)->getCodeToGIDMap(fftt);
+                    code2GID = font_8bit->getCodeToGIDMap(fftt);
                     code2GID_len = 256;
                     delete fftt;
                 }
@@ -158,11 +162,25 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
         }
         else
         {
-            // pass
+            // move the slot such that it's consistent with the encoding seen in PDF
+            ofstream out(tmp_dir / (fn + "_.encoding"));
+            add_tmp_file(fn+"_.encoding");
+
+            out << format("/%1% [") % fn << endl;
+            for(int i = 0; i < 256; ++i)
+            {
+                auto cn = font_8bit->getCharName(i);
+                out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
+            }
+            out << "] def" << endl;
+
+            script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
+            script_fout << format("Reencode(\"%1%\")") % fn << endl;
         }
     }
     else
     {
+        font_cid = dynamic_cast<GfxCIDFont*>(font);
         maxcode = 0xffff;
 
         if(suffix == ".ttf")
@@ -180,43 +198,56 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
             script_fout << "CIDFlatten()" << endl;
         }
     }
+
 
     bool use_tounicode = ((suffix == ".ttf") || (param->always_apply_tounicode));
 
-    auto ctu = font->getToUnicode();
-    ofstream map_fout(tmp_dir / (fn + ".encoding"));
-    add_tmp_file(fn+".encoding");
-
-    for(int i = 0; i <= maxcode; ++i)
+    if(use_tounicode)
     {
-        map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
+        auto ctu = font->getToUnicode();
 
-        Unicode u, *pu=&u;
+        ofstream map_fout(tmp_dir / (fn + ".encoding"));
+        add_tmp_file(fn+".encoding");
 
-        if(use_tounicode)
+        int cnt = 0;
+        for(int i = 0; i <= maxcode; ++i)
         {
-            int n = 0;
-            if(ctu)
-                n = ctu->mapToUnicode(i, &pu);
-            u = check_unicode(pu, n, i, font);
-        }
-        else
-        {
-            u = isLegalUnicode(i) ? i : map_to_private(i);
+            if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
+                continue;
+
+            ++ cnt;
+            map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
+
+            Unicode u, *pu=&u;
+
+            if(use_tounicode)
+            {
+                int n = 0;
+                if(ctu)
+                    n = ctu->mapToUnicode(i, &pu);
+                u = check_unicode(pu, n, i, font);
+            }
+            else
+            {
+                u = unicode_from_font(i, font);
+            }
+
+            map_fout << format(" 0x%|1$X|") % u;
+            map_fout << format(" # 0x%|1$X|") % i;
+
+            map_fout << endl;
         }
 
-        map_fout << format(" 0x%|1$X|") % u;
-        map_fout << format(" # 0x%|1$X|") % i;
+        if(cnt > 0)
+        {
+            script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
+            script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
+        }
 
-        map_fout << endl;
+        if(ctu)
+            ctu->decRefCnt();
     }
 
-    script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
-    script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
-
-    if(ctu)
-        ctu->decRefCnt();
-
     script_fout << format("Generate(%1%)") % ((param->single_html ? tmp_dir : dest_dir) / (fn+".ttf")) << endl;
     if(param->single_html)
         add_tmp_file(fn+".ttf");
diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc
index 6a0359f..4e05162 100644
--- a/src/HTMLRenderer/text.cc
+++ b/src/HTMLRenderer/text.cc
@@ -130,28 +130,6 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
         }
         outf.close();
         obj.streamClose();
-
-        /*
-         * Pre re-encode the font such that it's consistent with the encoding used by PDF
-         */
-        auto output_to_file = [](void * stream, const char * data, int len)->void
-        {
-            reinterpret_cast<ofstream*>(stream)->write(data, len);
-        };
-
-        if(suffix == ".cff")
-        {
-            auto f = FoFiType1C::load((char*)((tmp_dir/(fn+suffix)).c_str()));
-
-            suffix = ".pfa";
-            outf.open(tmp_dir / (fn + suffix), ofstream::binary);
-            add_tmp_file(fn+suffix);
-
-            f->convertToType1(nullptr, (const char **)dynamic_cast<Gfx8BitFont*>(font)->getEncoding(), false, output_to_file, &outf);
-            outf.close();
-
-            delete f;
-        }
     }
     catch(int)
     {
@@ -226,10 +204,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
                 ++nSpaces;
             }
 
-            Unicode uu = (cur_font_info.use_tounicode
-                    ? check_unicode(u, uLen, code, font)
-                    : (isLegalUnicode(code) ? code : map_to_private(code))
-                    );
+            Unicode uu = (cur_font_info.use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
             outputUnicodes(html_fout, &uu, 1);
 
             dx += dx1;
diff --git a/src/util.h b/src/util.h
index d6b9b20..84dbe63 100644
--- a/src/util.h
+++ b/src/util.h
@@ -87,6 +87,27 @@ static inline Unicode map_to_private(CharCode code)
     return private_mapping;
 }
 
+/*
+ * Try to determine the Unicode value directly from the information in the font
+ */
+static inline Unicode unicode_from_font (CharCode code, GfxFont * font)
+{
+    if(!font->isCIDFont())
+    {
+        char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
+        // may be untranslated ligature
+        if(cname)
+        {
+            Unicode ou = globalParams->mapNameToUnicode(cname);
+
+            if(isLegalUnicode(ou))
+                return ou;
+        }
+    }
+
+    return map_to_private(code);
+}
+
 /*
  * We have to use a single Unicode value to reencode fonts
  * if we got multi-unicode values, it might be expanded ligature, try to restore it
@@ -103,20 +124,7 @@ static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont
         return *u;
     }
 
-    if(!font->isCIDFont())
-    {
-        char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
-        // may be untranslated ligature
-        if(cname)
-        {
-            Unicode ou = globalParams->mapNameToUnicode(cname);
-
-            if(isLegalUnicode(ou))
-                return ou;
-        }
-    }
-
-    return map_to_private(code);
+    return unicode_from_font(code, font);
 }
 
 static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)