From 0ef68558aefdb8e7a5d48378729865a66e70151d Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 31 Aug 2012 15:27:17 +0800 Subject: [PATCH] added an option to embed local font for base 14 fonts --- share/all.css | 3 + share/head.html | 1 + share/neck.html | 1 + share/tail.html | 1 + share/unify.pe | 1 + src/HTMLRenderer.h | 9 +- src/HTMLRenderer/install.cc | 243 +++++------------------------------- src/HTMLRenderer/text.cc | 213 ++++++++++++++++++++++++++++++- src/Param.h | 2 + src/pdf2htmlEX.cc | 2 + 10 files changed, 256 insertions(+), 220 deletions(-) diff --git a/share/all.css b/share/all.css index e3f9f89..a52e522 100644 --- a/share/all.css +++ b/share/all.css @@ -1,3 +1,5 @@ +/* Base CSS */ +/* by Wang Lu */ #pdf-main { position:absolute; top:0; @@ -42,3 +44,4 @@ span { .i { position:absolute; } +/* Base CSS END */ diff --git a/share/head.html b/share/head.html index 7c404ba..4df3c44 100644 --- a/share/head.html +++ b/share/head.html @@ -4,3 +4,4 @@ + diff --git a/share/neck.html b/share/neck.html index df9901c..a34e3f9 100644 --- a/share/neck.html +++ b/share/neck.html @@ -20,3 +20,4 @@ function show_pages()
+ diff --git a/share/tail.html b/share/tail.html index 3d0757e..4624e30 100644 --- a/share/tail.html +++ b/share/tail.html @@ -2,3 +2,4 @@
+ diff --git a/share/unify.pe b/share/unify.pe index c1724a2..96ac64b 100644 --- a/share/unify.pe +++ b/share/unify.pe @@ -28,3 +28,4 @@ SetOS2Value("HHeadDescent", -d) Print(ta-td) Print(a) Print(d) +# script end diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index 61c9892..bbfeb52 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -125,15 +125,16 @@ class HTMLRenderer : public OutputDev //////////////////////////////////////////////////// void add_tmp_file (const std::string & fn); void clean_tmp_files (); - std::string dump_embedded_font (GfxFont * font, long long fn_id); + boost::filesystem::path dump_embedded_font (GfxFont * font, long long fn_id); + void embed_font(const boost::filesystem::path & filepath, GfxFont * font, FontInfo & info); //////////////////////////////////////////////////// // manage styles //////////////////////////////////////////////////// FontInfo install_font(GfxFont * font); - void install_embedded_font(GfxFont * font, const std::string & suffix, long long fn_id, FontInfo & info); - void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id); - void install_external_font (GfxFont * font, long long fn_id); + void install_embedded_font(GfxFont * font, FontInfo & info); + void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info); + void install_external_font (GfxFont * font, FontInfo & info); long long install_font_size(double font_size); long long install_transform_matrix(const double * tm); diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 3f5c636..4c9d135 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -13,16 +13,8 @@ #include -#include -#include - #include "HTMLRenderer.h" #include "namespace.h" -#include "config.h" - -using std::all_of; -using std::max; -using std::min; FontInfo HTMLRenderer::install_font(GfxFont * font) { @@ -69,23 +61,13 @@ FontInfo HTMLRenderer::install_font(GfxFont * font) switch(font_loc -> locType) { 
case gfxFontLocEmbedded: - { - string suffix = dump_embedded_font(font, new_fn_id); - if(suffix != "") - { - install_embedded_font(font, suffix, new_fn_id, cur_info_iter->second); - } - else - { - export_remote_default_font(new_fn_id); - } - } + install_embedded_font(font, cur_info_iter->second); break; case gfxFontLocExternal: - install_external_font(font, new_fn_id); + install_external_font(font, cur_info_iter->second); break; case gfxFontLocResident: - install_base_font(font, font_loc, new_fn_id); + install_base_font(font, font_loc, cur_info_iter->second); break; default: cerr << "TODO: other font loc" << endl; @@ -104,202 +86,38 @@ FontInfo HTMLRenderer::install_font(GfxFont * font) // TODO // add a new function and move to text.cc -void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix, long long fn_id, FontInfo & info) +void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info) { - string fn = (format("f%|1$x|") % fn_id).str(); - - path script_path = tmp_dir / (fn + ".pe"); - ofstream script_fout(script_path, ofstream::binary); - add_tmp_file(fn+".pe"); - - script_fout << format("Open(%1%, 1)") % (tmp_dir / (fn + suffix)) << endl; - - int * code2GID = nullptr; - int code2GID_len = 0; - int maxcode = 0; - - Gfx8BitFont * font_8bit = nullptr; - - /* - * Step 1 - * dump the font file directly from the font descriptor and put the glyphs into the correct slots - * - * for 8bit + nonTrueType - * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) - * - * for 8bit + TrueType - * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode - * - * for CID + nonTrueType - * Flatten the font - * - * for CID Truetype - * same as 8bitTrueType, except for that we have to check 65536 charcodes - */ - if(!font->isCIDFont()) + auto path = dump_embedded_font(font, info.id); + if(path != "") { - font_8bit = dynamic_cast(font); - maxcode = 0xff; - if(suffix == ".ttf") - { - 
script_fout << "Reencode(\"original\")" << endl; - int buflen; - char * buf = nullptr; - if((buf = font->readEmbFontFile(xref, &buflen))) - { - FoFiTrueType *fftt = nullptr; - if((fftt = FoFiTrueType::make(buf, buflen))) - { - code2GID = font_8bit->getCodeToGIDMap(fftt); - code2GID_len = 256; - delete fftt; - } - gfree(buf); - } - } - else - { - // move the slot such that it's consistent with the encoding seen in PDF - ofstream out(tmp_dir / (fn + "_.encoding")); - add_tmp_file(fn+"_.encoding"); - - out << format("/%1% [") % fn << endl; - for(int i = 0; i < 256; ++i) - { - auto cn = font_8bit->getCharName(i); - out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl; - } - out << "] def" << endl; - - script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl; - script_fout << format("Reencode(\"%1%\")") % fn << endl; - } + embed_font(path, font, info); + export_remote_font(info, param->font_suffix, param->font_format, font); } else { - maxcode = 0xffff; - - if(suffix == ".ttf") - { - script_fout << "Reencode(\"original\")" << endl; - - GfxCIDFont * _font = dynamic_cast(font); - - // code2GID has been stored for embedded CID fonts - code2GID = _font->getCIDToGID(); - code2GID_len = _font->getCIDToGIDLen(); - } - else - { - script_fout << "CIDFlatten()" << endl; - } + export_remote_default_font(info.id); } - - /* - * Step 2 - * map charcode (or GID for CID truetype) - * generate an Consortium encoding file and let fontforge handle it. 
- * - * - Always map to Unicode for 8bit TrueType fonts and CID fonts - * - * - For 8bit nonTruetype fonts: - * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set - * - */ - - info.use_tounicode = ((suffix == ".ttf") || (font->isCIDFont()) || (param->always_apply_tounicode)); - - auto ctu = font->getToUnicode(); - - ofstream map_fout(tmp_dir / (fn + ".encoding")); - add_tmp_file(fn+".encoding"); - - int cnt = 0; - for(int i = 0; i <= maxcode; ++i) - { - if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr)) - continue; - - ++ cnt; - map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i); - - Unicode u, *pu=&u; - - if(info.use_tounicode) - { - int n = 0; - if(ctu) - n = ctu->mapToUnicode(i, &pu); - u = check_unicode(pu, n, i, font); - } - else - { - u = unicode_from_font(i, font); - } - - map_fout << format(" 0x%|1$X|") % u; - map_fout << format(" # 0x%|1$X|") % i; - - map_fout << endl; - } - - if(cnt > 0) - { - script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl; - script_fout << format("Reencode(\"%1%\", 1)") % fn << endl; - } - - if(ctu) - ctu->decRefCnt(); - - auto dest = ((param->single_html ? 
tmp_dir : dest_dir) / (fn+(param->font_suffix))); - if(param->single_html) - add_tmp_file(fn+(param->font_suffix)); - - /* - * [Win|Typo|HHead][Ascent|Descent] - * Firefox & Chrome interprets the values in different ways - * Trying to unify them - */ - script_fout << format("Generate(%1%)") % dest << endl; - script_fout << "Close()" << endl; - script_fout << format("Open(%1%, 1)") % dest << endl; - script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf(); - script_fout << format("Generate(%1%)") % dest << endl; - script_fout.close(); - - if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0) - cerr << "Warning: fontforge failed." << endl; - - add_tmp_file(fn+".info"); - add_tmp_file(NULL_FILENAME); - - // read metric - int em, ascent, descent; - if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent) - { - if(em != 0) - { - info.ascent = ((double)ascent) / em; - info.descent = -((double)descent) / em; - } - else - { - info.ascent = 0; - info.descent = 0; - } - } - - if(param->debug) - { - cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl; - } - - export_remote_font(info, param->font_suffix, param->font_format, font); } -void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id) +void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info) { + if(param->embed_base_font) + { + GfxFontLoc * fontloc = font->locateFont(xref, gFalse); + if(fontloc != nullptr) + { + embed_font(path(fontloc->path->getCString()), font, info); + export_remote_font(info, param->font_suffix, param->font_format, font); + return; + } + else + { + cerr << format("Cannot embed base font: f%|1$x|") % info.id << endl; + } + + } + string psname(font_loc->path->getCString()); string basename = psname.substr(0, psname.find('-')); string cssfont; @@ -312,10 +130,13 @@ void 
HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long else cssfont = iter->second; - export_local_font(fn_id, font, psname, cssfont); + info.ascent = font->getAscent(); + info.descent = font->getDescent(); + + export_local_font(info.id, font, psname, cssfont); } -void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id) +void HTMLRenderer::install_external_font( GfxFont * font, FontInfo & info) { string fontname(font->getName()->getCString()); @@ -327,7 +148,7 @@ void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id) cerr << "Warning: workaround for font names in bad encodings." << endl; } - export_local_font(fn_id, font, fontname, ""); + export_local_font(info.id, font, fontname, ""); } long long HTMLRenderer::install_font_size(double font_size) diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 6759765..2e7117d 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -11,19 +11,24 @@ #include #include +#include +#include -#include +#include +#include #include "HTMLRenderer.h" #include "namespace.h" +#include "config.h" -using std::all_of; +using boost::algorithm::to_lower; -string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) +path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) { Object obj, obj1, obj2; Object font_obj, font_obj2, fontdesc_obj; string suffix; + path filepath; try { @@ -119,7 +124,8 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) string fn = (format("f%|1$x|")%fn_id).str(); ofstream outf; - outf.open(tmp_dir / (fn + suffix), ofstream::binary); + filepath = tmp_dir / (fn + suffix); + outf.open(filepath, ofstream::binary); add_tmp_file(fn+suffix); char buf[1024]; @@ -144,7 +150,204 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) font_obj2.free(); font_obj.free(); - return suffix; + return filepath; +} + +void HTMLRenderer::embed_font(const path & filepath, 
GfxFont * font, FontInfo & info) +{ + string suffix = filepath.extension().string(); + to_lower(suffix); + + string fn = (format("f%|1$x|") % info.id).str(); + + path script_path = tmp_dir / (fn + ".pe"); + ofstream script_fout(script_path, ofstream::binary); + add_tmp_file(fn+".pe"); + + script_fout << format("Open(%1%, 1)") % filepath << endl; + + int * code2GID = nullptr; + int code2GID_len = 0; + int maxcode = 0; + + Gfx8BitFont * font_8bit = nullptr; + + /* + * Step 1 + * dump the font file directly from the font descriptor and put the glyphs into the correct slots + * + * for 8bit + nonTrueType + * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) + * + * for 8bit + TrueType + * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode + * + * for CID + nonTrueType + * Flatten the font + * + * for CID Truetype + * same as 8bitTrueType, except for that we have to check 65536 charcodes + */ + if(!font->isCIDFont()) + { + font_8bit = dynamic_cast(font); + maxcode = 0xff; + if((suffix == ".ttf") || (suffix == ".ttc") || (suffix == ".otf")) + { + script_fout << "Reencode(\"original\")" << endl; + FoFiTrueType *fftt = nullptr; + if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) + { + code2GID = font_8bit->getCodeToGIDMap(fftt); + code2GID_len = 256; + delete fftt; + } + } + else + { + // move the slot such that it's consistent with the encoding seen in PDF + ofstream out(tmp_dir / (fn + "_.encoding")); + add_tmp_file(fn+"_.encoding"); + + out << format("/%1% [") % fn << endl; + for(int i = 0; i < 256; ++i) + { + auto cn = font_8bit->getCharName(i); + out << "/" << ((cn == nullptr) ? 
".notdef" : cn) << endl; + } + out << "] def" << endl; + + script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl; + script_fout << format("Reencode(\"%1%\")") % fn << endl; + } + } + else + { + maxcode = 0xffff; + + if(suffix == ".ttf") + { + script_fout << "Reencode(\"original\")" << endl; + + GfxCIDFont * _font = dynamic_cast(font); + + // code2GID has been stored for embedded CID fonts + code2GID = _font->getCIDToGID(); + code2GID_len = _font->getCIDToGIDLen(); + } + else + { + script_fout << "CIDFlatten()" << endl; + } + } + + /* + * Step 2 + * map charcode (or GID for CID truetype) + * generate an Consortium encoding file and let fontforge handle it. + * + * - Always map to Unicode for 8bit TrueType fonts and CID fonts + * + * - For 8bit nonTruetype fonts: + * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set + * + */ + + info.use_tounicode = ((suffix == ".ttf") || (font->isCIDFont()) || (param->always_apply_tounicode)); + + auto ctu = font->getToUnicode(); + + ofstream map_fout(tmp_dir / (fn + ".encoding")); + add_tmp_file(fn+".encoding"); + + int cnt = 0; + for(int i = 0; i <= maxcode; ++i) + { + if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr)) + continue; + + ++ cnt; + map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i); + + Unicode u, *pu=&u; + + if(info.use_tounicode) + { + int n = 0; + if(ctu) + n = ctu->mapToUnicode(i, &pu); + u = check_unicode(pu, n, i, font); + } + else + { + u = unicode_from_font(i, font); + } + + map_fout << format(" 0x%|1$X|") % u; + map_fout << format(" # 0x%|1$X|") % i; + + map_fout << endl; + } + + if(cnt > 0) + { + script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl; + script_fout << format("Reencode(\"%1%\", 1)") % fn << endl; + } + + if(ctu) + ctu->decRefCnt(); + + auto dest = ((param->single_html ? 
tmp_dir : dest_dir) / (fn+(param->font_suffix))); + if(param->single_html) + add_tmp_file(fn+(param->font_suffix)); + + /* + * [Win|Typo|HHead][Ascent|Descent] + * Firefox & Chrome interprets the values in different ways + * Trying to unify them + */ + script_fout << format("Generate(%1%)") % dest << endl; + script_fout << "Close()" << endl; + script_fout << format("Open(%1%, 1)") % dest << endl; + script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf(); + script_fout << format("Generate(%1%)") % dest << endl; + script_fout.close(); + + if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0) + cerr << "Warning: fontforge failed." << endl; + + add_tmp_file(fn+".info"); + add_tmp_file(NULL_FILENAME); + + // read metric + int em, ascent, descent; + if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent) + { + if(em != 0) + { + info.ascent = ((double)ascent) / em; + info.descent = -((double)descent) / em; + } + else + { + info.ascent = 0; + info.descent = 0; + } + } + else + { + cerr << "Warning: cannot read font info for " << fn << endl; + info.ascent = font->getAscent(); + info.descent = font->getDescent(); + } + + if(param->debug) + { + cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl; + } + + export_remote_font(info, param->font_suffix, param->font_format, font); } void HTMLRenderer::drawString(GfxState * state, GooString * s) diff --git a/src/Param.h b/src/Param.h index 6dccd68..136d6d6 100644 --- a/src/Param.h +++ b/src/Param.h @@ -28,6 +28,8 @@ struct Param int process_nontext; int single_html; + int embed_base_font; + int embed_external_font; // Advanced tweak double h_eps, v_eps; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 8b2ffe8..8e3aee8 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -76,6 +76,8 @@ po::variables_map parse_options (int argc, char **argv) ("process-nontext", 
po::value(&param.process_nontext)->default_value(1), "process nontext objects") ("single-html", po::value(&param.single_html)->default_value(1), "combine everything into one single HTML file") + ("embed-base-font", po::value(&param.embed_base_font)->default_value(1), "embed local matched font for base 14 fonts in the PDF file") + ("embed-external-font", po::value(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file") ("heps", po::value(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)") ("veps", po::value(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")