diff --git a/share/all.css b/share/all.css
index e3f9f89..a52e522 100644
--- a/share/all.css
+++ b/share/all.css
@@ -1,3 +1,5 @@
+/* Base CSS */
+/* by Wang Lu */
#pdf-main {
position:absolute;
top:0;
@@ -42,3 +44,4 @@ span {
.i {
position:absolute;
}
+/* Base CSS END */
diff --git a/share/head.html b/share/head.html
index 7c404ba..4df3c44 100644
--- a/share/head.html
+++ b/share/head.html
@@ -4,3 +4,4 @@
+
diff --git a/share/neck.html b/share/neck.html
index df9901c..a34e3f9 100644
--- a/share/neck.html
+++ b/share/neck.html
@@ -20,3 +20,4 @@ function show_pages()
+
diff --git a/share/tail.html b/share/tail.html
index 3d0757e..4624e30 100644
--- a/share/tail.html
+++ b/share/tail.html
@@ -2,3 +2,4 @@
+
diff --git a/share/unify.pe b/share/unify.pe
index c1724a2..96ac64b 100644
--- a/share/unify.pe
+++ b/share/unify.pe
@@ -28,3 +28,4 @@ SetOS2Value("HHeadDescent", -d)
Print(ta-td)
Print(a)
Print(d)
+# script end
diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h
index 61c9892..bbfeb52 100644
--- a/src/HTMLRenderer.h
+++ b/src/HTMLRenderer.h
@@ -125,15 +125,16 @@ class HTMLRenderer : public OutputDev
////////////////////////////////////////////////////
void add_tmp_file (const std::string & fn);
void clean_tmp_files ();
- std::string dump_embedded_font (GfxFont * font, long long fn_id);
+ boost::filesystem::path dump_embedded_font (GfxFont * font, long long fn_id);
+ void embed_font(const boost::filesystem::path & filepath, GfxFont * font, FontInfo & info);
////////////////////////////////////////////////////
// manage styles
////////////////////////////////////////////////////
FontInfo install_font(GfxFont * font);
- void install_embedded_font(GfxFont * font, const std::string & suffix, long long fn_id, FontInfo & info);
- void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id);
- void install_external_font (GfxFont * font, long long fn_id);
+ void install_embedded_font(GfxFont * font, FontInfo & info);
+ void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info);
+ void install_external_font (GfxFont * font, FontInfo & info);
long long install_font_size(double font_size);
long long install_transform_matrix(const double * tm);
diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc
index 3f5c636..4c9d135 100644
--- a/src/HTMLRenderer/install.cc
+++ b/src/HTMLRenderer/install.cc
@@ -13,16 +13,8 @@
#include
-#include
-#include
-
#include "HTMLRenderer.h"
#include "namespace.h"
-#include "config.h"
-
-using std::all_of;
-using std::max;
-using std::min;
FontInfo HTMLRenderer::install_font(GfxFont * font)
{
@@ -69,23 +61,13 @@ FontInfo HTMLRenderer::install_font(GfxFont * font)
switch(font_loc -> locType)
{
case gfxFontLocEmbedded:
- {
- string suffix = dump_embedded_font(font, new_fn_id);
- if(suffix != "")
- {
- install_embedded_font(font, suffix, new_fn_id, cur_info_iter->second);
- }
- else
- {
- export_remote_default_font(new_fn_id);
- }
- }
+ install_embedded_font(font, cur_info_iter->second);
break;
case gfxFontLocExternal:
- install_external_font(font, new_fn_id);
+ install_external_font(font, cur_info_iter->second);
break;
case gfxFontLocResident:
- install_base_font(font, font_loc, new_fn_id);
+ install_base_font(font, font_loc, cur_info_iter->second);
break;
default:
cerr << "TODO: other font loc" << endl;
@@ -104,202 +86,38 @@ FontInfo HTMLRenderer::install_font(GfxFont * font)
// TODO
// add a new function and move to text.cc
-void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix, long long fn_id, FontInfo & info)
+void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
{
- string fn = (format("f%|1$x|") % fn_id).str();
-
- path script_path = tmp_dir / (fn + ".pe");
- ofstream script_fout(script_path, ofstream::binary);
- add_tmp_file(fn+".pe");
-
- script_fout << format("Open(%1%, 1)") % (tmp_dir / (fn + suffix)) << endl;
-
- int * code2GID = nullptr;
- int code2GID_len = 0;
- int maxcode = 0;
-
- Gfx8BitFont * font_8bit = nullptr;
-
- /*
- * Step 1
- * dump the font file directly from the font descriptor and put the glyphs into the correct slots
- *
- * for 8bit + nonTrueType
- * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name)
- *
- * for 8bit + TrueType
- * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode
- *
- * for CID + nonTrueType
- * Flatten the font
- *
- * for CID Truetype
- * same as 8bitTrueType, except for that we have to check 65536 charcodes
- */
- if(!font->isCIDFont())
+ auto path = dump_embedded_font(font, info.id);
+ if(path != "")
{
- font_8bit = dynamic_cast(font);
- maxcode = 0xff;
- if(suffix == ".ttf")
- {
- script_fout << "Reencode(\"original\")" << endl;
- int buflen;
- char * buf = nullptr;
- if((buf = font->readEmbFontFile(xref, &buflen)))
- {
- FoFiTrueType *fftt = nullptr;
- if((fftt = FoFiTrueType::make(buf, buflen)))
- {
- code2GID = font_8bit->getCodeToGIDMap(fftt);
- code2GID_len = 256;
- delete fftt;
- }
- gfree(buf);
- }
- }
- else
- {
- // move the slot such that it's consistent with the encoding seen in PDF
- ofstream out(tmp_dir / (fn + "_.encoding"));
- add_tmp_file(fn+"_.encoding");
-
- out << format("/%1% [") % fn << endl;
- for(int i = 0; i < 256; ++i)
- {
- auto cn = font_8bit->getCharName(i);
- out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
- }
- out << "] def" << endl;
-
- script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
- script_fout << format("Reencode(\"%1%\")") % fn << endl;
- }
+ embed_font(path, font, info);
+ export_remote_font(info, param->font_suffix, param->font_format, font);
}
else
{
- maxcode = 0xffff;
-
- if(suffix == ".ttf")
- {
- script_fout << "Reencode(\"original\")" << endl;
-
- GfxCIDFont * _font = dynamic_cast(font);
-
- // code2GID has been stored for embedded CID fonts
- code2GID = _font->getCIDToGID();
- code2GID_len = _font->getCIDToGIDLen();
- }
- else
- {
- script_fout << "CIDFlatten()" << endl;
- }
+ export_remote_default_font(info.id);
}
-
- /*
- * Step 2
- * map charcode (or GID for CID truetype)
- * generate an Consortium encoding file and let fontforge handle it.
- *
- * - Always map to Unicode for 8bit TrueType fonts and CID fonts
- *
- * - For 8bit nonTruetype fonts:
- * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
- *
- */
-
- info.use_tounicode = ((suffix == ".ttf") || (font->isCIDFont()) || (param->always_apply_tounicode));
-
- auto ctu = font->getToUnicode();
-
- ofstream map_fout(tmp_dir / (fn + ".encoding"));
- add_tmp_file(fn+".encoding");
-
- int cnt = 0;
- for(int i = 0; i <= maxcode; ++i)
- {
- if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
- continue;
-
- ++ cnt;
- map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
-
- Unicode u, *pu=&u;
-
- if(info.use_tounicode)
- {
- int n = 0;
- if(ctu)
- n = ctu->mapToUnicode(i, &pu);
- u = check_unicode(pu, n, i, font);
- }
- else
- {
- u = unicode_from_font(i, font);
- }
-
- map_fout << format(" 0x%|1$X|") % u;
- map_fout << format(" # 0x%|1$X|") % i;
-
- map_fout << endl;
- }
-
- if(cnt > 0)
- {
- script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
- script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
- }
-
- if(ctu)
- ctu->decRefCnt();
-
- auto dest = ((param->single_html ? tmp_dir : dest_dir) / (fn+(param->font_suffix)));
- if(param->single_html)
- add_tmp_file(fn+(param->font_suffix));
-
- /*
- * [Win|Typo|HHead][Ascent|Descent]
- * Firefox & Chrome interprets the values in different ways
- * Trying to unify them
- */
- script_fout << format("Generate(%1%)") % dest << endl;
- script_fout << "Close()" << endl;
- script_fout << format("Open(%1%, 1)") % dest << endl;
- script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf();
- script_fout << format("Generate(%1%)") % dest << endl;
- script_fout.close();
-
- if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0)
- cerr << "Warning: fontforge failed." << endl;
-
- add_tmp_file(fn+".info");
- add_tmp_file(NULL_FILENAME);
-
- // read metric
- int em, ascent, descent;
- if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent)
- {
- if(em != 0)
- {
- info.ascent = ((double)ascent) / em;
- info.descent = -((double)descent) / em;
- }
- else
- {
- info.ascent = 0;
- info.descent = 0;
- }
- }
-
- if(param->debug)
- {
- cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
- }
-
- export_remote_font(info, param->font_suffix, param->font_format, font);
}
-void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
+void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info)
{
+ if(param->embed_base_font)
+ {
+ GfxFontLoc * fontloc = font->locateFont(xref, gFalse);
+ if(fontloc != nullptr)
+ {
+ embed_font(path(fontloc->path->getCString()), font, info);
+ export_remote_font(info, param->font_suffix, param->font_format, font);
+ return;
+ }
+ else
+ {
+ cerr << format("Cannot embed base font: f%|1$x|") % info.id << endl;
+ }
+
+ }
+
string psname(font_loc->path->getCString());
string basename = psname.substr(0, psname.find('-'));
string cssfont;
@@ -312,10 +130,13 @@ void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long
else
cssfont = iter->second;
- export_local_font(fn_id, font, psname, cssfont);
+ info.ascent = font->getAscent();
+ info.descent = font->getDescent();
+
+ export_local_font(info.id, font, psname, cssfont);
}
-void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id)
+void HTMLRenderer::install_external_font( GfxFont * font, FontInfo & info)
{
string fontname(font->getName()->getCString());
@@ -327,7 +148,7 @@ void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id)
cerr << "Warning: workaround for font names in bad encodings." << endl;
}
- export_local_font(fn_id, font, fontname, "");
+ export_local_font(info.id, font, fontname, "");
}
long long HTMLRenderer::install_font_size(double font_size)
diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc
index 6759765..2e7117d 100644
--- a/src/HTMLRenderer/text.cc
+++ b/src/HTMLRenderer/text.cc
@@ -11,19 +11,24 @@
#include
#include
+#include
+#include
-#include
+#include
+#include
#include "HTMLRenderer.h"
#include "namespace.h"
+#include "config.h"
-using std::all_of;
+using boost::algorithm::to_lower;
-string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
+path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
{
Object obj, obj1, obj2;
Object font_obj, font_obj2, fontdesc_obj;
string suffix;
+ path filepath;
try
{
@@ -119,7 +124,8 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
string fn = (format("f%|1$x|")%fn_id).str();
ofstream outf;
- outf.open(tmp_dir / (fn + suffix), ofstream::binary);
+ filepath = tmp_dir / (fn + suffix);
+ outf.open(filepath, ofstream::binary);
add_tmp_file(fn+suffix);
char buf[1024];
@@ -144,7 +150,204 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
font_obj2.free();
font_obj.free();
- return suffix;
+ return filepath;
+}
+
+void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & info)
+{
+ string suffix = filepath.extension().string();
+ to_lower(suffix);
+
+ string fn = (format("f%|1$x|") % info.id).str();
+
+ path script_path = tmp_dir / (fn + ".pe");
+ ofstream script_fout(script_path, ofstream::binary);
+ add_tmp_file(fn+".pe");
+
+ script_fout << format("Open(%1%, 1)") % filepath << endl;
+
+ int * code2GID = nullptr;
+ int code2GID_len = 0;
+ int maxcode = 0;
+
+ Gfx8BitFont * font_8bit = nullptr;
+
+ /*
+ * Step 1
+ * dump the font file directly from the font descriptor and put the glyphs into the correct slots
+ *
+ * for 8bit + nonTrueType
+ * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name)
+ *
+ * for 8bit + TrueType
+ * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode
+ *
+ * for CID + nonTrueType
+ * Flatten the font
+ *
+ * for CID Truetype
+ * same as 8bitTrueType, except for that we have to check 65536 charcodes
+ */
+ if(!font->isCIDFont())
+ {
+ font_8bit = dynamic_cast(font);
+ maxcode = 0xff;
+ if((suffix == ".ttf") || (suffix == ".ttc") || (suffix == ".otf"))
+ {
+ script_fout << "Reencode(\"original\")" << endl;
+ FoFiTrueType *fftt = nullptr;
+ if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
+ {
+ code2GID = font_8bit->getCodeToGIDMap(fftt);
+ code2GID_len = 256;
+ delete fftt;
+ }
+ }
+ else
+ {
+ // move the slot such that it's consistent with the encoding seen in PDF
+ ofstream out(tmp_dir / (fn + "_.encoding"));
+ add_tmp_file(fn+"_.encoding");
+
+ out << format("/%1% [") % fn << endl;
+ for(int i = 0; i < 256; ++i)
+ {
+ auto cn = font_8bit->getCharName(i);
+ out << "/" << ((cn == nullptr) ? ".notdef" : cn) << endl;
+ }
+ out << "] def" << endl;
+
+ script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl;
+ script_fout << format("Reencode(\"%1%\")") % fn << endl;
+ }
+ }
+ else
+ {
+ maxcode = 0xffff;
+
+ if(suffix == ".ttf")
+ {
+ script_fout << "Reencode(\"original\")" << endl;
+
+ GfxCIDFont * _font = dynamic_cast(font);
+
+ // code2GID has been stored for embedded CID fonts
+ code2GID = _font->getCIDToGID();
+ code2GID_len = _font->getCIDToGIDLen();
+ }
+ else
+ {
+ script_fout << "CIDFlatten()" << endl;
+ }
+ }
+
+ /*
+ * Step 2
+ * map charcode (or GID for CID truetype)
+ * generate an Consortium encoding file and let fontforge handle it.
+ *
+ * - Always map to Unicode for 8bit TrueType fonts and CID fonts
+ *
+ * - For 8bit nonTruetype fonts:
+ * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
+ *
+ */
+
+ info.use_tounicode = ((suffix == ".ttf") || (font->isCIDFont()) || (param->always_apply_tounicode));
+
+ auto ctu = font->getToUnicode();
+
+ ofstream map_fout(tmp_dir / (fn + ".encoding"));
+ add_tmp_file(fn+".encoding");
+
+ int cnt = 0;
+ for(int i = 0; i <= maxcode; ++i)
+ {
+ if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
+ continue;
+
+ ++ cnt;
+ map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
+
+ Unicode u, *pu=&u;
+
+ if(info.use_tounicode)
+ {
+ int n = 0;
+ if(ctu)
+ n = ctu->mapToUnicode(i, &pu);
+ u = check_unicode(pu, n, i, font);
+ }
+ else
+ {
+ u = unicode_from_font(i, font);
+ }
+
+ map_fout << format(" 0x%|1$X|") % u;
+ map_fout << format(" # 0x%|1$X|") % i;
+
+ map_fout << endl;
+ }
+
+ if(cnt > 0)
+ {
+ script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
+ script_fout << format("Reencode(\"%1%\", 1)") % fn << endl;
+ }
+
+ if(ctu)
+ ctu->decRefCnt();
+
+ auto dest = ((param->single_html ? tmp_dir : dest_dir) / (fn+(param->font_suffix)));
+ if(param->single_html)
+ add_tmp_file(fn+(param->font_suffix));
+
+ /*
+ * [Win|Typo|HHead][Ascent|Descent]
+ * Firefox & Chrome interprets the values in different ways
+ * Trying to unify them
+ */
+ script_fout << format("Generate(%1%)") % dest << endl;
+ script_fout << "Close()" << endl;
+ script_fout << format("Open(%1%, 1)") % dest << endl;
+ script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf();
+ script_fout << format("Generate(%1%)") % dest << endl;
+ script_fout.close();
+
+ if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0)
+ cerr << "Warning: fontforge failed." << endl;
+
+ add_tmp_file(fn+".info");
+ add_tmp_file(NULL_FILENAME);
+
+ // read metric
+ int em, ascent, descent;
+ if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent)
+ {
+ if(em != 0)
+ {
+ info.ascent = ((double)ascent) / em;
+ info.descent = -((double)descent) / em;
+ }
+ else
+ {
+ info.ascent = 0;
+ info.descent = 0;
+ }
+ }
+ else
+ {
+ cerr << "Warning: cannot read font info for " << fn << endl;
+ info.ascent = font->getAscent();
+ info.descent = font->getDescent();
+ }
+
+ if(param->debug)
+ {
+ cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
+ }
+
+ export_remote_font(info, param->font_suffix, param->font_format, font);
}
void HTMLRenderer::drawString(GfxState * state, GooString * s)
diff --git a/src/Param.h b/src/Param.h
index 6dccd68..136d6d6 100644
--- a/src/Param.h
+++ b/src/Param.h
@@ -28,6 +28,8 @@ struct Param
int process_nontext;
int single_html;
+ int embed_base_font;
+ int embed_external_font;
// Advanced tweak
double h_eps, v_eps;
diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc
index 8b2ffe8..8e3aee8 100644
--- a/src/pdf2htmlEX.cc
+++ b/src/pdf2htmlEX.cc
@@ -76,6 +76,8 @@ po::variables_map parse_options (int argc, char **argv)
("process-nontext", po::value(¶m.process_nontext)->default_value(1), "process nontext objects")
("single-html", po::value(¶m.single_html)->default_value(1), "combine everything into one single HTML file")
+ ("embed-base-font", po::value(¶m.embed_base_font)->default_value(1), "embed local matched font for base 14 fonts in the PDF file")
+ ("embed-external-font", po::value(¶m.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file")
("heps", po::value(¶m.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
("veps", po::value(¶m.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")