diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h
index d583e17..3a9f0b7 100644
--- a/src/HTMLRenderer/HTMLRenderer.h
+++ b/src/HTMLRenderer/HTMLRenderer.h
@@ -146,27 +146,30 @@ protected:
void set_stream_flags (std::ostream & out);
- std::string dump_embedded_font (GfxFont * font, long long fn_id);
- void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
-
// convert a LinkAction to a string that our Javascript code can understand
std::string get_linkaction_str(LinkAction *, std::string & detail);
////////////////////////////////////////////////////
- // manage styles
+ /*
+ * manage fonts
+ *
+ * In PDF: (handled by install_*)
+ * embedded font: font programs embedded in the PDF
+ * base font: the standard 14 fonts defined in the PDF spec
+ * external font: fonts referenced by name only in the PDF; the viewer has to find a matching local font
+ *
+ * In HTML: (handled by export_*)
+ * remote font: to be retrieved from the web server
+ * remote default font: fallback styles for invalid fonts
+ * local font: to be substituted with a local (client side) font
+ */
////////////////////////////////////////////////////
+ std::string dump_embedded_font (GfxFont * font, long long fn_id);
+ void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
const FontInfo * install_font(GfxFont * font);
void install_embedded_font(GfxFont * font, FontInfo & info);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, FontInfo & info);
void install_external_font (GfxFont * font, FontInfo & info);
-
- ////////////////////////////////////////////////////
- // export css styles
- ////////////////////////////////////////////////////
- /*
- * remote font: to be retrieved from the web server
- * local font: to be substituted with a local (client side) font
- */
void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font);
void export_remote_default_font(long long fn_id);
void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont);
diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc
index a6c24be..5d17d57 100644
--- a/src/HTMLRenderer/font.cc
+++ b/src/HTMLRenderer/font.cc
@@ -11,8 +11,11 @@
#include
#include
#include
+#include
#include
+#include
+#include
#include "Param.h"
#include "HTMLRenderer.h"
@@ -20,13 +23,482 @@
#include "util/math.h"
#include "util/misc.h"
#include "util/base64stream.h"
+#include "util/ffw.h"
+#include "util/path.h"
+#include "util/unicode.h"
namespace pdf2htmlEX {
-using std::abs;
+using std::min;
+using std::unordered_set;
using std::cerr;
using std::endl;
+/*
+ * Dump the embedded font program of `font` into a temporary file.
+ *
+ * The font dictionary is fetched through xref; when it has a non-empty
+ * DescendantFonts array whose first entry is a dictionary, that entry is
+ * used instead. The FontDescriptor is then searched for a FontFile3,
+ * FontFile2 or FontFile stream (in that order), which is copied to
+ *     <tmp_dir>/f<fn_id in hex><suffix>
+ * where suffix is .cff / .cid (FontFile3), .ttf (FontFile2) or .pfa (FontFile).
+ *
+ * Returns the path of the dumped file, or an empty string when no font
+ * program could be located (a diagnostic is written to cerr).
+ * Throws a string when the output file cannot be opened.
+ */
+string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
+{
+    Object obj, obj1, obj2;
+    Object font_obj, font_obj2, fontdesc_obj;
+    string suffix;
+    string filepath;
+
+    // Releases every Object used below; shared by the normal exit path and
+    // by the path where an exception leaves this function.
+    auto free_objects = [&]()
+    {
+        obj2.free();
+        obj1.free();
+        obj.free();
+
+        fontdesc_obj.free();
+        font_obj2.free();
+        font_obj.free();
+    };
+
+    try
+    {
+        // mupdf consulted
+        string subtype;
+
+        auto * id = font->getID();
+
+        Object ref_obj;
+        ref_obj.initRef(id->num, id->gen);
+        ref_obj.fetch(xref, &font_obj);
+        ref_obj.free();
+
+        if(!font_obj.isDict())
+        {
+            cerr << "Font object is not a dictionary" << endl;
+            throw 0;
+        }
+
+        Dict * dict = font_obj.getDict();
+        if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
+        {
+            if(font_obj2.arrayGetLength() == 0)
+            {
+                cerr << "Warning: empty DescendantFonts array" << endl;
+            }
+            else
+            {
+                if(font_obj2.arrayGetLength() > 1)
+                    cerr << "TODO: multiple entries in DescendantFonts array" << endl;
+
+                // only the first descendant is considered
+                if(font_obj2.arrayGet(0, &obj2)->isDict())
+                {
+                    dict = obj2.getDict();
+                }
+            }
+        }
+
+        if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
+        {
+            cerr << "Cannot find FontDescriptor " << endl;
+            throw 0;
+        }
+
+        dict = fontdesc_obj.getDict();
+
+        if(dict->lookup("FontFile3", &obj)->isStream())
+        {
+            if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
+            {
+                subtype = obj1.getName();
+                if(subtype == "Type1C")
+                {
+                    suffix = ".cff";
+                }
+                else if (subtype == "CIDFontType0C")
+                {
+                    suffix = ".cid";
+                }
+                else
+                {
+                    cerr << "Unknown subtype: " << subtype << endl;
+                    throw 0;
+                }
+            }
+            else
+            {
+                cerr << "Invalid subtype in font descriptor" << endl;
+                throw 0;
+            }
+        }
+        else if (dict->lookup("FontFile2", &obj)->isStream())
+        {
+            suffix = ".ttf";
+        }
+        else if (dict->lookup("FontFile", &obj)->isStream())
+        {
+            suffix = ".pfa";
+        }
+        else
+        {
+            cerr << "Cannot find FontFile for dump" << endl;
+            throw 0;
+        }
+
+        if(suffix == "")
+        {
+            cerr << "Font type unrecognized" << endl;
+            throw 0;
+        }
+
+        filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str());
+        tmp_files.add(filepath);
+
+        // Open the destination before touching the stream, so that a failure
+        // here does not leave a reset stream that is never closed.
+        ofstream outf(filepath, ofstream::binary);
+        if(!outf)
+            throw string("Cannot open file ") + filepath + " for writing";
+
+        obj.streamReset();
+
+        char buf[1024];
+        int len;
+        while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0)
+        {
+            outf.write(buf, len);
+        }
+        outf.close();
+        obj.streamClose();
+    }
+    catch(int)
+    {
+        // `throw 0` marks "this font cannot be dumped"; the specific reason was already printed
+        cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl;
+    }
+    catch(...)
+    {
+        // Anything else (e.g. the string thrown above) propagates to the
+        // caller, but the Objects must not be leaked on the way out.
+        free_objects();
+        throw;
+    }
+
+    free_objects();
+
+    return filepath;
+}
+
+/*
+ * Process the font file at `filepath` with the ffw_* wrappers and write the
+ * result as <dir>/f<info.id in hex><param->font_suffix>, where <dir> is
+ * tmp_dir in single_html mode and dest_dir otherwise.
+ *
+ * filepath        - font file to load; its suffix decides whether the
+ *                   TrueType code paths are taken
+ * font            - the PDF font object the file belongs to
+ * info            - receives em_size, use_tounicode, ascent and descent
+ * get_metric_only - when true, only em_size / ascent / descent are read from
+ *                   the loaded font and nothing is re-encoded or written
+ *
+ * Uses the member buffers cur_mapping, cur_mapping2 and width_list as scratch space.
+ */
+void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only)
+{
+    if(param->debug)
+    {
+        cerr << "Embed font: " << filepath << " " << info.id << endl;
+    }
+
+    ffw_load_font(filepath.c_str());
+    ffw_prepare_font();
+
+    // Lower-cased suffix of the input file.
+    // tolower() is undefined for negative char values, hence the unsigned char cast.
+    string suffix = get_suffix(filepath);
+    for(auto iter = suffix.begin(); iter != suffix.end(); ++iter)
+        *iter = static_cast<char>(tolower(static_cast<unsigned char>(*iter)));
+
+    if(param->debug)
+    {
+        // Keep a verbatim copy of the input font for debugging. The copy is
+        // in the format of `filepath`, so it carries that file's suffix.
+        // NOTE(review): assumes get_suffix() keeps the leading dot, like the
+        // suffixes built in dump_embedded_font -- TODO confirm.
+        auto fn = str_fmt("%s/__raw_font_%lld%s", param->tmp_dir.c_str(), info.id, suffix.c_str());
+        tmp_files.add((char*)fn);
+        ofstream((char*)fn, ofstream::binary) << ifstream(filepath, ifstream::binary).rdbuf();
+    }
+
+    int * code2GID = nullptr;
+    int code2GID_len = 0;
+    int maxcode = 0;
+
+    Gfx8BitFont * font_8bit = nullptr;
+    GfxCIDFont * font_cid = nullptr;
+
+    /*
+     * if param->tounicode is 0, try the provided tounicode map first
+     */
+    info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0));
+    bool has_space = false;
+
+    const char * used_map = nullptr;
+
+    info.em_size = ffw_get_em_size();
+
+    if(get_metric_only)
+    {
+        ffw_metric(&info.ascent, &info.descent);
+        ffw_close();
+        return;
+    }
+
+    used_map = preprocessor.get_code_map(hash_ref(font->getID()));
+
+    /*
+     * Step 1
+     * dump the font file directly from the font descriptor and put the glyphs into the correct slots
+     *
+     * for 8bit + nonTrueType
+     * re-encoding the font by glyph names
+     *
+     * for 8bit + TrueType
+     * sort the glyphs as the original order, and later will map GID (instead of char code) to Unicode
+     *
+     * for CID + nonTrueType
+     * Flatten the font
+     *
+     * for CID Truetype
+     * same as 8bitTrueType, except for that we have to check 65536 charcodes
+     */
+    if(!font->isCIDFont())
+    {
+        font_8bit = dynamic_cast<Gfx8BitFont*>(font);
+        maxcode = 0xff;
+        if(is_truetype_suffix(suffix))
+        {
+            ffw_reencode_glyph_order();
+            FoFiTrueType *fftt = nullptr;
+            if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
+            {
+                // NOTE(review): poppler callers appear to own the array returned
+                // by getCodeToGIDMap() and free it; it is never released here --
+                // confirm and free it after Step 2 if so.
+                code2GID = font_8bit->getCodeToGIDMap(fftt);
+                code2GID_len = 256;
+                delete fftt;
+            }
+        }
+        else
+        {
+            // move the slot such that it's consistent with the encoding seen in PDF
+            unordered_set<string> nameset;
+            bool name_conflict_warned = false;
+
+            memset(cur_mapping2, 0, 0x100 * sizeof(char*));
+
+            for(int i = 0; i < 256; ++i)
+            {
+                if(!used_map[i]) continue;
+
+                auto cn = font_8bit->getCharName(i);
+                if(cn == nullptr)
+                {
+                    continue;
+                }
+                else
+                {
+                    // only the first code using a given glyph name keeps it
+                    if(nameset.insert(string(cn)).second)
+                    {
+                        cur_mapping2[i] = cn;
+                    }
+                    else
+                    {
+                        if(!name_conflict_warned)
+                        {
+                            name_conflict_warned = true;
+                            //TODO: may be resolved using advanced font properties?
+                            cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
+                        }
+                    }
+                }
+            }
+
+            ffw_reencode_raw2(cur_mapping2, 256, 0);
+        }
+    }
+    else
+    {
+        font_cid = dynamic_cast<GfxCIDFont*>(font);
+        maxcode = 0xffff;
+
+        if(is_truetype_suffix(suffix))
+        {
+            ffw_reencode_glyph_order();
+
+            GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
+
+            // code2GID has been stored for embedded CID fonts
+            code2GID = _font->getCIDToGID();
+            code2GID_len = _font->getCIDToGIDLen();
+        }
+        else
+        {
+            ffw_cidflatten();
+        }
+    }
+
+    /*
+     * Step 2
+     * - map charcode (or GID for CID truetype)
+     *
+     * -> Always map to Unicode for 8bit TrueType fonts and CID fonts
+     *
+     * -> For 8bit nonTruetype fonts:
+     * Try to calculate the correct Unicode value from the glyph names, when collision is detected in ToUnicode Map
+     *
+     * - Fill in the width_list, and set widths accordingly
+     * - Remove unused glyphs
+     */
+
+
+    {
+        unordered_set<int> codeset;
+        bool name_conflict_warned = false;
+
+        auto ctu = font->getToUnicode();
+        memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
+        memset(width_list, -1, 0x10000 * sizeof(*width_list));
+
+        if(code2GID)
+            maxcode = min(maxcode, code2GID_len - 1);
+
+        bool is_truetype = is_truetype_suffix(suffix);
+        int max_key = maxcode;
+        /*
+         * Traverse all possible codes
+         */
+        bool retried = false; // avoid infinite loop
+        for(int i = 0; i <= maxcode; ++i)
+        {
+            if(!used_map[i])
+                continue;
+
+            /*
+             * Skip glyphs without names (only for non-ttf fonts)
+             */
+            if(!is_truetype && (font_8bit != nullptr)
+                    && (font_8bit->getCharName(i) == nullptr))
+            {
+                continue;
+            }
+
+            // k is the slot in cur_mapping / width_list: the GID when a
+            // code-to-GID table is available, the char code otherwise
+            int k = i;
+            if(code2GID)
+            {
+                if((k = code2GID[i]) == 0) continue;
+            }
+
+            if(k > max_key)
+                max_key = k;
+
+            Unicode u, *pu=&u;
+            if(info.use_tounicode)
+            {
+                int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
+                u = check_unicode(pu, n, i, font);
+            }
+            else
+            {
+                u = unicode_from_font(i, font);
+            }
+
+            if(u == ' ')
+                has_space = true;
+
+            if(codeset.insert(u).second)
+            {
+                cur_mapping[k] = u;
+            }
+            else
+            {
+                // collision detected
+                if(param->tounicode == 0)
+                {
+                    // in auto mode, just drop the tounicode map
+                    if(!retried)
+                    {
+                        cerr << "ToUnicode CMap is not valid and got dropped for font: " << hex << info.id << dec << endl;
+                        retried = true;
+                        codeset.clear();
+                        info.use_tounicode = false;
+                        // the first pass may have seen a space that the
+                        // second pass will not produce; start from scratch
+                        has_space = false;
+                        //TODO: constant for the length
+                        memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
+                        memset(width_list, -1, 0x10000 * sizeof(*width_list));
+                        // restart the traversal: ++i brings the index back to 0
+                        i = -1;
+                        continue;
+                    }
+                }
+                if(!name_conflict_warned)
+                {
+                    name_conflict_warned = true;
+                    //TODO: may be resolved using advanced font properties?
+                    cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
+                }
+            }
+
+            if(font_8bit)
+            {
+                width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5);
+            }
+            else
+            {
+                // CID widths are looked up by the 2-byte big-endian code
+                char buf[2];
+                buf[0] = (i >> 8) & 0xff;
+                buf[1] = (i & 0xff);
+                width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
+            }
+        }
+
+        ffw_set_widths(width_list, max_key + 1, param->stretch_narrow_glyph, param->squeeze_wide_glyph, param->remove_unused_glyph);
+
+        ffw_reencode_raw(cur_mapping, max_key + 1, 1);
+
+        // we need the space character for offsets
+        if(!has_space)
+        {
+            int space_width;
+            if(font_8bit)
+            {
+                space_width = (int)floor(font_8bit->getWidth(' ') * info.em_size + 0.5);
+            }
+            else
+            {
+                char buf[2] = {0, ' '};
+                space_width = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
+            }
+            ffw_make_char((int)' ', space_width);
+        }
+
+        if(ctu)
+            ctu->decRefCnt();
+    }
+
+    /*
+     * Step 3
+     *
+     * Generate the font as desired
+     *
+     */
+    string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
+    tmp_files.add(cur_tmp_fn);
+    string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
+    tmp_files.add(other_tmp_fn);
+
+    ffw_save(cur_tmp_fn.c_str());
+
+    ffw_close();
+
+    /*
+     * Step 4
+     * Font Hinting
+     */
+    bool hinted = false;
+
+    // Call external hinting program if specified
+    // NOTE(review): the command line is assembled from external_hint_tool and
+    // the temporary file paths and handed to system(); a path containing a
+    // double quote would break the quoting.
+    if(param->external_hint_tool != "")
+    {
+        hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0);
+    }
+
+    // Call internal hinting procedure if specified
+    if((!hinted) && (param->auto_hint))
+    {
+        ffw_load_font(cur_tmp_fn.c_str());
+        ffw_auto_hint();
+        ffw_save(other_tmp_fn.c_str());
+        ffw_close();
+        hinted = true;
+    }
+
+    // after hinting, the hinted output becomes the current file
+    if(hinted)
+    {
+        swap(cur_tmp_fn, other_tmp_fn);
+    }
+
+    /*
+     * Step 5
+     * Generate the font
+     * Reload to retrieve/fix accurate ascent/descent
+     */
+    string fn = (char*)str_fmt("%s/f%llx%s",
+        (param->single_html ? param->tmp_dir : param->dest_dir).c_str(),
+        info.id, param->font_suffix.c_str());
+
+    if(param->single_html)
+        tmp_files.add(fn);
+
+    ffw_load_font(cur_tmp_fn.c_str());
+    ffw_metric(&info.ascent, &info.descent);
+    ffw_save(fn.c_str());
+
+    ffw_close();
+}
+
+
const FontInfo * HTMLRenderer::install_font(GfxFont * font)
{
assert(sizeof(long long) == 2*sizeof(int));
diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc
index d92b42a..1fc3998 100644
--- a/src/HTMLRenderer/text.cc
+++ b/src/HTMLRenderer/text.cc
@@ -6,498 +6,20 @@
* Copyright (C) 2012 Lu Wang
*/
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
#include "HTMLRenderer.h"
#include "TextLineBuffer.h"
-#include "util/ffw.h"
#include "util/namespace.h"
#include "util/unicode.h"
-#include "util/path.h"
-#include "util/math.h"
-#include "util/misc.h"
namespace pdf2htmlEX {
-using std::unordered_set;
-using std::min;
using std::all_of;
-using std::floor;
-using std::swap;
using std::cerr;
using std::endl;
-string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
-{
- Object obj, obj1, obj2;
- Object font_obj, font_obj2, fontdesc_obj;
- string suffix;
- string filepath;
-
- try
- {
- // mupdf consulted
- string subtype;
-
- auto * id = font->getID();
-
- Object ref_obj;
- ref_obj.initRef(id->num, id->gen);
- ref_obj.fetch(xref, &font_obj);
- ref_obj.free();
-
- if(!font_obj.isDict())
- {
- cerr << "Font object is not a dictionary" << endl;
- throw 0;
- }
-
- Dict * dict = font_obj.getDict();
- if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
- {
- if(font_obj2.arrayGetLength() == 0)
- {
- cerr << "Warning: empty DescendantFonts array" << endl;
- }
- else
- {
- if(font_obj2.arrayGetLength() > 1)
- cerr << "TODO: multiple entries in DescendantFonts array" << endl;
-
- if(font_obj2.arrayGet(0, &obj2)->isDict())
- {
- dict = obj2.getDict();
- }
- }
- }
-
- if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
- {
- cerr << "Cannot find FontDescriptor " << endl;
- throw 0;
- }
-
- dict = fontdesc_obj.getDict();
-
- if(dict->lookup("FontFile3", &obj)->isStream())
- {
- if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
- {
- subtype = obj1.getName();
- if(subtype == "Type1C")
- {
- suffix = ".cff";
- }
- else if (subtype == "CIDFontType0C")
- {
- suffix = ".cid";
- }
- else
- {
- cerr << "Unknown subtype: " << subtype << endl;
- throw 0;
- }
- }
- else
- {
- cerr << "Invalid subtype in font descriptor" << endl;
- throw 0;
- }
- }
- else if (dict->lookup("FontFile2", &obj)->isStream())
- {
- suffix = ".ttf";
- }
- else if (dict->lookup("FontFile", &obj)->isStream())
- {
- suffix = ".pfa";
- }
- else
- {
- cerr << "Cannot find FontFile for dump" << endl;
- throw 0;
- }
-
- if(suffix == "")
- {
- cerr << "Font type unrecognized" << endl;
- throw 0;
- }
-
- obj.streamReset();
-
- filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str());
- tmp_files.add(filepath);
-
- ofstream outf(filepath, ofstream::binary);
- if(!outf)
- throw string("Cannot open file ") + filepath + " for writing";
-
- char buf[1024];
- int len;
- while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0)
- {
- outf.write(buf, len);
- }
- outf.close();
- obj.streamClose();
- }
- catch(int)
- {
- cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl;
- }
-
- obj2.free();
- obj1.free();
- obj.free();
-
- fontdesc_obj.free();
- font_obj2.free();
- font_obj.free();
-
- return filepath;
-}
-
-void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only)
-{
- if(param->debug)
- {
- cerr << "Embed font: " << filepath << " " << info.id << endl;
- }
-
- ffw_load_font(filepath.c_str());
- ffw_prepare_font();
-
- if(param->debug)
- {
- auto fn = str_fmt("%s/__raw_font_%lld", param->tmp_dir.c_str(), info.id, param->font_suffix.c_str());
- tmp_files.add((char*)fn);
- ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
- }
-
- int * code2GID = nullptr;
- int code2GID_len = 0;
- int maxcode = 0;
-
- Gfx8BitFont * font_8bit = nullptr;
- GfxCIDFont * font_cid = nullptr;
-
- string suffix = get_suffix(filepath);
- for(auto iter = suffix.begin(); iter != suffix.end(); ++iter)
- *iter = tolower(*iter);
-
- /*
- * if parm->tounicode is 0, try the provided tounicode map first
- */
- info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0));
- bool has_space = false;
-
- const char * used_map = nullptr;
-
- info.em_size = ffw_get_em_size();
-
- if(get_metric_only)
- {
- ffw_metric(&info.ascent, &info.descent);
- ffw_close();
- return;
- }
-
- used_map = preprocessor.get_code_map(hash_ref(font->getID()));
-
- /*
- * Step 1
- * dump the font file directly from the font descriptor and put the glyphs into the correct slots *
- * for 8bit + nonTrueType
- * re-encoding the font by glyph names
- *
- * for 8bit + TrueType
- * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode
- *
- * for CID + nonTrueType
- * Flatten the font
- *
- * for CID Truetype
- * same as 8bitTrueType, except for that we have to check 65536 charcodes
- */
- if(!font->isCIDFont())
- {
- font_8bit = dynamic_cast(font);
- maxcode = 0xff;
- if(is_truetype_suffix(suffix))
- {
- ffw_reencode_glyph_order();
- FoFiTrueType *fftt = nullptr;
- if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
- {
- code2GID = font_8bit->getCodeToGIDMap(fftt);
- code2GID_len = 256;
- delete fftt;
- }
- }
- else
- {
- // move the slot such that it's consistent with the encoding seen in PDF
- unordered_set nameset;
- bool name_conflict_warned = false;
-
- memset(cur_mapping2, 0, 0x100 * sizeof(char*));
-
- for(int i = 0; i < 256; ++i)
- {
- if(!used_map[i]) continue;
-
- auto cn = font_8bit->getCharName(i);
- if(cn == nullptr)
- {
- continue;
- }
- else
- {
- if(nameset.insert(string(cn)).second)
- {
- cur_mapping2[i] = cn;
- }
- else
- {
- if(!name_conflict_warned)
- {
- name_conflict_warned = true;
- //TODO: may be resolved using advanced font properties?
- cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
- }
- }
- }
- }
-
- ffw_reencode_raw2(cur_mapping2, 256, 0);
- }
- }
- else
- {
- font_cid = dynamic_cast(font);
- maxcode = 0xffff;
-
- if(is_truetype_suffix(suffix))
- {
- ffw_reencode_glyph_order();
-
- GfxCIDFont * _font = dynamic_cast(font);
-
- // code2GID has been stored for embedded CID fonts
- code2GID = _font->getCIDToGID();
- code2GID_len = _font->getCIDToGIDLen();
- }
- else
- {
- ffw_cidflatten();
- }
- }
-
- /*
- * Step 2
- * - map charcode (or GID for CID truetype)
- *
- * -> Always map to Unicode for 8bit TrueType fonts and CID fonts
- *
- * -> For 8bit nonTruetype fonts:
- * Try to calculate the correct Unicode value from the glyph names, when collision is detected in ToUnicode Map
- *
- * - Fill in the width_list, and set widths accordingly
- * - Remove unused glyphs
- */
-
-
- {
- unordered_set codeset;
- bool name_conflict_warned = false;
-
- auto ctu = font->getToUnicode();
- memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
- memset(width_list, -1, 0x10000 * sizeof(*width_list));
-
- if(code2GID)
- maxcode = min(maxcode, code2GID_len - 1);
-
- bool is_truetype = is_truetype_suffix(suffix);
- int max_key = maxcode;
- /*
- * Traverse all possible codes
- */
- bool retried = false; // avoid infinite loop
- for(int i = 0; i <= maxcode; ++i)
- {
- if(!used_map[i])
- continue;
-
- /*
- * Skip glyphs without names (only for non-ttf fonts)
- */
- if(!is_truetype && (font_8bit != nullptr)
- && (font_8bit->getCharName(i) == nullptr))
- {
- continue;
- }
-
- int k = i;
- if(code2GID)
- {
- if((k = code2GID[i]) == 0) continue;
- }
-
- if(k > max_key)
- max_key = k;
-
- Unicode u, *pu=&u;
- if(info.use_tounicode)
- {
- int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
- u = check_unicode(pu, n, i, font);
- }
- else
- {
- u = unicode_from_font(i, font);
- }
-
- if(u == ' ')
- has_space = true;
-
- if(codeset.insert(u).second)
- {
- cur_mapping[k] = u;
- }
- else
- {
- // collision detected
- if(param->tounicode == 0)
- {
- // in auto mode, just drop the tounicode map
- if(!retried)
- {
- cerr << "ToUnicode CMap is not valid and got dropped for font: " << hex << info.id << dec << endl;
- retried = true;
- codeset.clear();
- info.use_tounicode = false;
- //TODO: constant for the length
- memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
- memset(width_list, -1, 0x10000 * sizeof(*width_list));
- i = -1;
- continue;
- }
- }
- if(!name_conflict_warned)
- {
- name_conflict_warned = true;
- //TODO: may be resolved using advanced font properties?
- cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
- }
- }
-
- if(font_8bit)
- {
- width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5);
- }
- else
- {
- char buf[2];
- buf[0] = (i >> 8) & 0xff;
- buf[1] = (i & 0xff);
- width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
- }
- }
-
- ffw_set_widths(width_list, max_key + 1, param->stretch_narrow_glyph, param->squeeze_wide_glyph, param->remove_unused_glyph);
-
- ffw_reencode_raw(cur_mapping, max_key + 1, 1);
-
- // we need the space character for offsets
- if(!has_space)
- {
- int space_width;
- if(font_8bit)
- {
- space_width = (int)floor(font_8bit->getWidth(' ') * info.em_size + 0.5);
- }
- else
- {
- char buf[2] = {0, ' '};
- space_width = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
- }
- ffw_make_char((int)' ', space_width);
- }
-
- if(ctu)
- ctu->decRefCnt();
- }
-
- /*
- * Step 3
- *
- * Generate the font as desired
- *
- */
- string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
- tmp_files.add(cur_tmp_fn);
- string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
- tmp_files.add(other_tmp_fn);
-
- ffw_save(cur_tmp_fn.c_str());
-
- ffw_close();
-
- /*
- * Step 4
- * Font Hinting
- */
- bool hinted = false;
-
- // Call external hinting program if specified
- if(param->external_hint_tool != "")
- {
- hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0);
- }
-
- // Call internal hinting procedure if specified
- if((!hinted) && (param->auto_hint))
- {
- ffw_load_font(cur_tmp_fn.c_str());
- ffw_auto_hint();
- ffw_save(other_tmp_fn.c_str());
- ffw_close();
- hinted = true;
- }
-
- if(hinted)
- {
- swap(cur_tmp_fn, other_tmp_fn);
- }
-
- /*
- * Step 5
- * Generate the font
- * Reload to retrieve/fix accurate ascent/descent
- */
- string fn = (char*)str_fmt("%s/f%llx%s",
- (param->single_html ? param->tmp_dir : param->dest_dir).c_str(),
- info.id, param->font_suffix.c_str());
-
- if(param->single_html)
- tmp_files.add(fn);
-
- ffw_load_font(cur_tmp_fn.c_str());
- ffw_metric(&info.ascent, &info.descent);
- ffw_save(fn.c_str());
-
- ffw_close();
-}
-
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
if(s->getLength() == 0)