From 4543fc9dbcd200c389cafd2c705251e98b6b7024 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 3 Sep 2012 20:57:14 +0800 Subject: [PATCH] remove scripts, link to fontforge directly --- CMakeLists.txt | 15 +++- TODO | 2 + share/unify.pe | 31 ------- src/HTMLRenderer.h | 3 - src/HTMLRenderer/general.cc | 4 +- src/HTMLRenderer/text.cc | 125 ++++++++++++++--------------- src/ff/ff.c | 156 ++++++++++++++++++++++++++++++++++++ src/ff/ff.h | 32 ++++++++ 8 files changed, 264 insertions(+), 104 deletions(-) delete mode 100644 share/unify.pe create mode 100644 src/ff/ff.c create mode 100644 src/ff/ff.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 87f9a93..78936ae 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,9 @@ find_package(Boost REQUIRED COMPONENTS program_options filesystem system) include_directories(${Boost_INCLUDE_DIRS}) link_directories ( ${Boost_LIBRARY_DIRS} ) include_directories(${CMAKE_SOURCE_DIR}/src) +pkg_check_modules(FONTFORGE REQUIRED fontforge) +include_directories(${FONTFORGE_INCLUDE_DIRS}) +link_directories ( ${FONTFORGE_LIBRARY_DIRS} ) set(PDF2HTMLEX_VERSION "0.2") set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) @@ -17,10 +20,14 @@ add_custom_target(dist | bzip2 > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.bz2 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) +set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function") +#set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -O2") +set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") configure_file (${CMAKE_SOURCE_DIR}/src/config.h.in ${CMAKE_SOURCE_DIR}/src/config.h) @@ -34,6 +41,8 @@ add_executable(pdf2htmlEX src/HTMLRenderer/text.cc src/HTMLRenderer/image.cc src/HTMLRenderer/namespace.h + src/ff/ff.h + src/ff/ff.c src/BackgroundRenderer.h src/BackgroundRenderer.cc src/Consts.h @@ -41,7 +50,7 @@ add_executable(pdf2htmlEX src/util.h src/config.h) -target_link_libraries(pdf2htmlEX poppler boost_program_options boost_filesystem boost_system) +target_link_libraries(pdf2htmlEX poppler boost_program_options boost_filesystem boost_system fontforge python2.7) install (TARGETS pdf2htmlEX DESTINATION bin) file (GLOB datafiles share/*) diff --git a/TODO b/TODO index eaff33b..a0e2219 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,5 @@ +cmake - pkgconfig - fontforge - python + link fontforge instead of script option to break ligatures diff --git a/share/unify.pe b/share/unify.pe deleted file mode 100644 index 82f05ec..0000000 --- a/share/unify.pe +++ /dev/null @@ -1,31 +0,0 @@ -# script piece to unify the metrics -# by WangLu -# 2012.08.31 -wa = GetOS2Value("WinAscent") -wd = GetOS2Value("WinDescent") -ta = GetOS2Value("TypoAscent") -td = GetOS2Value("TypoDescent") -ha = GetOS2Value("HHeadAscent") -hd = GetOS2Value("HHeadDescent") -a = wa -if (ta > a) - a = ta -endif -if (ha > a) - a = ha -endif -d = wd -if (-td > d) - d = -td -endif -if (-hd > d) - d = -hd -endif -SetOS2Value("WinAscent", a) -SetOS2Value("WinDescent", d) -SetOS2Value("HHeadAscent", a) -SetOS2Value("HHeadDescent", -d) -Print(ta-td) -Print(a) -Print(d) -# script end diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index 88e20c2..8d26ca6 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -297,9 +297,6 @@ class HTMLRenderer : public OutputDev static const std::string NECK_HTML_FILENAME; static const std::string TAIL_HTML_FILENAME; static const std::string CSS_FILENAME; - static const std::string UNIFY_SCRIPT_FILENAME; - // for cross-platform purpose, use a "null" file instead of /dev/null - static const std::string NULL_FILENAME; }; #endif /* HTMLRENDERER_H_ */ diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 45a36c3..9de3d4d 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -13,6 +13,7 @@ #include "BackgroundRenderer.h" #include "config.h" #include "namespace.h" +#include "ff/ff.h" using std::fixed; using std::flush; @@ -26,6 +27,7 @@ HTMLRenderer::HTMLRenderer(const Param * param) ,dest_dir(param->dest_dir) ,tmp_dir(param->tmp_dir) { + ff_init(); } HTMLRenderer::~HTMLRenderer() @@ -242,5 +244,3 @@ const std::string HTMLRenderer::HEAD_HTML_FILENAME = "head.html"; const std::string HTMLRenderer::NECK_HTML_FILENAME = "neck.html"; const std::string HTMLRenderer::TAIL_HTML_FILENAME = "tail.html"; const std::string HTMLRenderer::CSS_FILENAME = "all.css"; -const std::string HTMLRenderer::UNIFY_SCRIPT_FILENAME = "unify.pe"; -const std::string HTMLRenderer::NULL_FILENAME = "null"; diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index d32439c..1ce27a9 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -1,5 +1,5 @@ /* - * text.ccc + * text.cc * * Handling text & font, and relative stuffs * @@ -18,6 +18,7 @@ #include #include +#include "ff/ff.h" #include "HTMLRenderer.h" #include "namespace.h" #include "config.h" @@ -162,11 +163,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & string fn = (format("f%|1$x|") % info.id).str(); - path script_path = tmp_dir / (fn + ".pe"); - ofstream script_fout(script_path, ofstream::binary); - add_tmp_file(fn+".pe"); - - script_fout << format("Open(%1%, 1)") % filepath << endl; + ff_load_font(filepath.c_str()); int * code2GID = nullptr; int code2GID_len = 0; @@ -200,7 +197,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & maxcode = 0xff; if((suffix == ".ttf") || (suffix == ".ttc") || (suffix == ".otf")) { - script_fout << "Reencode(\"original\")" << endl; + ff_reencode("original", 0); FoFiTrueType *fftt = nullptr; if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) { @@ -246,8 +243,8 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & } out << "] def" << endl; - script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl; - script_fout << format("Reencode(\"%1%\")") % fn << endl; + ff_load_encoding((tmp_dir / (fn+"_.encoding")).c_str(), nullptr); + ff_reencode(fn.c_str(), 0); } } else @@ -256,7 +253,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & if(suffix == ".ttf") { - script_fout << "Reencode(\"original\")" << endl; + ff_reencode("original", 0); GfxCIDFont * _font = dynamic_cast(font); @@ -266,7 +263,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & } else { - script_fout << "CIDFlatten()" << endl; + ff_cidflatten(); } } @@ -280,46 +277,49 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & * - For 8bit nonTruetype fonts: * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set * + * TODO: build Encoding directly, without read/write files */ auto ctu = font->getToUnicode(); - - ofstream map_fout(tmp_dir / (fn + ".encoding")); - add_tmp_file(fn+".encoding"); - int cnt = 0; - for(int i = 0; i <= maxcode; ++i) + { - if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr)) - continue; + ofstream map_fout(tmp_dir / (fn + ".encoding")); + add_tmp_file(fn+".encoding"); - ++ cnt; - map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i); - - Unicode u, *pu=&u; - - if(info.use_tounicode) + for(int i = 0; i <= maxcode; ++i) { - int n = 0; - if(ctu) - n = ctu->mapToUnicode(i, &pu); - u = check_unicode(pu, n, i, font); - } - else - { - u = unicode_from_font(i, font); - } + if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr)) + continue; - map_fout << format(" 0x%|1$X|") % u; - map_fout << format(" # 0x%|1$X|") % i; + ++ cnt; + map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i); - map_fout << endl; + Unicode u, *pu=&u; + + if(info.use_tounicode) + { + int n = 0; + if(ctu) + n = ctu->mapToUnicode(i, &pu); + u = check_unicode(pu, n, i, font); + } + else + { + u = unicode_from_font(i, font); + } + + map_fout << format(" 0x%|1$X|") % u; + map_fout << format(" # 0x%|1$X|") % i; + + map_fout << endl; + } } if(cnt > 0) { - script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl; - script_fout << format("Reencode(\"%1%\", 1)") % fn << endl; + ff_load_encoding((tmp_dir / (fn+".encoding")).c_str(), fn.c_str()); + ff_reencode(fn.c_str(), 1); } if(ctu) @@ -336,42 +336,37 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & * Trying to unify them */ // Generate an intermediate ttf font in order to retrieve the metrics - add_tmp_file(fn + "_.ttf"); - script_fout << format("Generate(%1%)") % (tmp_dir / (fn + "_.ttf")) << endl; - script_fout << "Close()" << endl; - script_fout << format("Open(%1%, 1)") % (tmp_dir / (fn + "_.ttf")) << endl; - script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf(); - script_fout << format("Generate(%1%)") % dest << endl; - script_fout.close(); + // TODO: see if we can get the values without save/load + + string tmp_fn = fn+"_.ttf"; + add_tmp_file(tmp_fn); + auto tmp_path = tmp_dir / tmp_fn; + ff_save(tmp_path.c_str()); + ff_close(); + ff_load_font(tmp_path.c_str()); - if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0) - cerr << "Warning: fontforge failed." << endl; + int em = ff_get_em_size(); + int ascent = ff_get_max_ascent(); + int descent = ff_get_max_descent(); - add_tmp_file(fn+".info"); - add_tmp_file(NULL_FILENAME); + ff_set_ascent(ascent); + ff_set_descent(descent); + ff_save(dest.c_str()); + ff_close(); - // read metric - int em, ascent, descent; - if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent) + if(em != 0) { - if(em != 0) - { - info.ascent = ((double)ascent) / em; - info.descent = -((double)descent) / em; - } - else - { - info.ascent = 0; - info.descent = 0; - } + info.ascent = ((double)ascent) / em; + info.descent = -((double)descent) / em; } else { - cerr << "Warning: cannot read font info for " << fn << endl; - info.ascent = font->getAscent(); - info.descent = font->getDescent(); + info.ascent = 0; + info.descent = 0; } + // read metric + if(param->debug) { cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl; diff --git a/src/ff/ff.c b/src/ff/ff.c new file mode 100644 index 0000000..f8f69a9 --- /dev/null +++ b/src/ff/ff.c @@ -0,0 +1,156 @@ +/* + * ff.c + * + * Processing fonts using Fontforge + * + * by WangLu + * 2012.09.03 + */ + +#include +#include +#include +#include + +#include +#include + +#include "ff.h" + +SplineFont * cur_font = NULL; + +static void err(const char * format, ...) +{ + va_list al; + va_start(al, format); + vfprintf(stderr, format, al); + va_end(al); + exit(-1); +} +static char * strcopy(const char * str) +{ + if(str == NULL) return NULL; + + char * _ = strdup(str); + if(!_) + err("Not enough memory"); + return _; +} + +static int max(int a, int b) +{ + return (a>b) ? a : b; +} + +void ff_init(void) +{ + InitSimpleStuff(); + if ( default_encoding==NULL ) + default_encoding=FindOrMakeEncoding("ISO8859-1"); + if ( default_encoding==NULL ) + default_encoding=&custom; /* In case iconv is broken */ +} +void ff_load_font(const char * filename) +{ + char * _filename = strcopy(filename); + cur_font = LoadSplineFont(_filename, 1); + free(_filename); + + if(!cur_font) + err("Cannot load font %s\n", filename); + + if(!cur_font->fv) + FVAppend(_FontViewCreate(cur_font)); +} + +void ff_load_encoding(const char * filename, const char * encname) +{ + char * _filename = strcopy(filename); + char * _encname = strcopy(encname); + ParseEncodingFile(_filename, _encname); + free(_encname); + free(_filename); +} + +void ff_reencode(const char * encname, int force) +{ + Encoding * enc = FindOrMakeEncoding(encname); + if(!enc) + err("Unknown encoding %s\n", encname); + + if(force) + { + SFForceEncoding(cur_font, cur_font->fv->map, enc); + } + else + { + EncMapFree(cur_font->fv->map); + cur_font->fv->map= EncMapFromEncoding(cur_font, enc); + } + + SFReplaceEncodingBDFProps(cur_font, cur_font->fv->map); +} + +void ff_cidflatten(void) +{ + printf("cid flatten\n"); + + if(!cur_font->cidmaster) + err("Cannot flatten a non-CID font"); + SFFlatten(cur_font->cidmaster); +} + +void ff_save(const char * filename) +{ + char * _filename = strcopy(filename); + char * _ = strcopy(""); + + int r = GenerateScript(cur_font, _filename + , _, -1, -1, NULL, NULL, cur_font->fv->map, NULL, ly_fore); + + free(_); + free(_filename); + + if(!r) + err("Cannot save font to %s\n", filename); +} + +void ff_close(void) +{ + FontViewClose(cur_font->fv); + cur_font = NULL; +} + +int ff_get_em_size(void) +{ + return (cur_font->pfminfo.os2_typoascent - cur_font->pfminfo.os2_typodescent); +} + +int ff_get_max_ascent(void) +{ + return max(cur_font->pfminfo.os2_winascent, + max(cur_font->pfminfo.os2_typoascent, + cur_font->pfminfo.hhead_ascent)); +} + +int ff_get_max_descent(void) +{ + return max(cur_font->pfminfo.os2_windescent, + max(-cur_font->pfminfo.os2_typodescent, + -cur_font->pfminfo.hhead_descent)); +} + +void ff_set_ascent(int a) +{ + cur_font->pfminfo.os2_winascent = a; + cur_font->pfminfo.os2_typoascent = a; + cur_font->pfminfo.hhead_ascent = a; +} + +void ff_set_descent(int d) +{ + cur_font->pfminfo.os2_windescent = d; + cur_font->pfminfo.os2_typodescent = -d; + cur_font->pfminfo.hhead_descent = -d; +} + diff --git a/src/ff/ff.h b/src/ff/ff.h new file mode 100644 index 0000000..c43fc04 --- /dev/null +++ b/src/ff/ff.h @@ -0,0 +1,32 @@ +/* + * ff.h + * + * Processing fonts using Fontforge + * + * fontforge.h cannot be included in C++ + * So this wrapper in C publishes several functions we need + * + * by WangLu + * 2012.09.03 + */ + +#ifdef __cplusplus +extern "C" { +#endif + +void ff_init(void); +void ff_load_font(const char * filename); +void ff_load_encoding(const char * filename, const char * encname); +void ff_reencode(const char * encname, int force); +void ff_cidflatten(void); +void ff_save(const char * filename); +void ff_close(void); +int ff_get_em_size(void); +int ff_get_max_ascent(void); +int ff_get_max_descent(void); +void ff_set_ascent(int a); +void ff_set_descent(int d); + +#ifdef __cplusplus +} +#endif