1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

remove scripts, link to fontforge directly

This commit is contained in:
Lu Wang 2012-09-03 20:57:14 +08:00
parent 879d854170
commit 4543fc9dbc
8 changed files with 264 additions and 104 deletions

View File

@ -9,6 +9,9 @@ find_package(Boost REQUIRED COMPONENTS program_options filesystem system)
include_directories(${Boost_INCLUDE_DIRS}) include_directories(${Boost_INCLUDE_DIRS})
link_directories ( ${Boost_LIBRARY_DIRS} ) link_directories ( ${Boost_LIBRARY_DIRS} )
include_directories(${CMAKE_SOURCE_DIR}/src) include_directories(${CMAKE_SOURCE_DIR}/src)
# Locate FontForge through pkg-config; REQUIRED makes configuration fail when it is not found.
# NOTE(review): pkg_check_modules is provided by the PkgConfig module - confirm that
# find_package(PkgConfig) is called earlier in this file (not visible in this hunk).
pkg_check_modules(FONTFORGE REQUIRED fontforge)
# Directory-scoped: the FontForge header and library search paths apply to every target defined below.
include_directories(${FONTFORGE_INCLUDE_DIRS})
link_directories ( ${FONTFORGE_LIBRARY_DIRS} )
set(PDF2HTMLEX_VERSION "0.2") set(PDF2HTMLEX_VERSION "0.2")
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
@ -17,10 +20,14 @@ add_custom_target(dist
| bzip2 > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.bz2 | bzip2 > ${CMAKE_BINARY_DIR}/${ARCHIVE_NAME}.tar.bz2
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
# C compiler flags (used for src/ff/ff.c). Each line appends to CMAKE_C_FLAGS itself,
# so user-supplied C flags are kept and later lines do not discard earlier ones.
# Previously these were built from CMAKE_CXX_FLAGS, which made the -ggdb line
# silently overwrite the warning flags.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wunused-function")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ggdb")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
configure_file (${CMAKE_SOURCE_DIR}/src/config.h.in ${CMAKE_SOURCE_DIR}/src/config.h) configure_file (${CMAKE_SOURCE_DIR}/src/config.h.in ${CMAKE_SOURCE_DIR}/src/config.h)
@ -34,6 +41,8 @@ add_executable(pdf2htmlEX
src/HTMLRenderer/text.cc src/HTMLRenderer/text.cc
src/HTMLRenderer/image.cc src/HTMLRenderer/image.cc
src/HTMLRenderer/namespace.h src/HTMLRenderer/namespace.h
src/ff/ff.h
src/ff/ff.c
src/BackgroundRenderer.h src/BackgroundRenderer.h
src/BackgroundRenderer.cc src/BackgroundRenderer.cc
src/Consts.h src/Consts.h
@ -41,7 +50,7 @@ add_executable(pdf2htmlEX
src/util.h src/util.h
src/config.h) src/config.h)
target_link_libraries(pdf2htmlEX poppler boost_program_options boost_filesystem boost_system) target_link_libraries(pdf2htmlEX poppler boost_program_options boost_filesystem boost_system fontforge python2.7)
install (TARGETS pdf2htmlEX DESTINATION bin) install (TARGETS pdf2htmlEX DESTINATION bin)
file (GLOB datafiles share/*) file (GLOB datafiles share/*)

2
TODO
View File

@ -1,3 +1,5 @@
cmake - pkgconfig - fontforge - python
link fontforge instead of script link fontforge instead of script
option to break ligatures option to break ligatures

View File

@ -1,31 +0,0 @@
# script piece to unify the metrics
# by WangLu
# 2012.08.31
#
# Reads the Win, Typo and HHead ascent/descent values through GetOS2Value,
# picks the largest ascent and the largest descent among them, and writes the
# result back to the Win and HHead fields (the Typo fields are not modified).
# Finally prints three values: ta-td, the unified ascent and the unified descent.

# Fetch the six vertical metrics.
wa = GetOS2Value("WinAscent")
wd = GetOS2Value("WinDescent")
ta = GetOS2Value("TypoAscent")
td = GetOS2Value("TypoDescent")
ha = GetOS2Value("HHeadAscent")
hd = GetOS2Value("HHeadDescent")
# a = max(wa, ta, ha)
a = wa
if (ta > a)
a = ta
endif
if (ha > a)
a = ha
endif
# d = max(wd, -td, -hd); the Typo and HHead descents are negated before comparing
# (presumably they are stored as negative numbers while WinDescent is positive - confirm)
d = wd
if (-td > d)
d = -td
endif
if (-hd > d)
d = -hd
endif
# Write the unified values back; HHeadDescent receives the negated descent.
SetOS2Value("WinAscent", a)
SetOS2Value("WinDescent", d)
SetOS2Value("HHeadAscent", a)
SetOS2Value("HHeadDescent", -d)
# Emit the results in this order: ta-td, a, d
Print(ta-td)
Print(a)
Print(d)
# script end

View File

@ -297,9 +297,6 @@ class HTMLRenderer : public OutputDev
static const std::string NECK_HTML_FILENAME; static const std::string NECK_HTML_FILENAME;
static const std::string TAIL_HTML_FILENAME; static const std::string TAIL_HTML_FILENAME;
static const std::string CSS_FILENAME; static const std::string CSS_FILENAME;
static const std::string UNIFY_SCRIPT_FILENAME;
// for cross-platform purpose, use a "null" file instead of /dev/null
static const std::string NULL_FILENAME;
}; };
#endif /* HTMLRENDERER_H_ */ #endif /* HTMLRENDERER_H_ */

View File

@ -13,6 +13,7 @@
#include "BackgroundRenderer.h" #include "BackgroundRenderer.h"
#include "config.h" #include "config.h"
#include "namespace.h" #include "namespace.h"
#include "ff/ff.h"
using std::fixed; using std::fixed;
using std::flush; using std::flush;
@ -26,6 +27,7 @@ HTMLRenderer::HTMLRenderer(const Param * param)
,dest_dir(param->dest_dir) ,dest_dir(param->dest_dir)
,tmp_dir(param->tmp_dir) ,tmp_dir(param->tmp_dir)
{ {
ff_init();
} }
HTMLRenderer::~HTMLRenderer() HTMLRenderer::~HTMLRenderer()
@ -242,5 +244,3 @@ const std::string HTMLRenderer::HEAD_HTML_FILENAME = "head.html";
const std::string HTMLRenderer::NECK_HTML_FILENAME = "neck.html"; const std::string HTMLRenderer::NECK_HTML_FILENAME = "neck.html";
const std::string HTMLRenderer::TAIL_HTML_FILENAME = "tail.html"; const std::string HTMLRenderer::TAIL_HTML_FILENAME = "tail.html";
const std::string HTMLRenderer::CSS_FILENAME = "all.css"; const std::string HTMLRenderer::CSS_FILENAME = "all.css";
const std::string HTMLRenderer::UNIFY_SCRIPT_FILENAME = "unify.pe";
const std::string HTMLRenderer::NULL_FILENAME = "null";

View File

@ -1,5 +1,5 @@
/* /*
* text.ccc * text.cc
* *
* Handling text & font, and relative stuffs * Handling text & font, and relative stuffs
* *
@ -18,6 +18,7 @@
#include <CharCodeToUnicode.h> #include <CharCodeToUnicode.h>
#include <fofi/FoFiTrueType.h> #include <fofi/FoFiTrueType.h>
#include "ff/ff.h"
#include "HTMLRenderer.h" #include "HTMLRenderer.h"
#include "namespace.h" #include "namespace.h"
#include "config.h" #include "config.h"
@ -162,11 +163,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
string fn = (format("f%|1$x|") % info.id).str(); string fn = (format("f%|1$x|") % info.id).str();
path script_path = tmp_dir / (fn + ".pe"); ff_load_font(filepath.c_str());
ofstream script_fout(script_path, ofstream::binary);
add_tmp_file(fn+".pe");
script_fout << format("Open(%1%, 1)") % filepath << endl;
int * code2GID = nullptr; int * code2GID = nullptr;
int code2GID_len = 0; int code2GID_len = 0;
@ -200,7 +197,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
maxcode = 0xff; maxcode = 0xff;
if((suffix == ".ttf") || (suffix == ".ttc") || (suffix == ".otf")) if((suffix == ".ttf") || (suffix == ".ttc") || (suffix == ".otf"))
{ {
script_fout << "Reencode(\"original\")" << endl; ff_reencode("original", 0);
FoFiTrueType *fftt = nullptr; FoFiTrueType *fftt = nullptr;
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
{ {
@ -246,8 +243,8 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
} }
out << "] def" << endl; out << "] def" << endl;
script_fout << format("LoadEncodingFile(%1%)") % (tmp_dir / (fn+"_.encoding")) << endl; ff_load_encoding((tmp_dir / (fn+"_.encoding")).c_str(), nullptr);
script_fout << format("Reencode(\"%1%\")") % fn << endl; ff_reencode(fn.c_str(), 0);
} }
} }
else else
@ -256,7 +253,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
if(suffix == ".ttf") if(suffix == ".ttf")
{ {
script_fout << "Reencode(\"original\")" << endl; ff_reencode("original", 0);
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font); GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
@ -266,7 +263,7 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
} }
else else
{ {
script_fout << "CIDFlatten()" << endl; ff_cidflatten();
} }
} }
@ -280,46 +277,49 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
* - For 8bit nonTruetype fonts: * - For 8bit nonTruetype fonts:
* Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
* *
* TODO: build Encoding directly, without read/write files
*/ */
auto ctu = font->getToUnicode(); auto ctu = font->getToUnicode();
ofstream map_fout(tmp_dir / (fn + ".encoding"));
add_tmp_file(fn+".encoding");
int cnt = 0; int cnt = 0;
for(int i = 0; i <= maxcode; ++i)
{ {
if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr)) ofstream map_fout(tmp_dir / (fn + ".encoding"));
continue; add_tmp_file(fn+".encoding");
++ cnt; for(int i = 0; i <= maxcode; ++i)
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
Unicode u, *pu=&u;
if(info.use_tounicode)
{ {
int n = 0; if((suffix != ".ttf") && (font_8bit != nullptr) && (font_8bit->getCharName(i) == nullptr))
if(ctu) continue;
n = ctu->mapToUnicode(i, &pu);
u = check_unicode(pu, n, i, font);
}
else
{
u = unicode_from_font(i, font);
}
map_fout << format(" 0x%|1$X|") % u; ++ cnt;
map_fout << format(" # 0x%|1$X|") % i; map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
map_fout << endl; Unicode u, *pu=&u;
if(info.use_tounicode)
{
int n = 0;
if(ctu)
n = ctu->mapToUnicode(i, &pu);
u = check_unicode(pu, n, i, font);
}
else
{
u = unicode_from_font(i, font);
}
map_fout << format(" 0x%|1$X|") % u;
map_fout << format(" # 0x%|1$X|") % i;
map_fout << endl;
}
} }
if(cnt > 0) if(cnt > 0)
{ {
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl; ff_load_encoding((tmp_dir / (fn+".encoding")).c_str(), fn.c_str());
script_fout << format("Reencode(\"%1%\", 1)") % fn << endl; ff_reencode(fn.c_str(), 1);
} }
if(ctu) if(ctu)
@ -336,42 +336,37 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo &
* Trying to unify them * Trying to unify them
*/ */
// Generate an intermediate ttf font in order to retrieve the metrics // Generate an intermediate ttf font in order to retrieve the metrics
add_tmp_file(fn + "_.ttf"); // TODO: see if we can get the values without save/load
script_fout << format("Generate(%1%)") % (tmp_dir / (fn + "_.ttf")) << endl;
script_fout << "Close()" << endl;
script_fout << format("Open(%1%, 1)") % (tmp_dir / (fn + "_.ttf")) << endl;
script_fout << ifstream(PDF2HTMLEX_DATA_PATH / UNIFY_SCRIPT_FILENAME).rdbuf();
script_fout << format("Generate(%1%)") % dest << endl;
script_fout.close();
if(system((boost::format("fontforge -script %1% 1>%2% 2>%3%") % script_path % (tmp_dir / (fn+".info")) % (tmp_dir / NULL_FILENAME)).str().c_str()) != 0) string tmp_fn = fn+"_.ttf";
cerr << "Warning: fontforge failed." << endl; add_tmp_file(tmp_fn);
auto tmp_path = tmp_dir / tmp_fn;
ff_save(tmp_path.c_str());
ff_close();
ff_load_font(tmp_path.c_str());
add_tmp_file(fn+".info"); int em = ff_get_em_size();
add_tmp_file(NULL_FILENAME); int ascent = ff_get_max_ascent();
int descent = ff_get_max_descent();
// read metric ff_set_ascent(ascent);
int em, ascent, descent; ff_set_descent(descent);
if(ifstream(tmp_dir / (fn+".info")) >> em >> ascent >> descent) ff_save(dest.c_str());
ff_close();
if(em != 0)
{ {
if(em != 0) info.ascent = ((double)ascent) / em;
{ info.descent = -((double)descent) / em;
info.ascent = ((double)ascent) / em;
info.descent = -((double)descent) / em;
}
else
{
info.ascent = 0;
info.descent = 0;
}
} }
else else
{ {
cerr << "Warning: cannot read font info for " << fn << endl; info.ascent = 0;
info.ascent = font->getAscent(); info.descent = 0;
info.descent = font->getDescent();
} }
// read metric
if(param->debug) if(param->debug)
{ {
cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl; cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;

156
src/ff/ff.c Normal file
View File

@ -0,0 +1,156 @@
/*
* ff.c
*
* Processing fonts using Fontforge
*
* by WangLu
* 2012.09.03
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <fontforge.h>
#include <baseviews.h>
#include "ff.h"
/* The font every ff_* function operates on: set by ff_load_font(), reset to NULL by ff_close(). */
SplineFont * cur_font = NULL;
/*
 * Report a fatal error and terminate the process.
 *
 * format and the trailing arguments follow the printf conventions. The
 * formatted message is written to stderr, then the process exits with
 * status -1, so this function never returns to its caller.
 */
static void err(const char * format, ...)
{
    va_list args;

    va_start(args, format);
    vfprintf(stderr, format, args);
    va_end(args);

    exit(-1);
}
/*
 * Duplicate a string into freshly allocated memory.
 *
 * Returns NULL when str is NULL; otherwise returns a copy obtained from
 * strdup() that the caller must release with free(). If the allocation
 * fails the process is terminated through err().
 */
static char * strcopy(const char * str)
{
    char * copy;

    if(str == NULL)
        return NULL;

    copy = strdup(str);
    if(copy == NULL)
        err("Not enough memory\n"); /* newline added to match the other err() messages */

    return copy;
}
/* Return the larger of the two integers a and b. */
static int max(int a, int b)
{
    if(a > b)
        return a;
    return b;
}
/*
 * Initialise the FontForge library for use by the other ff_* functions.
 *
 * Calls InitSimpleStuff() and makes sure default_encoding is set:
 * ISO8859-1 is tried first, and the built-in custom encoding is used
 * when that lookup yields nothing.
 */
void ff_init(void)
{
    InitSimpleStuff();

    /* Nothing more to do when a default encoding is already in place. */
    if(default_encoding != NULL)
        return;

    default_encoding = FindOrMakeEncoding("ISO8859-1");
    if(default_encoding == NULL)
        default_encoding = &custom; /* In case iconv is broken */
}
/*
 * Load the font stored at filename and make it the current font.
 *
 * The path is handed to LoadSplineFont() as a temporary writable copy.
 * The process is terminated through err() when loading fails. A font
 * view is created and registered when the loaded font has none, because
 * the other ff_* functions reach the encoding map through cur_font->fv.
 */
void ff_load_font(const char * filename)
{
    char * path_copy = strcopy(filename);

    /* NOTE(review): the meaning of the second argument (1) is defined by FontForge - confirm. */
    cur_font = LoadSplineFont(path_copy, 1);
    free(path_copy);

    if(cur_font == NULL)
        err("Cannot load font %s\n", filename);

    if(cur_font->fv == NULL)
        FVAppend(_FontViewCreate(cur_font));
}
/*
 * Load an encoding definition from filename through ParseEncodingFile().
 *
 * encname is forwarded as the encoding name and may be NULL, in which
 * case NULL is passed on. Both strings are given to FontForge as
 * temporary copies that are released before returning.
 */
void ff_load_encoding(const char * filename, const char * encname)
{
    char * path_copy = strcopy(filename);
    char * name_copy = strcopy(encname);

    ParseEncodingFile(path_copy, name_copy);

    free(name_copy);
    free(path_copy);
}
/*
 * Re-encode the current font with the encoding called encname.
 *
 * The process is terminated through err() when the encoding is unknown.
 * With force == 0 the font view's encoding map is discarded and rebuilt
 * from the encoding; with a non-zero force the encoding is imposed on the
 * existing map through SFForceEncoding(). In both cases the BDF
 * properties are refreshed afterwards.
 */
void ff_reencode(const char * encname, int force)
{
    Encoding * target = FindOrMakeEncoding(encname);

    if(target == NULL)
        err("Unknown encoding %s\n", encname);

    if(!force)
    {
        /* Replace the old map by a fresh one derived from the encoding. */
        EncMapFree(cur_font->fv->map);
        cur_font->fv->map = EncMapFromEncoding(cur_font, target);
    }
    else
    {
        SFForceEncoding(cur_font, cur_font->fv->map, target);
    }

    SFReplaceEncodingBDFProps(cur_font, cur_font->fv->map);
}
/*
 * Flatten the current CID-keyed font through SFFlatten().
 *
 * The process is terminated through err() when the current font has no
 * CID master. Nothing is written to stdout: the former debug trace was
 * removed so that processing a font produces no stray output.
 */
void ff_cidflatten(void)
{
    if(!cur_font->cidmaster)
        err("Cannot flatten a non-CID font\n"); /* newline added to match the other err() messages */

    SFFlatten(cur_font->cidmaster);
}
/*
 * Write the current font to filename through GenerateScript().
 *
 * The path and an empty string are passed as temporary writable copies.
 * The process is terminated through err() when GenerateScript() reports
 * failure by returning zero.
 */
void ff_save(const char * filename)
{
    char * out_path = strcopy(filename);
    char * empty_arg = strcopy("");
    int ok;

    /* NOTE(review): the meaning of the remaining arguments is defined by FontForge - confirm. */
    ok = GenerateScript(cur_font, out_path, empty_arg,
                        -1, -1, NULL, NULL,
                        cur_font->fv->map, NULL, ly_fore);

    free(empty_arg);
    free(out_path);

    if(!ok)
        err("Cannot save font to %s\n", filename);
}
/*
 * Close the current font and forget it.
 *
 * Closes the font view attached to the current font and resets cur_font
 * to NULL. Calling it when no font is loaded (for instance twice in a
 * row) is a no-op instead of a NULL dereference.
 */
void ff_close(void)
{
    if(cur_font == NULL)
        return;

    FontViewClose(cur_font->fv);
    cur_font = NULL;
}
/*
 * Return the value used as the em size of the current font, computed as
 * the OS/2 typographic ascent minus the OS/2 typographic descent.
 */
int ff_get_em_size(void)
{
    int typo_ascent = cur_font->pfminfo.os2_typoascent;
    int typo_descent = cur_font->pfminfo.os2_typodescent;

    return typo_ascent - typo_descent;
}
/*
 * Return the largest of the three ascent values stored for the current
 * font: OS/2 win ascent, OS/2 typo ascent and hhea ascent.
 */
int ff_get_max_ascent(void)
{
    int typo_or_hhead = max(cur_font->pfminfo.os2_typoascent,
                            cur_font->pfminfo.hhead_ascent);

    return max(cur_font->pfminfo.os2_winascent, typo_or_hhead);
}
/*
 * Return the largest descent of the current font. The OS/2 win descent is
 * compared as stored, while the OS/2 typo descent and the hhea descent are
 * negated before the comparison.
 */
int ff_get_max_descent(void)
{
    int typo_depth = -cur_font->pfminfo.os2_typodescent;
    int hhead_depth = -cur_font->pfminfo.hhead_descent;
    int typo_or_hhead = max(typo_depth, hhead_depth);

    return max(cur_font->pfminfo.os2_windescent, typo_or_hhead);
}
/*
 * Store a as the ascent of the current font in all three places it is
 * kept: hhea ascent, OS/2 typo ascent and OS/2 win ascent.
 */
void ff_set_ascent(int a)
{
    /* The three fields are independent, so the assignment order does not matter. */
    cur_font->pfminfo.hhead_ascent = a;
    cur_font->pfminfo.os2_typoascent = a;
    cur_font->pfminfo.os2_winascent = a;
}
/*
 * Store d as the descent of the current font. The OS/2 win descent
 * receives d unchanged; the OS/2 typo descent and the hhea descent
 * receive -d.
 */
void ff_set_descent(int d)
{
    int negated = -d;

    /* The three fields are independent, so the assignment order does not matter. */
    cur_font->pfminfo.hhead_descent = negated;
    cur_font->pfminfo.os2_typodescent = negated;
    cur_font->pfminfo.os2_windescent = d;
}

32
src/ff/ff.h Normal file
View File

@ -0,0 +1,32 @@
/*
* ff.h
*
* Processing fonts using Fontforge
*
* fontforge.h cannot be included in C++
* So this wrapper in C publishes several functions we need
*
* by WangLu
* 2012.09.03
*/
#ifndef FF_H_
#define FF_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Initialise FontForge and make sure a default encoding is set. Call before any other ff_* function. */
void ff_init(void);

/* Load the font at filename and make it the current font; terminates the process on failure. */
void ff_load_font(const char * filename);

/* Load an encoding definition from filename; encname is forwarded as the encoding name and may be NULL. */
void ff_load_encoding(const char * filename, const char * encname);

/*
 * Re-encode the current font with the encoding called encname; terminates the process
 * when the encoding is unknown. force == 0 rebuilds the encoding map from the encoding,
 * a non-zero force imposes the encoding on the existing map.
 */
void ff_reencode(const char * encname, int force);

/* Flatten the current CID-keyed font; terminates the process when the font has no CID master. */
void ff_cidflatten(void);

/* Write the current font to filename; terminates the process on failure. */
void ff_save(const char * filename);

/* Close the current font's view and clear the current font. */
void ff_close(void);

/* OS/2 typo ascent minus OS/2 typo descent of the current font. */
int ff_get_em_size(void);

/* Largest of the OS/2 win, OS/2 typo and hhea ascents of the current font. */
int ff_get_max_ascent(void);

/* Largest of the OS/2 win descent and the negated OS/2 typo and hhea descents of the current font. */
int ff_get_max_descent(void);

/* Set the OS/2 win, OS/2 typo and hhea ascents of the current font to a. */
void ff_set_ascent(int a);

/* Set the OS/2 win descent of the current font to d, and the OS/2 typo and hhea descents to -d. */
void ff_set_descent(int d);

#ifdef __cplusplus
}
#endif

#endif /* FF_H_ */