1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

support external font

This commit is contained in:
Lu Wang 2012-08-10 21:30:41 +08:00
parent 86578b4c67
commit 5a4eccc632
6 changed files with 179 additions and 117 deletions

View File

@ -17,7 +17,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h) add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h src/Consts.cc src/util.h)
target_link_libraries(pdftohtmlEX poppler boost_program_options) target_link_libraries(pdftohtmlEX poppler boost_program_options)

85
src/Consts.cc Normal file
View File

@ -0,0 +1,85 @@
/*
* Constants
*
* by WangLu
* 2012.08.10
*/
#include "Consts.h"
// Absolute tolerance used for floating-point comparisons
// (the _equal and _is_positive helpers compare against it).
const double EPS = 1e-6;
/*
 * Opening part of the generated HTML document: doctype, <head> with the
 * inline stylesheet, a link to all.css and the show_pages() script, then the
 * opening <body> and the <div id="pdf-main"> container.
 *
 * Elements matched by "#pdf-main > .p" start with display:none; show_pages(),
 * run from the body's onload handler, walks the children of #pdf-main and sets
 * display='block' on one child per 100 ms timer tick. The try/catch ignores
 * children for which the style assignment throws.
 *
 * Every line of the literal ends in a backslash continuation, so the source
 * lines are spliced together: the only newline in the resulting string is the
 * explicit \n after the doctype.
 */
const char * HTML_HEAD = "<!DOCTYPE html>\n\
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
// Closing part of the generated HTML document: ends the <div>, <body> and <html>
// elements that HTML_HEAD leaves open.
const char * HTML_TAIL = "</div></body></html>";
/*
 * CSS font-family fallback lists for the families of the PDF base-14 fonts.
 *
 * Key:   font family name (e.g. "Times").
 * Value: ready-to-emit CSS font-family value, ordered from the most specific
 *        face to a generic family where one exists.
 *
 * NOTE(review): install_base_font appears to look this up with the part of
 * the PostScript name that precedes the first '-' -- confirm against the caller.
 */
const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP({
    { "Courier", "Courier,monospace" },
    { "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },
    // The second entry must be spelled "Times New Roman"; the previous
    // "Time New Roman" matches no installed font, so the fallback was skipped.
    { "Times", "Times,\"Times New Roman\",\"Nimbus Roman No9 L\",serif" },
    { "Symbol", "Symbol,\"Standard Symbols L\"" },
    { "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },
});
// Identity transform expressed as six coefficients; the length matches the
// default element count that _tm_equal compares.
// NOTE(review): presumably in PDF matrix order [a b c d e f] -- confirm.
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
/*
 * Font names that arrive as raw GB-encoded bytes, mapped to the ASCII names
 * used in the generated CSS. The keys are byte strings, not UTF-8 text.
 * NOTE(review): the per-entry romanisations below come from decoding the
 * bytes as GB2312 -- confirm.
 */
const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP = {
    { "\xCB\xCE\xCC\xE5",        "SimSun"  },  // Song Ti
    { "\xBA\xDA\xCC\xE5",        "SimHei"  },  // Hei Ti
    { "\xBF\xAC\xCC\xE5_GB2312", "SimKai"  },  // Kai Ti
    { "\xB7\xC2\xCB\xCE_GB2312", "SimFang" },  // Fang Song
    { "\xC1\xA5\xCA\xE9",        "SimLi"   }   // Li Shu
};

View File

@ -9,72 +9,17 @@
#define CONSTS_H__ #define CONSTS_H__
#include <string> #include <string>
#include <map> #include <map>
#include <string>
const char * HTML_HEAD = "<!DOCTYPE html>\n\ extern const double EPS;
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
const char * HTML_TAIL = "</div></body></html>"; extern const char * HTML_HEAD;
extern const char * HTML_TAIL;
const std::map<string, string> BASE_14_FONT_CSS_FONT_MAP({\ extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
{ "Courier", "Courier,monospace" },\
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
{ "Symbol", "Symbol,\"Standard Symbols L\"" },\
{ "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },\
});
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0}; extern const double id_matrix[6];
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
#endif //CONSTS_H__ #endif //CONSTS_H__

View File

@ -18,7 +18,6 @@
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <GfxFont.h> #include <GfxFont.h>
#include <UTF8.h>
#include <fofi/FoFiType1C.h> #include <fofi/FoFiType1C.h>
#include <fofi/FoFiTrueType.h> #include <fofi/FoFiTrueType.h>
#include <splash/SplashBitmap.h> #include <splash/SplashBitmap.h>
@ -27,6 +26,7 @@
#include "HTMLRenderer.h" #include "HTMLRenderer.h"
#include "BackgroundRenderer.h" #include "BackgroundRenderer.h"
#include "Consts.h" #include "Consts.h"
#include "util.h"
/* /*
* CSS classes * CSS classes
@ -163,37 +163,6 @@ void HTMLRenderer::close_cur_line()
} }
} }
void HTMLRenderer::outputUnicodes(const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
html_fout << "&amp;";
break;
case '\"':
html_fout << "&quot;";
break;
case '\'':
html_fout << "&apos;";
break;
case '<':
html_fout << "&lt;";
break;
case '>':
html_fout << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
html_fout.write(buf, n);
}
}
}
}
void HTMLRenderer::updateAll(GfxState * state) void HTMLRenderer::updateAll(GfxState * state)
{ {
all_changed = true; all_changed = true;
@ -359,12 +328,12 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
{ {
Unicode u = (c&0xff); Unicode u = (c&0xff);
c >>= 8; c >>= 8;
outputUnicodes(&u, 1); outputUnicodes(html_fout, &u, 1);
} }
} }
else else
{ {
outputUnicodes(u, uLen); outputUnicodes(html_fout, u, uLen);
} }
dx += dx1; dx += dx1;
@ -436,8 +405,7 @@ long long HTMLRenderer::install_font(GfxFont * font)
install_embedded_font(font, new_fn_id); install_embedded_font(font, new_fn_id);
break; break;
case gfxFontLocExternal: case gfxFontLocExternal:
std::cerr << "TODO: external font" << std::endl; install_external_font(font, new_fn_id);
export_remote_default_font(new_fn_id);
break; break;
case gfxFontLocResident: case gfxFontLocResident:
install_base_font(font, font_loc, new_fn_id); install_base_font(font, font_loc, new_fn_id);
@ -502,8 +470,20 @@ void HTMLRenderer::install_embedded_font (GfxFont * font, long long fn_id)
export_remote_font(fn_id, ".ttf", "truetype", font); export_remote_font(fn_id, ".ttf", "truetype", font);
} }
/*
 * Install a font that is not embedded in the document but was located as an
 * external font: emit a CSS rule for class id fn_id that refers to the font
 * by name.
 *
 * font  - the font being installed; only its name is read here.
 * fn_id - id of the CSS font class to define.
 */
void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id)
{
    // poppler's GfxFont::getName() returns a pointer that is NULL when the
    // font carries no name; without a name there is nothing to reference
    // locally, so fall back to the default font rule instead of dereferencing.
    auto name = font->getName();
    if(!name)
    {
        export_remote_default_font(fn_id);
        return;
    }

    std::string fontname(name->getCString());

    // resolve bad encodings in GB
    auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname);
    if(iter != GB_ENCODED_FONT_NAME_MAP.end())
        fontname = iter->second;

    // An empty cssfont makes export_local_font build the font-family value
    // from fontname followed by general_font_family(font).
    export_local_font(fn_id, font, fontname, "");
}
void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, long long fn_id) void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
{ {
std::string psname(font_loc->path->getCString()); std::string psname(font_loc->path->getCString());
string basename = psname.substr(0, psname.find('-')); string basename = psname.substr(0, psname.find('-'));
@ -517,7 +497,7 @@ void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, lon
else else
cssfont = iter->second; cssfont = iter->second;
export_local_font(fn_id, font, font_loc, psname, cssfont); export_local_font(fn_id, font, psname, cssfont);
} }
long long HTMLRenderer::install_font_size(double font_size) long long HTMLRenderer::install_font_size(double font_size)
@ -601,7 +581,7 @@ void HTMLRenderer::export_remote_default_font(long long fn_id)
allcss_fout << endl; allcss_fout << endl;
} }
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont) void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont)
{ {
allcss_fout << boost::format(".f%|1$x|{") % fn_id; allcss_fout << boost::format(".f%|1$x|{") % fn_id;
allcss_fout << "font-family:" << ((cssfont == "") ? (original_font_name+","+general_font_family(font)) : cssfont) << ";"; allcss_fout << "font-family:" << ((cssfont == "") ? (original_font_name+","+general_font_family(font)) : cssfont) << ";";

View File

@ -29,20 +29,10 @@
#include <GfxFont.h> #include <GfxFont.h>
#include "Param.h" #include "Param.h"
#include "util.h"
using namespace std; using namespace std;
static const double EPS = 1e-6;
inline bool _equal(double x, double y) { return std::abs(x-y) < EPS; }
inline bool _is_positive(double x) { return x > EPS; }
inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
for(int i = 0; i < size; ++i)
if(!_equal(tm1[i], tm2[i]))
return false;
return true;
}
class HTMLRenderer : public OutputDev class HTMLRenderer : public OutputDev
{ {
public: public:
@ -106,13 +96,12 @@ class HTMLRenderer : public OutputDev
private: private:
void close_cur_line(); void close_cur_line();
void outputUnicodes(const Unicode * u, int uLen);
// return the mapped font name // return the mapped font name
long long install_font(GfxFont * font); long long install_font(GfxFont * font);
static void output_to_file(void * outf, const char * data, int len); static void output_to_file(void * outf, const char * data, int len);
void install_embedded_font (GfxFont * font, long long fn_id); void install_embedded_font (GfxFont * font, long long fn_id);
void install_external_font (GfxFont * font, long long fn_id);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id); void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id);
long long install_font_size(double font_size); long long install_font_size(double font_size);
@ -126,7 +115,7 @@ class HTMLRenderer : public OutputDev
*/ */
void export_remote_font(long long fn_id, const string & suffix, const string & format, GfxFont * font); void export_remote_font(long long fn_id, const string & suffix, const string & format, GfxFont * font);
void export_remote_default_font(long long fn_id); void export_remote_default_font(long long fn_id);
void export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont); void export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont);
std::string general_font_family(GfxFont * font); std::string general_font_family(GfxFont * font);
void export_font_size(long long fs_id, double font_size); void export_font_size(long long fs_id, double font_size);

63
src/util.h Normal file
View File

@ -0,0 +1,63 @@
/*
* Misc functions
*
*
* by WangLu
* 2012.08.10
*/
#ifndef UTIL_H__
#define UTIL_H__
#include <algorithm>
#include <cmath>
#include <ostream>
#include <UTF8.h>
#include "Consts.h"
// Returns true when x and y differ by less than EPS (absolute tolerance).
// Uses std::fabs from <cmath> so the difference is always evaluated in
// floating point; std::abs is only guaranteed to offer a double overload
// when <cmath> is included, which this header previously did not do.
// A NaN operand makes the comparison false.
static inline bool _equal(double x, double y) { return std::fabs(x - y) < EPS; }
// Tolerant "greater than zero" test: x must exceed EPS, so values that are
// zero, negative, or positive but no larger than EPS all yield false.
static inline bool _is_positive(double x)
{
    return EPS < x;
}
// Element-wise tolerant comparison of two arrays of doubles.
//   tm1, tm2 - arrays holding at least `size` elements each
//   size     - number of leading elements to compare (6 by default)
// Returns true when every compared pair satisfies _equal; a non-positive
// size compares nothing and therefore returns true.
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
    int k = 0;
    // advance while the current pair still matches
    while(k < size && _equal(tm1[k], tm2[k]))
        ++k;
    // reaching the end means no mismatch was found
    return k >= size;
}
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
out << "&amp;";
break;
case '\"':
out << "&quot;";
break;
case '\'':
out << "&apos;";
break;
case '<':
out << "&lt;";
break;
case '>':
out << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
out.write(buf, n);
}
}
}
}
#endif //UTIL_H__