1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

support external font

This commit is contained in:
Lu Wang 2012-08-10 21:30:41 +08:00
parent 86578b4c67
commit 5a4eccc632
6 changed files with 179 additions and 117 deletions

View File

@ -17,7 +17,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h)
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h src/Consts.cc src/util.h)
target_link_libraries(pdftohtmlEX poppler boost_program_options)

85
src/Consts.cc Normal file
View File

@ -0,0 +1,85 @@
/*
* Constants
*
* by WangLu
* 2012.08.10
*/
#include "Consts.h"
const double EPS = 1e-6;
const char * HTML_HEAD = "<!DOCTYPE html>\n\
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
const char * HTML_TAIL = "</div></body></html>";
const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP({\
{ "Courier", "Courier,monospace" },\
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
{ "Symbol", "Symbol,\"Standard Symbols L\"" },\
{ "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },\
});
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP({\
{"\xCB\xCE\xCC\xE5", "SimSun"},\
{"\xBA\xDA\xCC\xE5", "SimHei"},\
{"\xBF\xAC\xCC\xE5_GB2312", "SimKai"},\
{"\xB7\xC2\xCB\xCE_GB2312", "SimFang"},\
{"\xC1\xA5\xCA\xE9", "SimLi"},\
});

View File

@ -9,72 +9,17 @@
#define CONSTS_H__
#include <string>
#include <map>
#include <string>
const char * HTML_HEAD = "<!DOCTYPE html>\n\
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
extern const double EPS;
const char * HTML_TAIL = "</div></body></html>";
extern const char * HTML_HEAD;
extern const char * HTML_TAIL;
const std::map<string, string> BASE_14_FONT_CSS_FONT_MAP({\
{ "Courier", "Courier,monospace" },\
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
{ "Symbol", "Symbol,\"Standard Symbols L\"" },\
{ "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },\
});
extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
extern const double id_matrix[6];
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
#endif //CONSTS_H__

View File

@ -18,7 +18,6 @@
#include <boost/algorithm/string.hpp>
#include <GfxFont.h>
#include <UTF8.h>
#include <fofi/FoFiType1C.h>
#include <fofi/FoFiTrueType.h>
#include <splash/SplashBitmap.h>
@ -27,6 +26,7 @@
#include "HTMLRenderer.h"
#include "BackgroundRenderer.h"
#include "Consts.h"
#include "util.h"
/*
* CSS classes
@ -163,37 +163,6 @@ void HTMLRenderer::close_cur_line()
}
}
void HTMLRenderer::outputUnicodes(const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
html_fout << "&amp;";
break;
case '\"':
html_fout << "&quot;";
break;
case '\'':
html_fout << "&apos;";
break;
case '<':
html_fout << "&lt;";
break;
case '>':
html_fout << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
html_fout.write(buf, n);
}
}
}
}
void HTMLRenderer::updateAll(GfxState * state)
{
all_changed = true;
@ -359,12 +328,12 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
Unicode u = (c&0xff);
c >>= 8;
outputUnicodes(&u, 1);
outputUnicodes(html_fout, &u, 1);
}
}
else
{
outputUnicodes(u, uLen);
outputUnicodes(html_fout, u, uLen);
}
dx += dx1;
@ -436,8 +405,7 @@ long long HTMLRenderer::install_font(GfxFont * font)
install_embedded_font(font, new_fn_id);
break;
case gfxFontLocExternal:
std::cerr << "TODO: external font" << std::endl;
export_remote_default_font(new_fn_id);
install_external_font(font, new_fn_id);
break;
case gfxFontLocResident:
install_base_font(font, font_loc, new_fn_id);
@ -503,6 +471,18 @@ void HTMLRenderer::install_embedded_font (GfxFont * font, long long fn_id)
export_remote_font(fn_id, ".ttf", "truetype", font);
}
void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id)
{
std::string fontname(font->getName()->getCString());
// resolve bad encodings in GB
auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname);
if(iter != GB_ENCODED_FONT_NAME_MAP.end())
fontname = iter->second;
export_local_font(fn_id, font, fontname, "");
}
void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
{
std::string psname(font_loc->path->getCString());
@ -517,7 +497,7 @@ void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, lon
else
cssfont = iter->second;
export_local_font(fn_id, font, font_loc, psname, cssfont);
export_local_font(fn_id, font, psname, cssfont);
}
long long HTMLRenderer::install_font_size(double font_size)
@ -601,7 +581,7 @@ void HTMLRenderer::export_remote_default_font(long long fn_id)
allcss_fout << endl;
}
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont)
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont)
{
allcss_fout << boost::format(".f%|1$x|{") % fn_id;
allcss_fout << "font-family:" << ((cssfont == "") ? (original_font_name+","+general_font_family(font)) : cssfont) << ";";

View File

@ -29,20 +29,10 @@
#include <GfxFont.h>
#include "Param.h"
#include "util.h"
using namespace std;
static const double EPS = 1e-6;
inline bool _equal(double x, double y) { return std::abs(x-y) < EPS; }
inline bool _is_positive(double x) { return x > EPS; }
inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
for(int i = 0; i < size; ++i)
if(!_equal(tm1[i], tm2[i]))
return false;
return true;
}
class HTMLRenderer : public OutputDev
{
public:
@ -106,13 +96,12 @@ class HTMLRenderer : public OutputDev
private:
void close_cur_line();
void outputUnicodes(const Unicode * u, int uLen);
// return the mapped font name
long long install_font(GfxFont * font);
static void output_to_file(void * outf, const char * data, int len);
void install_embedded_font (GfxFont * font, long long fn_id);
void install_external_font (GfxFont * font, long long fn_id);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id);
long long install_font_size(double font_size);
@ -126,7 +115,7 @@ class HTMLRenderer : public OutputDev
*/
void export_remote_font(long long fn_id, const string & suffix, const string & format, GfxFont * font);
void export_remote_default_font(long long fn_id);
void export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont);
void export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont);
std::string general_font_family(GfxFont * font);
void export_font_size(long long fs_id, double font_size);

63
src/util.h Normal file
View File

@ -0,0 +1,63 @@
/*
* Misc functions
*
*
* by WangLu
* 2012.08.10
*/
#ifndef UTIL_H__
#define UTIL_H__
#include <algorithm>
#include <ostream>
#include <UTF8.h>
#include "Consts.h"
static inline bool _equal(double x, double y) { return std::abs(x-y) < EPS; }
static inline bool _is_positive(double x) { return x > EPS; }
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
for(int i = 0; i < size; ++i)
if(!_equal(tm1[i], tm2[i]))
return false;
return true;
}
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
out << "&amp;";
break;
case '\"':
out << "&quot;";
break;
case '\'':
out << "&apos;";
break;
case '<':
out << "&lt;";
break;
case '>':
out << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
out.write(buf, n);
}
}
}
}
#endif //UTIL_H__