1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

merge devv

This commit is contained in:
Lu Wang 2012-08-12 19:00:32 +08:00
commit 5dee117b4c
10 changed files with 494 additions and 182 deletions

View File

@ -14,10 +14,10 @@ SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h)
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h src/Consts.cc src/util.h)
target_link_libraries(pdftohtmlEX poppler boost_program_options)

View File

@ -49,6 +49,7 @@ GPLv3
We would like to acknowledge the following projects that have been consulted while writing this program:
* pdftops & pdftohtml from poppler
* MuPDF
* PDF.js
* Crocodoc
* Google Docs

View File

@ -1,6 +1,12 @@
#!/bin/bash
set -e
TMPDIR=/tmp/pdf2htmlEX
# prepare the temporary directory
test -d $TMPDIR || mkdir -p $TMPDIR
rm -f $TMPDIR/* 2>/dev/null
# Get directory of the script
SOURCE="${BASH_SOURCE[0]}"
while [ -h "$SOURCE" ] ; do SOURCE="$(readlink "$SOURCE")"; done
@ -8,13 +14,13 @@ SCRIPT_DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"/
# Execute
${SCRIPT_DIR}/pdftohtmlEX $*
if [ -f convert.pe ]; then
if [ -f $TMPDIR/convert.pe ]; then
echo -n "Converting fonts: "
fontforge -script convert.pe 2>/dev/null
fontforge -script $TMPDIR/convert.pe 2>/dev/null
echo "."
rm convert.pe
fi
rm *.encoding 2>/dev/null
#clean
#rm -f $TMPDIR/* 2>/dev/null
echo "Done."

89
src/Consts.cc Normal file
View File

@ -0,0 +1,89 @@
/*
* Constants
*
* by WangLu
* 2012.08.10
*/
#include "Consts.h"
// Absolute tolerance for floating-point comparisons (used by _equal and
// _is_positive in util.h).
const double EPS = 1e-6;
// Opening part of every generated HTML document: doctype, inline CSS, the
// link to all.css, the page-reveal script and the opening tag of the
// #pdf-main container that the pages are written into.
//
// Every physical line of the literal ends with a backslash continuation, so
// the whole literal is one logical line; the only newline in the resulting
// string is the explicit "\n" after the doctype. Any whitespace placed at the
// start of a continued line becomes part of the string.
//
// Pages (#pdf-main > .p) start out with display:none. show_pages(), run from
// the body's onload handler, walks the child nodes of #pdf-main and switches
// them to display:block one at a time, 100 ms apart; the try/catch skips
// child nodes that have no style property.
const std::string HTML_HEAD = "<!DOCTYPE html>\n\
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
// Closes the #pdf-main div, body and html elements left open by HTML_HEAD.
const std::string HTML_TAIL = "</div></body></html>";
// Directory for intermediate files (the fontforge script convert.pe, dumped
// font files and .encoding files). The wrapper shell script hard-codes the
// same path in its TMPDIR variable, so the two must be kept in sync.
const std::string TMP_DIR = "/tmp/pdf2htmlEX";
// CSS font-family fallback lists for the PDF base-14 font families.
// The key is the font family name; the value is written verbatim as the
// font-family value of the generated CSS rule, so every name in it must be
// spelled exactly as the installed font is called ("Times New Roman", not
// "Time New Roman" - a misspelled name silently never matches).
const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP({
    { "Courier", "Courier,monospace" },
    { "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },
    { "Times", "Times,\"Times New Roman\",\"Nimbus Roman No9 L\",serif" },
    { "Symbol", "Symbol,\"Standard Symbols L\"" },
    { "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },
});
// The 2x3 identity transformation matrix in PDF order (a, b, c, d, e, f).
const double id_matrix[6] = {
    1.0, 0.0,
    0.0, 1.0,
    0.0, 0.0
};

// Workaround table for font names that reach us as raw GB-encoded bytes
// instead of a usable name: maps the byte sequence to the conventional
// Latin name of the same font. The "_GB2312" tail is split into its own
// literal only so that it cannot be read as part of the preceding hex escape;
// adjacent literals are concatenated by the compiler.
const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP = {
    { "\xCB\xCE\xCC\xE5",            "SimSun"  },
    { "\xBA\xDA\xCC\xE5",            "SimHei"  },
    { "\xBF\xAC\xCC\xE5" "_GB2312",  "SimKai"  },
    { "\xB7\xC2\xCB\xCE" "_GB2312",  "SimFang" },
    { "\xC1\xA5\xCA\xE9",            "SimLi"   }
};

View File

@ -9,72 +9,19 @@
#define CONSTS_H__
#include <string>
#include <map>
#include <string>
const char * HTML_HEAD = "<!DOCTYPE html>\n\
<html><head>\
<meta charset=\"utf-8\">\
<style type=\"text/css\">\
#pdf-main {\
font-family: sans-serif;\
position:absolute;\
top:0;\
left:0;\
bottom:0;\
right:0;\
overflow:auto;\
background-color:grey;\
}\
#pdf-main > .p {\
position:relative;\
margin:13px auto;\
background-color:white;\
overflow:hidden;\
display:none;\
}\
.p > .l {\
position:absolute; \
white-space:pre;\
}\
.l > .w {\
display:inline-block;\
visibility:hidden;\
}\
::selection{\
background: rgba(168,209,255,0.5);\
}\
::-moz-selection{\
background: rgba(168,209,255,0.5);\
}\
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
<script type=\"text/javascript\">\
function show_pages()\
{\
var pages = document.getElementById('pdf-main').childNodes;\
var idx = 0;\
var f = function(){\
if (idx < pages.length) {\
try{\
pages[idx].style.display='block';\
}catch(e){}\
++idx;\
setTimeout(f,100);\
}\
};\
f();\
};\
</script>\
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
extern const double EPS;
const char * HTML_TAIL = "</div></body></html>";
extern const std::string HTML_HEAD;
extern const std::string HTML_TAIL;
const std::map<string, string> BASE_14_FONT_CSS_FONT_MAP({\
{ "Courier", "Courier,monospace" },\
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
{ "Symbol", "Symbol,\"Standard Symbols L\"" },\
{ "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },\
});
extern const std::string TMP_DIR;
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
extern const double id_matrix[6];
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
#endif //CONSTS_H__

View File

@ -14,19 +14,24 @@
#include <cassert>
#include <fstream>
#include <algorithm>
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
// for gil bug
const int *int_p_NULL = nullptr;
#include <boost/gil/gil_all.hpp>
#include <boost/gil/extension/io/png_dynamic_io.hpp>
#include <GfxFont.h>
#include <UTF8.h>
#include <CharCodeToUnicode.h>
#include <fofi/FoFiType1C.h>
#include <fofi/FoFiTrueType.h>
#include <splash/SplashBitmap.h>
#include <CharCodeToUnicode.h>
#include "HTMLRenderer.h"
#include "BackgroundRenderer.h"
#include "Consts.h"
#include "util.h"
/*
* CSS classes
@ -50,7 +55,8 @@ HTMLRenderer::HTMLRenderer(const Param * param)
:line_opened(false)
,html_fout(param->output_filename.c_str(), ofstream::binary)
,allcss_fout("all.css")
,fontscript_fout("convert.pe")
,fontscript_fout(TMP_DIR+"/convert.pe")
,image_count(0)
,param(param)
{
// install default font & size
@ -102,10 +108,10 @@ void HTMLRenderer::process(PDFDoc *doc)
for(int i = param->first_page; i <= param->last_page ; ++i)
{
doc->displayPage(bg_renderer, i, 4*param->h_dpi, 4*param->v_dpi,
doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2,
0, true, false, false,
nullptr, nullptr, nullptr, nullptr);
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(boost::format("p%|1$x|.png")%i).str().c_str(), 4*param->h_dpi, 4*param->v_dpi);
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(boost::format("p%|1$x|.png")%i).str().c_str(), param->h_dpi2, param->v_dpi2);
std::cerr << ".";
std::cerr.flush();
@ -163,37 +169,6 @@ void HTMLRenderer::close_cur_line()
}
}
void HTMLRenderer::outputUnicodes(const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
html_fout << "&amp;";
break;
case '\"':
html_fout << "&quot;";
break;
case '\'':
html_fout << "&apos;";
break;
case '<':
html_fout << "&lt;";
break;
case '>':
html_fout << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
html_fout.write(buf, n);
}
}
}
}
void HTMLRenderer::updateAll(GfxState * state)
{
all_changed = true;
@ -308,11 +283,13 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
//real pos & hori_scale
if(0)
{
#if 0
html_fout << "\"";
double x,y;
state->transform(state->getCurX(), state->getCurY(), &x, &y);
html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-drawscale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%")
%x%y%(state->getHorizScaling())%draw_scale%state->getLineX()%state->getLineY();
#endif
}
html_fout << "\">";
@ -348,7 +325,28 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
std::cerr << "TODO: non-zero origins" << std::endl;
}
outputUnicodes(u, uLen);
if(uLen == 0)
{
// TODO
#if 0
CharCode c = 0;
for(int i = 0; i < n; ++i)
{
c = (c<<8) | (code&0xff);
code >>= 8;
}
for(int i = 0; i < n; ++i)
{
Unicode u = (c&0xff);
c >>= 8;
outputUnicodes(html_fout, &u, 1);
}
#endif
}
else
{
outputUnicodes(html_fout, u, uLen);
}
dx += dx1;
dy += dy1;
@ -376,6 +374,41 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
draw_ty += dy;
}
// Decode an image through its colour map and save it as a 16-bit-per-channel
// RGB PNG named "i<hex image_count>.png" in the current working directory,
// then advance image_count.
//
// str      - image data stream
// width    - image width in pixels
// height   - image height in pixels
// colorMap - converts the raw components of a pixel to RGB
// state, ref, interpolate, maskColors and inlineImg are not used; in
// particular colour-key masking (maskColors) is not applied.
void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
{
    // The colour components delivered by getRGB are 16.16 fixed point: full
    // intensity is 0x10000, one more than a 16-bit channel can hold. Storing
    // the raw value wrapped full intensity around to 0, so clamp to the
    // channel range instead.
    auto to_channel = [](int comp) -> unsigned short {
        if(comp < 0)
            return 0;
        if(comp > 0xffff)
            return 0xffff;
        return (unsigned short)comp;
    };

    const int n_comps = colorMap->getNumPixelComps();

    boost::gil::rgb16_image_t img(width, height);
    auto imgview = view(img);

    // Automatic storage: the stream object is destroyed even if writing the
    // PNG throws.
    ImageStream img_stream(str, width, n_comps, colorMap->getBits());
    img_stream.reset();

    bool truncated = false;
    for(int row = 0; row < height; ++row)
    {
        unsigned char * line = nullptr;
        if(!truncated)
        {
            line = img_stream.getLine();
            if(line == nullptr)
            {
                // the stream ended before all rows were read
                truncated = true;
                std::cerr << "Warning: image data ended early, padding with black" << std::endl;
            }
        }

        auto loc = imgview.xy_at(0, row);
        for(int col = 0; col < width; ++col)
        {
            if(line != nullptr)
            {
                GfxRGB rgb;
                colorMap->getRGB(line, &rgb);
                *loc = boost::gil::rgb16_pixel_t(to_channel(rgb.r), to_channel(rgb.g), to_channel(rgb.b));
                line += n_comps;
            }
            else
            {
                // never leave pixels uninitialised in the written file
                *loc = boost::gil::rgb16_pixel_t(0, 0, 0);
            }
            ++ loc.x();
        }
    }

    boost::gil::png_write_view((boost::format("i%|1$x|.png")%image_count).str(), imgview);

    img_stream.close();

    ++ image_count;
}
// The font installation code is stolen from PSOutputDev.cc in poppler
long long HTMLRenderer::install_font(GfxFont * font)
@ -383,6 +416,7 @@ long long HTMLRenderer::install_font(GfxFont * font)
assert(sizeof(long long) == 2*sizeof(int));
long long fn_id = (font == nullptr) ? 0 : *reinterpret_cast<long long*>(font->getID());
auto iter = font_name_map.find(fn_id);
if(iter != font_name_map.end())
return iter->second.fn_id;
@ -397,7 +431,10 @@ long long HTMLRenderer::install_font(GfxFont * font)
return new_fn_id;
}
string new_fn = (boost::format("f%|1$x|") % new_fn_id).str();
if(param->debug)
{
std::cerr << "Install font: (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") -> " << boost::format("f%|1$x|")%new_fn_id << std::endl;
}
if(font->getType() == fontType3) {
std::cerr << "Type 3 fonts are unsupported and will be rendered as Image" << std::endl;
@ -416,11 +453,20 @@ long long HTMLRenderer::install_font(GfxFont * font)
switch(font_loc -> locType)
{
case gfxFontLocEmbedded:
install_embedded_font(font, new_fn_id);
{
std::string suffix = dump_embedded_font(font, new_fn_id);
if(suffix != "")
{
install_embedded_font(font, suffix, new_fn_id);
}
else
{
export_remote_default_font(new_fn_id);
}
}
break;
case gfxFontLocExternal:
std::cerr << "TODO: external font" << std::endl;
export_remote_default_font(new_fn_id);
install_external_font(font, new_fn_id);
break;
case gfxFontLocResident:
install_base_font(font, font_loc, new_fn_id);
@ -430,63 +476,207 @@ long long HTMLRenderer::install_font(GfxFont * font)
export_remote_default_font(new_fn_id);
break;
}
delete font_loc;
}
else
{
export_remote_default_font(new_fn_id);
}
return new_fn_id;
}
void HTMLRenderer::install_embedded_font (GfxFont * font, long long fn_id)
std::string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
{
// fontforge doesn't fully support ToUnicode CMap, so we have to generate the encoding for fontforge
fontscript_fout << boost::format("Open(\"%1%(%2%)\",1)") % param->input_filename % font->getName()->getCString() << endl;
if(font->hasToUnicodeCMap() && (font->getType() == fontTrueType))
// mupdf consulted
Object ref_obj, font_obj, font_obj2, fontdesc_obj;
Object obj, obj1, obj2;
Dict * dict = nullptr;
std::string suffix, subtype;
char buf[1024];
int len;
ofstream outf;
auto * id = font->getID();
ref_obj.initRef(id->num, id->gen);
ref_obj.fetch(xref, &font_obj);
ref_obj.free();
if(!font_obj.isDict())
{
char * buf;
int buflen;
FoFiTrueType * ttf;
if((buf = font->readEmbFontFile(xref, &buflen)))
{
if((ttf = FoFiTrueType::make(buf, buflen)))
{
auto ctg = dynamic_cast<Gfx8BitFont*>(font)->getCodeToGIDMap(ttf);
auto ctu = font->getToUnicode();
ofstream map_fout((boost::format("f%|1$x|.encoding") % fn_id).str().c_str());
for(int i = 0; i < 256; ++i)
{
int code = ctg[i];
Unicode * u;
auto n = ctu->mapToUnicode(i, &u);
// not sure what to do when n > 1
if(n > 0)
{
map_fout << boost::format("0x%|1$X|") % code;
for(int j = 0; j < n; ++j)
map_fout << boost::format(" 0x%|1$X|") % u[j];
map_fout << boost::format(" # 0x%|1$X|") % i << endl;
}
}
fontscript_fout << boost::format("LoadEncodingFile(\"f%|1$x|.encoding\", \"f%|1$x|\")") % fn_id << endl;
fontscript_fout << boost::format("Reencode(\"f%|1$x|\", 1)") % fn_id << endl;
ctu->decRefCnt();
delete ttf;
}
gfree(buf);
}
std::cerr << "Font object is not a dictionary" << std::endl;
goto err;
}
fontscript_fout << boost::format("Generate(\"f%|1$x|.ttf\")") % fn_id << endl;
dict = font_obj.getDict();
if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
{
if(font_obj2.arrayGetLength() == 0)
{
std::cerr << "Warning: empty DescendantFonts array" << std::endl;
}
else
{
if(font_obj2.arrayGetLength() > 1)
std::cerr << "TODO: multiple entries in DescendantFonts array" << std::endl;
if(font_obj2.arrayGet(0, &obj2)->isDict())
{
dict = obj2.getDict();
}
}
}
if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
{
std::cerr << "Cannot find FontDescriptor " << std::endl;
goto err;
}
dict = fontdesc_obj.getDict();
if(dict->lookup("FontFile3", &obj)->isStream())
{
if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
{
subtype = obj1.getName();
if(subtype == "Type1C")
{
suffix = ".cff";
}
else if (subtype == "CIDFontType0C")
{
suffix = ".cid";
}
else
{
std::cerr << "Unknown subtype: " << subtype << std::endl;
goto err;
}
}
else
{
std::cerr << "Invalid subtype in font descriptor" << std::endl;
goto err;
}
}
else if (dict->lookup("FontFile2", &obj)->isStream())
{
suffix = ".ttf";
}
else if (dict->lookup("FontFile", &obj)->isStream())
{
suffix = ".ttf";
}
else
{
std::cerr << "Cannot find FontFile for dump" << std::endl;
goto err;
}
if(suffix == "")
{
std::cerr << "Font type unrecognized" << std::endl;
goto err;
}
obj.streamReset();
outf.open((boost::format("%1%/f%|2$x|%3%")%TMP_DIR%fn_id%suffix).str().c_str(), ofstream::binary);
while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0)
{
outf.write(buf, len);
}
outf.close();
obj.streamClose();
err:
obj2.free();
obj1.free();
obj.free();
fontdesc_obj.free();
font_obj2.free();
font_obj.free();
return suffix;
}
// Emit the fontforge script commands that convert a previously dumped
// embedded font (TMP_DIR/f<hex fn_id><suffix>) into f<hex fn_id>.ttf, and
// register the result as a remote font in the CSS.
//
// font   - the PDF font the file was dumped from
// suffix - file extension of the dumped font, including the leading dot
// fn_id  - id the font was installed under
//
// When the font has a ToUnicode map, an encoding file
// TMP_DIR/f<hex fn_id>.encoding is written with one line per mapped code
// ("<glyph/code> <unicode>... # <code>") and the script re-encodes the font
// with it, because fontforge does not handle ToUnicode CMaps by itself.
void HTMLRenderer::install_embedded_font(GfxFont * font, const std::string & suffix, long long fn_id)
{
    // TODO Should use standard way to handle CID fonts
    std::string fn = (boost::format("f%|1$x|") % fn_id).str();

    fontscript_fout << boost::format("Open(\"%1%/%2%%3%\",1)") % TMP_DIR % fn % suffix << endl;

    auto ctu = font->getToUnicode();
    if(ctu)
    {
        // TODO: ctu could be CID2Unicode for CID fonts
        int * code2GID = nullptr;
        int code2GID_len = 0;   // number of valid entries in code2GID
        int maxcode = 0;

        if(!font->isCIDFont())
        {
            maxcode = 0xff;
            //TODO read code2GID for TrueType
        }
        else
        {
            maxcode = 0xffff;
            if(suffix != ".ttf")
            {
                fontscript_fout << "CIDFlatten()" << endl;
            }
            else
            {
                fontscript_fout << boost::format("Reencode(\"original\")") << endl;
                // code2GID has been stored for embedded CID fonts
                // NOTE(review): poppler's own callers gfree() the returned map;
                // it is never released here - confirm ownership and free it.
                code2GID = dynamic_cast<GfxCIDFont*>(font)->getCodeToGIDMap(nullptr, &code2GID_len);
            }
        }

        ofstream map_fout((boost::format("%1%/%2%.encoding") % TMP_DIR % fn).str().c_str());
        int cnt = 0;
        for(int i = 0; i <= maxcode; ++i)
        {
            Unicode * u;
            auto n = ctu->mapToUnicode(i, &u);
            // not sure what to do when n > 1
            if(n > 0)
            {
                ++cnt;
                // The map may be shorter than maxcode+1 entries; codes beyond
                // its end are treated like the "no map" case instead of
                // reading past the end of the array.
                const int gid = (code2GID != nullptr && i < code2GID_len) ? code2GID[i] : i;
                map_fout << boost::format("0x%|1$X|") % gid;
                for(int j = 0; j < n; ++j)
                    map_fout << boost::format(" 0x%|1$X|") % u[j];
                map_fout << boost::format(" # 0x%|1$X|") % i << endl;
            }
        }

        // only re-encode when at least one code was actually mapped
        if(cnt > 0)
        {
            fontscript_fout << boost::format("LoadEncodingFile(\"%1%/%2%.encoding\", \"%2%\")") % TMP_DIR % fn << endl;
            fontscript_fout << boost::format("Reencode(\"%1%\", 1)") % fn << endl;
        }

        ctu->decRefCnt();
    }

    fontscript_fout << boost::format("Generate(\"%1%.ttf\")") % fn << endl;
    export_remote_font(fn_id, ".ttf", "truetype", font);
}
void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id)
{
std::string psname(font_loc->path->getCString());
string basename = psname.substr(0, psname.find('-'));
@ -500,9 +690,24 @@ void HTMLRenderer::install_base_font( GfxFont * font, GfxFontLoc * font_loc, lon
else
cssfont = iter->second;
export_local_font(fn_id, font, font_loc, psname, cssfont);
export_local_font(fn_id, font, psname, cssfont);
}
// Register a font that is located outside the PDF as a local font, under the
// name reported by the font itself. Names that are known GB-encoded byte
// sequences are first replaced by their Latin equivalent from
// GB_ENCODED_FONT_NAME_MAP (a warning is printed when that happens).
void HTMLRenderer::install_external_font( GfxFont * font, long long fn_id)
{
    const std::string raw_name(font->getName()->getCString());

    // resolve bad encodings in GB
    const auto hit = GB_ENCODED_FONT_NAME_MAP.find(raw_name);
    if(hit == GB_ENCODED_FONT_NAME_MAP.end())
    {
        // ordinary name, use it as it is
        export_local_font(fn_id, font, raw_name, "");
        return;
    }

    std::cerr << "Warning: workaround for font names in bad encodings." << std::endl;
    export_local_font(fn_id, font, hit->second, "");
}
long long HTMLRenderer::install_font_size(double font_size)
{
auto iter = font_size_map.lower_bound(font_size - EPS);
@ -584,13 +789,14 @@ void HTMLRenderer::export_remote_default_font(long long fn_id)
allcss_fout << endl;
}
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont)
void HTMLRenderer::export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont)
{
allcss_fout << boost::format(".f%|1$x|{") % fn_id;
allcss_fout << "font-family:" << ((cssfont == "") ? (original_font_name+","+general_font_family(font)) : cssfont) << ";";
allcss_fout << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
if(font->isBold())
allcss_fout << "font-weight:bold;";
if(boost::algorithm::ifind_first(original_font_name, "oblique"))
allcss_fout << "font-style:oblique;";
else if(font->isItalic())
@ -667,7 +873,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
void HTMLRenderer::export_color(long long color_id, const GfxRGB * rgb)
{
allcss_fout << boost::format(".c%|1$x|{color:rgb(%2%,%3%,%4%);}")
% color_id % rgb->r % rgb->g % rgb->b;
% color_id % (int)colToByte(rgb->r) % (int)colToByte(rgb->g) % (int)colToByte(rgb->b);
allcss_fout << endl;
}

View File

@ -29,20 +29,10 @@
#include <GfxFont.h>
#include "Param.h"
#include "util.h"
using namespace std;
// Absolute tolerance shared by the comparison helpers below.
static const double EPS = 1e-6;

// True when x and y differ by less than EPS.
inline bool _equal(double x, double y)
{
    const double gap = std::abs(x-y);
    return gap < EPS;
}

// True when x exceeds EPS, i.e. is positive beyond the tolerance.
inline bool _is_positive(double x)
{
    return EPS < x;
}

// True when the first `size` entries of tm1 and tm2 are pairwise _equal
// (6 entries by default). A size of zero or less compares nothing and
// yields true.
inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
    int k = 0;
    while(k < size && _equal(tm1[k], tm2[k]))
        ++k;
    // the scan stops early exactly when a differing pair was found
    return k >= size;
}
class HTMLRenderer : public OutputDev
{
public:
@ -100,20 +90,23 @@ class HTMLRenderer : public OutputDev
virtual void updateTextShift(GfxState * state, double shift);
virtual void updateFillColor(GfxState * state);
//----- text drawing
virtual void drawString(GfxState * state, GooString * s);
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
private:
void close_cur_line();
void outputUnicodes(const Unicode * u, int uLen);
// return the mapped font name
long long install_font(GfxFont * font);
static void output_to_file(void * outf, const char * data, int len);
void install_embedded_font (GfxFont * font, long long fn_id);
std::string dump_embedded_font (GfxFont * font, long long fn_id);
void install_embedded_font(GfxFont * font, const std::string & suffix, long long fn_id);
void install_base_font(GfxFont * font, GfxFontLoc * font_loc, long long fn_id);
void install_external_font (GfxFont * font, long long fn_id);
long long install_font_size(double font_size);
long long install_whitespace(double ws_width, double & actual_width);
@ -126,7 +119,7 @@ class HTMLRenderer : public OutputDev
*/
void export_remote_font(long long fn_id, const string & suffix, const string & format, GfxFont * font);
void export_remote_default_font(long long fn_id);
void export_local_font(long long fn_id, GfxFont * font, GfxFontLoc * font_loc, const string & original_font_name, const string & cssfont);
void export_local_font(long long fn_id, GfxFont * font, const string & original_font_name, const string & cssfont);
std::string general_font_family(GfxFont * font);
void export_font_size(long long fs_id, double font_size);
@ -143,8 +136,6 @@ class HTMLRenderer : public OutputDev
double pageWidth ;
double pageHeight ;
// state tracking when processing pdf
void check_state_change(GfxState * state);
void reset_state_track();
@ -250,6 +241,8 @@ class HTMLRenderer : public OutputDev
};
map<Color, long long> color_map;
int image_count;
const Param * param;
};

View File

@ -18,9 +18,12 @@ struct Param
int first_page, last_page;
double h_dpi, v_dpi;
double h_dpi2, v_dpi2;
double h_eps, v_eps;
int process_nontext;
int debug;
};

View File

@ -11,12 +11,13 @@
#include <ctime>
#include <string>
#include <limits>
#include <goo/GooString.h>
#include <iostream>
#include <boost/program_options.hpp>
#include <boost/algorithm/string/predicate.hpp>
#include <goo/GooString.h>
#include "Object.h"
#include "PDFDoc.h"
#include "PDFDocFactory.h"
@ -121,11 +122,14 @@ po::variables_map parse_options (int argc, char **argv)
("metadata,m", "show the document meta data in JSON")
("owner-password,o", po::value<string>(&param.owner_password)->default_value(""), "owner password (for encrypted files)")
("user-password,u", po::value<string>(&param.user_password)->default_value(""), "user password (for encrypted files)")
("hdpi", po::value<double>(&param.h_dpi)->default_value(72.0), "horizontal DPI")
("vdpi", po::value<double>(&param.v_dpi)->default_value(72.0), "vertical DPI")
("hdpi", po::value<double>(&param.h_dpi)->default_value(72.0), "horizontal DPI for text")
("vdpi", po::value<double>(&param.v_dpi)->default_value(72.0), "vertical DPI for text")
("hdpi2", po::value<double>(&param.h_dpi2)->default_value(144.0), "horizontal DPI for non-text")
("vdpi2", po::value<double>(&param.v_dpi2)->default_value(144.0), "vertical DPI for non-text")
("heps", po::value<double>(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
("process-nontext", po::value<int>(&param.process_nontext)->default_value(1), "process nontext objects")
("debug", po::value<int>(&param.debug)->default_value(0), "output debug information")
;
opt_hidden.add_options()

63
src/util.h Normal file
View File

@ -0,0 +1,63 @@
/*
* Misc functions
*
*
* by WangLu
* 2012.08.10
*/
#ifndef UTIL_H__
#define UTIL_H__
#include <algorithm>
#include <cmath>
#include <ostream>
#include <UTF8.h>
#include "Consts.h"
// True when x and y differ by less than EPS.
// std::fabs (from <cmath>) is used so that the floating-point absolute value
// is selected explicitly rather than depending on which std::abs overloads
// other headers happen to make visible.
static inline bool _equal(double x, double y) { return std::fabs(x-y) < EPS; }
// True when x exceeds EPS, i.e. is positive beyond the comparison tolerance.
static inline bool _is_positive(double x)
{
    return EPS < x;
}
// True when the first `size` entries of tm1 and tm2 are pairwise _equal
// (6 entries by default, the length of a transformation matrix). A size of
// zero or less compares nothing and yields true.
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
    int k = 0;
    while(k < size && _equal(tm1[k], tm2[k]))
        ++k;
    // the scan stops early exactly when a differing pair was found
    return k >= size;
}
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
{
for(int i = 0; i < uLen; ++i)
{
switch(u[i])
{
case '&':
out << "&amp;";
break;
case '\"':
out << "&quot;";
break;
case '\'':
out << "&apos;";
break;
case '<':
out << "&lt;";
break;
case '>':
out << "&gt;";
break;
default:
{
char buf[4];
auto n = mapUTF8(u[i], buf, 4);
out.write(buf, n);
}
}
}
}
#endif //UTIL_H__