1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 17:48:38 +00:00

Merge branch 'devv'

This commit is contained in:
Lu Wang 2012-10-03 12:54:24 +08:00
commit 0eb1f962f7
28 changed files with 894 additions and 253 deletions

View File

@ -20,6 +20,22 @@ include_directories(${POPPLER_INCLUDE_DIRS})
link_directories(${POPPLER_LIBRARY_DIRS}) link_directories(${POPPLER_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES})
# disable CAIRO for now
if(0)
pkg_check_modules(POPPLER_CAIRO poppler-cairo>=0.20.0)
if(POPPLER_CAIRO_FOUND)
set(HAVE_CAIRO 1)
include_directories(${POPPLER_CAIRO_INCLUDE_DIRS})
link_directories(${POPPLER_CAIRO_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_CAIRO_LIBRARIES})
else()
set(HAVE_CAIRO 0)
endif()
else()
set(HAVE_CAIRO 0)
endif()
# fontforge starts using pkg-config 'correctly' since 2.0.0 # fontforge starts using pkg-config 'correctly' since 2.0.0
pkg_check_modules(FONTFORGE libfontforge>=2.0.0) pkg_check_modules(FONTFORGE libfontforge>=2.0.0)
if(FONTFORGE_FOUND) if(FONTFORGE_FOUND)
@ -127,13 +143,17 @@ add_executable(pdf2htmlEX
src/HTMLRenderer/export.cc src/HTMLRenderer/export.cc
src/HTMLRenderer/text.cc src/HTMLRenderer/text.cc
src/HTMLRenderer/image.cc src/HTMLRenderer/image.cc
src/HTMLRenderer/draw.cc
src/HTMLRenderer/link.cc src/HTMLRenderer/link.cc
src/include/namespace.h src/include/namespace.h
src/HTMLRenderer/LineBuffer.cc src/HTMLRenderer/LineBuffer.cc
src/include/ffw.h src/include/ffw.h
src/ffw.c src/ffw.c
src/include/BackgroundRenderer.h src/include/BackgroundRenderer.h
src/BackgroundRenderer.cc src/include/SplashBackgroundRenderer.h
src/SplashBackgroundRenderer.cc
src/include/CairoBackgroundRenderer.h
src/CairoBackgroundRenderer.cc
src/include/Preprocessor.h src/include/Preprocessor.h
src/Preprocessor.cc src/Preprocessor.cc
src/include/util.h src/include/util.h

View File

@ -38,6 +38,7 @@ This program is designed for scientific papers with complicate formulas and figu
* Proper styling (Color, Transformation...) * Proper styling (Color, Transformation...)
* Links * Links
* Optimization for Web * Optimization for Web
* [EXPERIMENTAL] Path drawing with CSS
### Not supported yet ### Not supported yet
@ -89,6 +90,12 @@ I have tested with CYGWIN without any problem, and I believe it also works on Mi
pdf2htmlEX --help pdf2htmlEX --help
### For Geeks
* Experimental and unsupported
pdf2htmlEX --process-nontext 0 --css-draw 0 /path/to/foobar.pdf
## FAQ ## FAQ
[here](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) [here](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)

5
TODO
View File

@ -1,3 +1,6 @@
Integrate splash/cairo
native support for image
native support for draw
about glyph width: about glyph width:
- IE - IE
@ -29,5 +32,3 @@ combine lines (unwarp)
Printing Printing
multiple charcode mapped to a same glyph multiple charcode mapped to a same glyph
check if we can add information to the font, and let browsers show ligatures automatically check if we can add information to the font, and let browsers show ligatures automatically
native support for image
native support for draw

View File

@ -81,4 +81,12 @@ span {
} }
.a { .a {
} }
.Cd {
position:absolute;
transform-origin:0% 100%;
-ms-transform-origin:0% 100%;
-moz-transform-origin:0% 100%;
-webkit-transform-origin:0% 100%;
-o-transform-origin:0% 100%;
}
/* Base CSS END */ /* Base CSS END */

View File

@ -1,23 +0,0 @@
/*
* BackgroundRenderer.cc
*
* Copyright (C) 2012 by Lu Wang coolwanglu<at>gmail.com
*/
#include <algorithm>
#include "GfxFont.h"
#include "BackgroundRenderer.h"
#include "util.h"
using namespace pdf2htmlEX;
/*
 * Character drawing hook of the background renderer.
 * The body contains no statements: the only line is a commented-out call to
 * SplashOutputDev::drawChar, so every argument is ignored and nothing is drawn.
 * NOTE(review): presumably this keeps text out of the background image - confirm.
 */
void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// Forwarding call left disabled on purpose; re-enabling it would hand the character to SplashOutputDev.
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}

View File

@ -0,0 +1,30 @@
/*
* CairoBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#include "pdf2htmlEX-config.h"
#if HAVE_CAIRO
#include "CairoBackgroundRenderer.h"
namespace pdf2htmlEX {
/*
 * Character drawing hook of the Cairo background renderer.
 * The body contains no statements: the only line is a commented-out call to
 * CairoOutputDev::drawChar, so every argument is ignored and nothing is drawn.
 * NOTE(review): presumably this keeps text out of the background image - confirm.
 */
void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// Forwarding call left disabled on purpose; re-enabling it would hand the character to CairoOutputDev.
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}
/*
 * Render one page of doc into the file named by filename.
 * Currently an empty stub: the body has no statements, so none of the
 * parameters (doc, pageno, filename) is used and no output is produced.
 */
void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const std::string & filename)
{
}
} // namespace pdf2htmlEX
#endif // HAVE_CAIRO

View File

@ -22,7 +22,7 @@ using std::ostream;
void HTMLRenderer::LineBuffer::reset(GfxState * state) void HTMLRenderer::LineBuffer::reset(GfxState * state)
{ {
state->transform(state->getCurX(), state->getCurY(), &x, &y); state->transform(state->getCurX(), state->getCurY(), &x, &y);
tm_id = renderer->cur_tm_id; tm_id = renderer->cur_ttm_id;
} }
void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l) void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l)
@ -75,7 +75,7 @@ void HTMLRenderer::LineBuffer::flush(void)
for(auto iter = states.begin(); iter != states.end(); ++iter) for(auto iter = states.begin(); iter != states.end(); ++iter)
{ {
const auto & s = *iter; const auto & s = *iter;
max_ascent = max(max_ascent, s.ascent * s.draw_font_size); max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
} }
ostream & out = renderer->html_fout; ostream & out = renderer->html_fout;
@ -155,7 +155,7 @@ void HTMLRenderer::LineBuffer::flush(void)
++ cur_offset_iter; ++ cur_offset_iter;
} }
size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); size_t next_text_idx = min<size_t>(cur_state_iter->start_idx, cur_offset_iter->start_idx);
outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx); outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx);
cur_text_idx = next_text_idx; cur_text_idx = next_text_idx;

399
src/HTMLRenderer/draw.cc Normal file
View File

@ -0,0 +1,399 @@
/*
* draw.cc
*
* Handling path drawing
*
* by WangLu
* 2012.10.01
*/
#include <algorithm>
#include <cmath>
#include <sstream>
#include <vector>
#include <iostream>
#include "HTMLRenderer.h"
#include "util.h"
#include "namespace.h"
namespace pdf2htmlEX {
using std::swap;
using std::min;
using std::max;
using std::acos;
using std::asin;
using std::ostringstream;
using std::sqrt;
using std::vector;
using std::ostream;
/*
 * A subpath counts as a horizontal line when it has exactly two points,
 * the second point is not a curve control point, and both points share
 * the same y coordinate (compared with _equal).
 */
static bool is_horizontal_line(GfxSubpath * path)
{
    if(path->getNumPoints() != 2)
        return false;

    // a curve flag on point 1 means the segment is not straight
    if(path->getCurve(1))
        return false;

    return _equal(path->getY(0), path->getY(1));
}
/*
 * A subpath counts as a vertical line when it has exactly two points,
 * the second point is not a curve control point, and both points share
 * the same x coordinate (compared with _equal).
 */
static bool is_vertical_line(GfxSubpath * path)
{
    if(path->getNumPoints() != 2)
        return false;

    // a curve flag on point 1 means the segment is not straight
    if(path->getCurve(1))
        return false;

    return _equal(path->getX(0), path->getX(1));
}
/*
 * Check whether a subpath describes an axis-aligned rectangle
 * (in the coordinate system the path is expressed in).
 *
 * Two encodings are accepted:
 *  - 4 points and the subpath is marked closed
 *  - 5 points where the last point coincides with the first one
 * All segments must be straight, and consecutive edges must alternate
 * between horizontal and vertical (either orientation first).
 */
static bool is_rectangle(GfxSubpath * path)
{
    const int num_points = path->getNumPoints();

    // exactly four corners, implicitly closed
    // (points 0..3 are accessed below, so anything other than 4 must be rejected here)
    const bool closed_quad = (num_points == 4) && (path->isClosed());

    // four corners plus an explicit return to the start point
    const bool explicit_loop = (num_points == 5)
        && _equal(path->getX(0), path->getX(4))
        && _equal(path->getY(0), path->getY(4));

    if(!(closed_quad || explicit_loop))
        return false;

    // no curve segments allowed
    for(int i = 1; i < num_points; ++i)
        if(path->getCurve(i))
            return false;

    // first edge horizontal: H, V, H, V
    const bool horizontal_first = _equal(path->getY(0), path->getY(1))
        && _equal(path->getX(1), path->getX(2))
        && _equal(path->getY(2), path->getY(3))
        && _equal(path->getX(3), path->getX(0));
    if(horizontal_first)
        return true;

    // first edge vertical: V, H, V, H
    return _equal(path->getX(0), path->getX(1))
        && _equal(path->getY(1), path->getY(2))
        && _equal(path->getX(2), path->getX(3))
        && _equal(path->getY(3), path->getY(0));
}
/*
 * Compute the bounding box of a shading.
 *
 * If the shading carries its own bounding box, that one is returned.
 * Otherwise the clip bounding box of the state is taken, its four corners
 * are mapped through the inverse of the CTM, and the axis-aligned box
 * enclosing the mapped corners is returned.
 *
 * Results are written to x1, y1 (minimum) and x2, y2 (maximum).
 */
static void get_shading_bbox(GfxState * state, GfxShading * shading,
        double & x1, double & y1, double & x2, double & y2)
{
    // from SplashOutputDev.cc in poppler
    if(shading->getHasBBox())
    {
        shading->getBBox(&x1, &y1, &x2, &y2);
        return;
    }

    state->getClipBBox(&x1, &y1, &x2, &y2);

    Matrix ctm, ictm;
    state->getCTM(&ctm);
    ctm.invertTo(&ictm);

    // corners of the clip box, in the same order as they are folded below
    const double corner_x[4] = { x1, x2, x1, x2 };
    const double corner_y[4] = { y1, y1, y2, y2 };

    double tx, ty;
    ictm.transform(corner_x[0], corner_y[0], &tx, &ty);
    x1 = x2 = tx;
    y1 = y2 = ty;

    for(int k = 1; k < 4; ++k)
    {
        ictm.transform(corner_x[k], corner_y[k], &tx, &ty);
        x1 = min<double>(x1, tx);
        y1 = min<double>(y1, ty);
        x2 = max<double>(x2, tx);
        y2 = max<double>(y2, ty);
    }
}
/*
* Note that the coordinate system in HTML and PDF are different
*/
/*
 * Angle (in radians) of the vector (dx, dy) measured from the positive x-axis.
 *
 * The result is in [-pi, pi]: non-negative when dy >= 0, negative when dy < 0.
 * A zero-length (or NaN) vector has no direction; 0 is returned for it
 * instead of the NaN that 0/0 would produce.
 */
static double get_angle(double dx, double dy)
{
    const double r = std::hypot(dx, dy);

    // degenerate vector: avoid dividing by zero below
    if(!(r > 0))
        return 0.0;

    /*
     * acos always returns [0, pi]
     */
    double ang = std::acos(dx / r);

    /*
     * for angle below x-axis
     */
    if(dy < 0)
        ang = -ang;

    return ang;
}
/*
 * Description of a linear gradient: a direction plus a list of color stops.
 * Built from an axial shading and a box, and written out as CSS by dumpto().
 */
class LinearGradient
{
public:
    // sample the shading over the box (x1,y1)-(x2,y2)
    LinearGradient(GfxAxialShading * shading,
            double x1, double y1, double x2, double y2);

    // write the gradient as CSS declarations to out
    void dumpto (ostream & out);

    // plain-function adapter for callbacks: p must point to a LinearGradient
    static void style_function (void * p, ostream & out)
    {
        LinearGradient * self = static_cast<LinearGradient*>(p);
        self->dumpto(out);
    }

    // TODO, add alpha
    struct ColorStop
    {
        GfxRGB rgb;
        double pos; // [0,1]
    };

    vector<ColorStop> stops;
    double angle;
};
/*
 * Build a gradient description from an axial shading restricted to the box
 * (x1,y1)-(x2,y2).
 *
 * angle is the direction of the shading axis (from the t=0 point to the t=1 point).
 * stops holds colors sampled at evenly spaced values of t inside the part of the
 * shading domain that is covered by the box; each stop position is expressed
 * relative to the range of t covered by the box.
 *
 * If the box does not overlap the shading domain at all, stops is left empty.
 */
LinearGradient::LinearGradient (GfxAxialShading * shading,
        double x1, double y1, double x2, double y2)
{
    // coordinate for t = 0 and t = 1
    double t0x, t0y, t1x, t1y;
    shading->getCoords(&t0x, &t0y, &t1x, &t1y);

    angle = get_angle(t1x - t0x, t1y - t0y);

    // get the range of t in the box
    // from GfxState.cc in poppler
    double box_tmin, box_tmax;
    {
        double idx = t1x - t0x;
        double idy = t1y - t0y;
        const double len2 = idx * idx + idy * idy;
        // a degenerate axis (both end points equal) would divide by zero;
        // treat it as covering t = 0 only
        const double inv_len = (len2 > 0) ? (1.0 / len2) : 0.0;
        idx *= inv_len;
        idy *= inv_len;

        // t of (x1,y1)
        box_tmin = box_tmax = (x1 - t0x) * idx + (y1 - t0y) * idy;

        const double tdx = (x2 - x1) * idx;
        if(tdx < 0)
            box_tmin += tdx;
        else
            box_tmax += tdx;

        const double tdy = (y2 - y1) * idy;
        if(tdy < 0)
            box_tmin += tdy;
        else
            box_tmax += tdy;
    }

    // get the domain of t in the box
    const double domain_tmin = max<double>(box_tmin, shading->getDomain0());
    const double domain_tmax = min<double>(box_tmax, shading->getDomain1());

    stops.clear();

    // no overlap between the box and the shading domain: nothing to sample
    if(!(domain_tmax >= domain_tmin))
        return;

    // TODO: better sampling
    // TODO: check background color
    {
        const int sample_intervals = 13;
        const double box_range = box_tmax - box_tmin;
        const double tstep = (domain_tmax - domain_tmin) / sample_intervals;

        // Iterate by index instead of accumulating t:
        //  - an empty range (tstep == 0) yields one stop instead of looping forever
        //  - rounding errors can no longer drop the stop at domain_tmax
        const int sample_count = (tstep > 0) ? (sample_intervals + 1) : 1;
        stops.reserve(sample_count);

        for(int i = 0; i < sample_count; ++i)
        {
            // pin the last sample to the exact end of the range
            const double t = (i == sample_intervals) ? domain_tmax : (domain_tmin + i * tstep);

            GfxColor color;
            shading->getColor(t, &color);

            ColorStop stop;
            shading->getColorSpace()->getRGB(&color, &stop.rgb);
            // a box with no extent along the axis has no meaningful relative position
            stop.pos = (box_range > 0) ? ((t - box_tmin) / box_range) : 0.0;

            stops.push_back(stop);
        }
    }
}
/*
 * Write the gradient to out as CSS declarations.
 * One background-image declaration is emitted for the unprefixed
 * linear-gradient function and for each vendor-prefixed variant;
 * each lists the angle in radians followed by every color stop
 * with its position as a percentage.
 */
void LinearGradient::dumpto (ostream & out)
{
    static const char * const vendor_prefixes[] = {"", "-ms-", "-moz-", "-webkit-", "-o-"};

    for(const char * prefix : vendor_prefixes)
    {
        out << "background-image:" << prefix << "linear-gradient(" << _round(angle) << "rad";

        for(const ColorStop & stop : stops)
        {
            out << "," << stop.rgb << " " << _round(stop.pos * 100) << "%";
        }

        out << ");";
    }
}
/*
 * Draw an axial shading as a CSS rectangle with a linear-gradient background.
 *
 * Returns gFalse without drawing anything when css_draw is disabled in the
 * parameters, gTrue otherwise. tMin and tMax are not used.
 */
GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax)
{
    if(param->css_draw)
    {
        // area covered by the shading
        double bx1, by1, bx2, by2;
        get_shading_bbox(state, shading, bx1, by1, bx2, by2);

        LinearGradient gradient(shading, bx1, by1, bx2, by2);

        // TODO: check background color
        // no border and no plain fill: the gradient supplies the background through the style callback
        css_draw_rectangle(bx1, by1, bx2 - bx1, by2 - by1, state->getCTM(),
                nullptr, 0,
                nullptr, nullptr,
                LinearGradient::style_function, &gradient);

        return gTrue;
    }

    return gFalse;
}
//TODO track state
//TODO connection style
/*
 * Draw the current path of state with CSS, subpath by subpath.
 * Does nothing when css_draw is disabled in the parameters.
 *
 * Only three shapes are handled, anything else is silently skipped:
 *  - horizontal line: a filled box as thick as the line width, in the stroke color
 *  - vertical line:   same, along the other axis
 *  - rectangle:       fill == true  -> box filled with the fill color, no border
 *                     fill == false -> box with a border in the stroke color
 */
void HTMLRenderer::css_draw(GfxState *state, bool fill)
{
    if(!(param->css_draw)) return;

    GfxPath * path = state->getPath();
    for(int idx = 0; idx < path->getNumSubpaths(); ++idx)
    {
        GfxSubpath * sp = path->getSubpath(idx);

        if(is_horizontal_line(sp))
        {
            const double xa = sp->getX(0);
            const double xb = sp->getX(1);
            const double line_y = sp->getY(0);
            const double left = (xa > xb) ? xb : xa;
            const double right = (xa > xb) ? xa : xb;

            GfxRGB stroke_rgb;
            state->getStrokeRGB(&stroke_rgb);

            const double thickness = state->getLineWidth();

            // the line is centered on line_y
            css_draw_rectangle(left, line_y - thickness/2, right - left, thickness, state->getCTM(),
                    nullptr, 0,
                    nullptr, &stroke_rgb);
            continue;
        }

        if(is_vertical_line(sp))
        {
            const double line_x = sp->getX(0);
            const double ya = sp->getY(0);
            const double yb = sp->getY(1);
            const double low = (ya > yb) ? yb : ya;
            const double high = (ya > yb) ? ya : yb;

            GfxRGB stroke_rgb;
            state->getStrokeRGB(&stroke_rgb);

            const double thickness = state->getLineWidth();

            // the line is centered on line_x
            css_draw_rectangle(line_x - thickness/2, low, thickness, high - low, state->getCTM(),
                    nullptr, 0,
                    nullptr, &stroke_rgb);
            continue;
        }

        if(!is_rectangle(sp))
            continue;

        close_text_line();

        // opposite corners of the rectangle, ordered so that (x1,y1) <= (x2,y2)
        double x1 = sp->getX(0);
        double x2 = sp->getX(2);
        double y1 = sp->getY(0);
        double y2 = sp->getY(2);
        if(x1 > x2) swap(x1, x2);
        if(y1 > y2) swap(y1, y2);

        // a filled rectangle has no border
        double x, y, w, h, lw[2];
        css_fix_rectangle_border_width(x1, y1, x2, y2, (fill ? 0.0 : state->getLineWidth()),
                x, y, w, h, lw[0], lw[1]);

        GfxRGB stroke_rgb;
        GfxRGB fill_rgb;
        GfxRGB * border_rgb = nullptr;
        GfxRGB * background_rgb = nullptr;
        if(fill)
        {
            state->getFillRGB(&fill_rgb);
            background_rgb = &fill_rgb;
        }
        else
        {
            state->getStrokeRGB(&stroke_rgb);
            border_rgb = &stroke_rgb;
        }

        if(_equal(h, 0) || _equal(w, 0))
        {
            // orthogonal line

            // TODO: check length
            // nothing is left inside the borders: draw the borders as a filled box instead
            background_rgb = border_rgb;
            border_rgb = nullptr;
            h += lw[0];
            w += lw[1];
        }

        css_draw_rectangle(x, y, w, h, state->getCTM(),
                lw, 2,
                border_rgb, background_rgb);
    }
}
/*
 * Emit one <div class="Cd t..."> element representing a rectangle.
 *
 * x, y, w, h        position and size of the rectangle, before tm is applied
 * tm                transform matrix (6 values) to apply to the rectangle
 * line_width_array  border widths, line_width_count entries; only read when line_color is set
 * line_color        border color, or nullptr for no border
 * fill_color        background color, or nullptr for a transparent background
 * style_function    optional callback that appends extra CSS declarations,
 *                   called with style_function_data and the HTML output stream
 *
 * The current text line is closed first. The linear part of the matrix is
 * normalized by a scale factor, which is then applied to the emitted width,
 * height and border widths.
 */
void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, const double * tm,
        double * line_width_array, int line_width_count,
        const GfxRGB * line_color, const GfxRGB * fill_color,
        void (*style_function)(void *, ostream &), void * style_function_data)
{
    close_text_line();

    double local_tm[6];
    memcpy(local_tm, tm, sizeof(local_tm));

    _tm_transform(local_tm, x, y);

    // scale factor taken from how the matrix maps the unit diagonal direction
    static const double sqrt2 = sqrt(2.0);
    const double diag_x = (local_tm[0] + local_tm[2]) / sqrt2;
    const double diag_y = (local_tm[1] + local_tm[3]) / sqrt2;
    double scale = hypot(diag_x, diag_y);
    if(!_is_positive(scale))
    {
        // cannot normalize, leave the matrix untouched
        scale = 1.0;
    }
    else
    {
        for(int k = 0; k < 4; ++k)
            local_tm[k] /= scale;
    }

    html_fout << "<div class=\"Cd t" << install_transform_matrix(local_tm) << "\" style=\"";

    if(!line_color)
    {
        html_fout << "border:none;";
    }
    else
    {
        html_fout << "border-color:" << *line_color << ";";

        html_fout << "border-width:";
        for(int k = 0; k < line_width_count; ++k)
        {
            if(k > 0) html_fout << ' ';

            const double scaled_width = line_width_array[k] * scale;
            html_fout << _round(scaled_width);
            // the unit is only appended to positive widths
            if(_is_positive(scaled_width)) html_fout << "px";
        }
        html_fout << ";";
    }

    if(!fill_color)
    {
        html_fout << "background-color:transparent;";
    }
    else
    {
        html_fout << "background-color:" << (*fill_color) << ";";
    }

    if(style_function)
    {
        style_function(style_function_data, html_fout);
    }

    html_fout << "bottom:" << _round(y) << "px;"
        << "left:" << _round(x) << "px;"
        << "width:" << _round(w * scale) << "px;"
        << "height:" << _round(h * scale) << "px;";

    html_fout << "\"></div>";
}
} // namespace pdf2htmlEX

View File

@ -141,9 +141,7 @@ void HTMLRenderer::export_word_space (long long ws_id, double word_space)
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
{ {
css_fout << ".c" << color_id << "{color:rgb(" css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
<< dec << (int)colToByte(rgb->r) << "," << (int)colToByte(rgb->g) << "," << (int)colToByte(rgb->b) << ");}" << hex
<< endl;
} }
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)

View File

@ -3,7 +3,7 @@
* *
* Handling general stuffs * Handling general stuffs
* *
* by WangLu * Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* 2012.08.14 * 2012.08.14
*/ */
@ -13,8 +13,6 @@
#include <algorithm> #include <algorithm>
#include <vector> #include <vector>
#include <splash/SplashBitmap.h>
#include "HTMLRenderer.h" #include "HTMLRenderer.h"
#include "BackgroundRenderer.h" #include "BackgroundRenderer.h"
#include "namespace.h" #include "namespace.h"
@ -64,10 +62,6 @@ HTMLRenderer::~HTMLRenderer()
delete [] width_list; delete [] width_list;
} }
static GBool annot_cb(Annot *, void *) {
return false;
};
void HTMLRenderer::process(PDFDoc *doc) void HTMLRenderer::process(PDFDoc *doc)
{ {
cur_doc = doc; cur_doc = doc;
@ -84,17 +78,17 @@ void HTMLRenderer::process(PDFDoc *doc)
vector<double> zoom_factors; vector<double> zoom_factors;
if(abs(param->zoom) > EPS) if(_is_positive(param->zoom))
{ {
zoom_factors.push_back(param->zoom); zoom_factors.push_back(param->zoom);
} }
if(abs(param->fit_width) > EPS) if(_is_positive(param->fit_width))
{ {
zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width()); zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width());
} }
if(abs(param->fit_height) > EPS) if(_is_positive(param->fit_height))
{ {
zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height()); zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height());
} }
@ -108,8 +102,8 @@ void HTMLRenderer::process(PDFDoc *doc)
zoom = *min_element(zoom_factors.begin(), zoom_factors.end()); zoom = *min_element(zoom_factors.begin(), zoom_factors.end());
} }
scale_factor1 = max(zoom, param->font_size_multiplier); text_scale_factor1 = max<double>(zoom, param->font_size_multiplier);
scale_factor2 = zoom / scale_factor1; text_scale_factor2 = zoom / text_scale_factor1;
} }
@ -117,12 +111,7 @@ void HTMLRenderer::process(PDFDoc *doc)
BackgroundRenderer * bg_renderer = nullptr; BackgroundRenderer * bg_renderer = nullptr;
if(param->process_nontext) if(param->process_nontext)
{ {
// Render non-text objects as image bg_renderer = new BackgroundRenderer(param);
// copied from poppler
SplashColor color;
color[0] = color[1] = color[2] = 255;
bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
bg_renderer->startDoc(doc); bg_renderer->startDoc(doc);
} }
@ -141,22 +130,15 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param->process_nontext) if(param->process_nontext)
{ {
doc->displayPage(bg_renderer, i, param->h_dpi, param->v_dpi, auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
0, true, false, false, if(param->single_html)
nullptr, nullptr, &annot_cb, nullptr); add_tmp_file((char*)fn);
{ bg_renderer->render_page(doc, i, (char*)fn);
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
if(param->single_html)
add_tmp_file((char*)fn);
bg_renderer->getBitmap()->writeImgFile(splashFormatPng,
(char*)fn,
param->h_dpi, param->v_dpi);
}
} }
doc->displayPage(this, i, zoom_factor() * DEFAULT_DPI, zoom_factor() * DEFAULT_DPI, doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
0, true, false, false, 0, true, false, false,
nullptr, nullptr, nullptr, nullptr); nullptr, nullptr, nullptr, nullptr);
@ -219,15 +201,15 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
} }
html_fout << "\">"; html_fout << "\">";
draw_scale = 1.0; draw_text_scale = 1.0;
cur_font_info = install_font(nullptr); cur_font_info = install_font(nullptr);
cur_font_size = draw_font_size = 0; cur_font_size = draw_font_size = 0;
cur_fs_id = install_font_size(cur_font_size); cur_fs_id = install_font_size(cur_font_size);
memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); memcpy(cur_text_tm, id_matrix, sizeof(cur_text_tm));
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm)); memcpy(draw_text_tm, id_matrix, sizeof(draw_text_tm));
cur_tm_id = install_transform_matrix(draw_ctm); cur_ttm_id = install_transform_matrix(draw_text_tm);
cur_letter_space = cur_word_space = 0; cur_letter_space = cur_word_space = 0;
cur_ls_id = install_letter_space(cur_letter_space); cur_ls_id = install_letter_space(cur_letter_space);
@ -247,7 +229,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
} }
void HTMLRenderer::endPage() { void HTMLRenderer::endPage() {
close_line(); close_text_line();
// process links before the page is closed // process links before the page is closed
cur_doc->processLinks(this, pageNum); cur_doc->processLinks(this, pageNum);
@ -404,7 +386,9 @@ void HTMLRenderer::post_process()
void HTMLRenderer::fix_stream (std::ostream & out) void HTMLRenderer::fix_stream (std::ostream & out)
{ {
out << hex; // we output all ID's in hex
// browsers are not happy with scientific notations
out << hex << fixed;
} }
void HTMLRenderer::add_tmp_file(const string & fn) void HTMLRenderer::add_tmp_file(const string & fn)

View File

@ -215,9 +215,11 @@ long long HTMLRenderer::install_font_size(double font_size)
long long HTMLRenderer::install_transform_matrix(const double * tm) long long HTMLRenderer::install_transform_matrix(const double * tm)
{ {
TM m(tm); Matrix m;
memcpy(m.m, tm, sizeof(m.m));
auto iter = transform_matrix_map.lower_bound(m); auto iter = transform_matrix_map.lower_bound(m);
if((iter != transform_matrix_map.end()) && (m == (iter->first))) if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4)))
return iter->second; return iter->second;
long long new_tm_id = transform_matrix_map.size(); long long new_tm_id = transform_matrix_map.size();

View File

@ -22,62 +22,6 @@ using std::ostringstream;
using std::min; using std::min;
using std::max; using std::max;
static void _transform(const double * ctm, double & x, double & y)
{
double xx = x, yy = y;
x = ctm[0] * xx + ctm[2] * yy + ctm[4];
y = ctm[1] * xx + ctm[3] * yy + ctm[5];
}
static void _get_transformed_rect(AnnotLink * link, const double * ctm, double & x1, double & y1, double & x2, double & y2)
{
double _x1, _x2, _y1, _y2;
link->getRect(&_x1, &_y1, &_x2, &_y2);
_transform(ctm, _x1, _y1);
_transform(ctm, _x2, _y2);
x1 = min(_x1, _x2);
x2 = max(_x1, _x2);
y1 = min(_y1, _y2);
y2 = max(_y1, _y2);
}
/*
* In PDF, edges of the rectangle are in the middle of the borders
* In HTML, edges are completely outside the rectangle
*/
static void _fix_border_width(double & x1, double & y1, double & x2, double & y2,
double border_width, double & border_top_bottom_width, double & border_left_right_width)
{
double w = x2 - x1;
if(w > border_width)
{
x1 += border_width / 2;
x2 -= border_width / 2;
border_left_right_width = border_width;
}
else
{
x1 += w / 2;
x2 -= w / 2;
border_left_right_width = border_width + w/2;
}
double h = y2 - y1;
if(h > border_width)
{
y1 += border_width / 2;
y2 -= border_width / 2;
border_top_bottom_width = border_width;
}
else
{
y1 += h / 2;
y2 -= h / 2;
border_top_bottom_width = border_width + h/2;
}
}
/* /*
* The detailed rectangle area of the link destination * The detailed rectangle area of the link destination
* Will be parsed and performed by Javascript * Will be parsed and performed by Javascript
@ -164,6 +108,7 @@ static string get_dest_detail_str(int pageno, LinkDest * dest)
/* /*
* Based on pdftohtml from poppler * Based on pdftohtml from poppler
* TODO: CSS for link rectangles * TODO: CSS for link rectangles
* TODO: share rectangle draw with css-draw
*/ */
void HTMLRenderer::processLink(AnnotLink * al) void HTMLRenderer::processLink(AnnotLink * al)
{ {
@ -239,10 +184,17 @@ void HTMLRenderer::processLink(AnnotLink * al)
html_fout << ">"; html_fout << ">";
} }
html_fout << "<div style=\""; html_fout << "<div class=\"Cd t"
<< install_transform_matrix(default_ctm)
<< "\" style=\"";
double x,y,w,h;
double x1, y1, x2, y2; double x1, y1, x2, y2;
_get_transformed_rect(al, default_ctm, x1, y1, x2, y2); al->getRect(&x1, &y1, &x2, &y2);
x = min<double>(x1, x2);
y = min<double>(y1, y2);
w = max<double>(x1, x2) - x;
h = max<double>(y1, y2) - y;
double border_width = 0; double border_width = 0;
double border_top_bottom_width = 0; double border_top_bottom_width = 0;
@ -250,12 +202,14 @@ void HTMLRenderer::processLink(AnnotLink * al)
auto * border = al->getBorder(); auto * border = al->getBorder();
if(border) if(border)
{ {
border_width = border->getWidth() * zoom_factor(); border_width = border->getWidth();
if(border_width > 0) if(border_width > 0)
{ {
{ {
_fix_border_width(x1, y1, x2, y1, css_fix_rectangle_border_width(x1, y1, x2, y2, border_width,
border_width, border_top_bottom_width, border_left_right_width); x, y, w, h,
border_top_bottom_width, border_left_right_width);
if(abs(border_top_bottom_width - border_left_right_width) < EPS) if(abs(border_top_bottom_width - border_left_right_width) < EPS)
html_fout << "border-width:" << _round(border_top_bottom_width) << "px;"; html_fout << "border-width:" << _round(border_top_bottom_width) << "px;";
else else
@ -313,12 +267,13 @@ void HTMLRenderer::processLink(AnnotLink * al)
html_fout << "border-style:none;"; html_fout << "border-style:none;";
} }
_tm_transform(default_ctm, x, y);
html_fout << "position:absolute;" html_fout << "position:absolute;"
<< "left:" << _round(x1- border_left_right_width) << "px;" << "left:" << _round(x) << "px;"
<< "bottom:" << _round(y1 - border_top_bottom_width) << "px;" << "bottom:" << _round(y) << "px;"
<< "width:" << _round(x2-x1) << "px;" << "width:" << _round(w) << "px;"
<< "height:" << _round(y2-y1) << "px;"; << "height:" << _round(h) << "px;";
// fix for IE // fix for IE
html_fout << "background-color:rgba(255,255,255,0.000001);"; html_fout << "background-color:rgba(255,255,255,0.000001);";

View File

@ -12,6 +12,7 @@
* optimize lines using nested <span> (reuse classes) * optimize lines using nested <span> (reuse classes)
*/ */
#include <cmath>
#include <algorithm> #include <algorithm>
#include "HTMLRenderer.h" #include "HTMLRenderer.h"
@ -22,6 +23,7 @@ namespace pdf2htmlEX {
using std::max; using std::max;
using std::abs; using std::abs;
using std::hypot;
void HTMLRenderer::updateAll(GfxState * state) void HTMLRenderer::updateAll(GfxState * state)
{ {
@ -82,7 +84,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
bool need_recheck_position = false; bool need_recheck_position = false;
bool need_rescale_font = false; bool need_rescale_font = false;
bool draw_scale_changed = false; bool draw_text_scale_changed = false;
// text position // text position
// we've been tracking the text position positively in the update*** functions // we've been tracking the text position positively in the update*** functions
@ -98,7 +100,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!(new_font_info->id == cur_font_info->id)) if(!(new_font_info->id == cur_font_info->id))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_font_info = new_font_info; cur_font_info = new_font_info;
} }
@ -112,7 +114,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
// backup the current ctm for need_recheck_position // backup the current ctm for need_recheck_position
double old_ctm[6]; double old_ctm[6];
memcpy(old_ctm, cur_ctm, sizeof(old_ctm)); memcpy(old_ctm, cur_text_tm, sizeof(old_ctm));
// ctm & text ctm & hori scale // ctm & text ctm & hori scale
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed) if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
@ -131,52 +133,52 @@ void HTMLRenderer::check_state_change(GfxState * state)
new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5]; new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5];
//new_ctm[4] = new_ctm[5] = 0; //new_ctm[4] = new_ctm[5] = 0;
if(!_tm_equal(new_ctm, cur_ctm)) if(!_tm_equal(new_ctm, cur_text_tm))
{ {
need_recheck_position = true; need_recheck_position = true;
need_rescale_font = true; need_rescale_font = true;
memcpy(cur_ctm, new_ctm, sizeof(cur_ctm)); memcpy(cur_text_tm, new_ctm, sizeof(cur_text_tm));
} }
} }
// draw_ctm, draw_scale // draw_text_tm, draw_text_scale
// depends: font size & ctm & text_ctm & hori scale // depends: font size & ctm & text_ctm & hori scale
if(need_rescale_font) if(need_rescale_font)
{ {
double new_draw_ctm[6]; double new_draw_text_tm[6];
memcpy(new_draw_ctm, cur_ctm, sizeof(new_draw_ctm)); memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_ctm[2] * new_draw_ctm[2] + new_draw_ctm[3] * new_draw_ctm[3]); double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]);
double new_draw_font_size = cur_font_size; double new_draw_font_size = cur_font_size;
if(_is_positive(new_draw_scale)) if(_is_positive(new_draw_text_scale))
{ {
new_draw_font_size *= new_draw_scale; new_draw_font_size *= new_draw_text_scale;
for(int i = 0; i < 4; ++i) for(int i = 0; i < 4; ++i)
new_draw_ctm[i] /= new_draw_scale; new_draw_text_tm[i] /= new_draw_text_scale;
} }
else else
{ {
new_draw_scale = 1.0; new_draw_text_scale = 1.0;
} }
if(!(_equal(new_draw_scale, draw_scale))) if(!(_equal(new_draw_text_scale, draw_text_scale)))
{ {
draw_scale_changed = true; draw_text_scale_changed = true;
draw_scale = new_draw_scale; draw_text_scale = new_draw_text_scale;
} }
if(!(_equal(new_draw_font_size, draw_font_size))) if(!(_equal(new_draw_font_size, draw_font_size)))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
draw_font_size = new_draw_font_size; draw_font_size = new_draw_font_size;
cur_fs_id = install_font_size(draw_font_size); cur_fs_id = install_font_size(draw_font_size);
} }
if(!(_tm_equal(new_draw_ctm, draw_ctm, 4))) if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4)))
{ {
new_line_state = max(new_line_state, NLS_DIV); new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
memcpy(draw_ctm, new_draw_ctm, sizeof(draw_ctm)); memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm));
cur_tm_id = install_transform_matrix(draw_ctm); cur_ttm_id = install_transform_matrix(draw_text_tm);
} }
} }
@ -198,29 +200,29 @@ void HTMLRenderer::check_state_change(GfxState * state)
*/ */
bool merged = false; bool merged = false;
if(_tm_equal(old_ctm, cur_ctm, 4)) if(_tm_equal(old_ctm, cur_text_tm, 4))
{ {
double dy = cur_ty - draw_ty; double dy = cur_ty - draw_ty;
double tdx = old_ctm[4] - cur_ctm[4] - cur_ctm[2] * dy; double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
double tdy = old_ctm[5] - cur_ctm[5] - cur_ctm[3] * dy; double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
if(_equal(cur_ctm[0] * tdy, cur_ctm[1] * tdx)) if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
{ {
if(abs(cur_ctm[0]) > EPS) if(_is_positive(cur_text_tm[0]))
{ {
draw_tx += tdx / cur_ctm[0]; draw_tx += tdx / cur_text_tm[0];
draw_ty += dy; draw_ty += dy;
merged = true; merged = true;
} }
else if (abs(cur_ctm[1]) > EPS) else if (_is_positive(cur_text_tm[1]))
{ {
draw_tx += tdy / cur_ctm[1]; draw_tx += tdy / cur_text_tm[1];
draw_ty += dy; draw_ty += dy;
merged = true; merged = true;
} }
else else
{ {
if((abs(tdx) < EPS) && (abs(tdy) < EPS)) if((_equal(tdx,0)) && (_equal(tdy,0)))
{ {
// free // free
draw_tx = cur_tx; draw_tx = cur_tx;
@ -236,33 +238,33 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!merged) if(!merged)
{ {
new_line_state = max(new_line_state, NLS_DIV); new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
} }
} }
// letter space // letter space
// depends: draw_scale // depends: draw_text_scale
if(all_changed || letter_space_changed || draw_scale_changed) if(all_changed || letter_space_changed || draw_text_scale_changed)
{ {
double new_letter_space = state->getCharSpace(); double new_letter_space = state->getCharSpace();
if(!_equal(cur_letter_space, new_letter_space)) if(!_equal(cur_letter_space, new_letter_space))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_letter_space = new_letter_space; cur_letter_space = new_letter_space;
cur_ls_id = install_letter_space(cur_letter_space * draw_scale); cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale);
} }
} }
// word space // word space
// depends draw_scale // depends draw_text_scale
if(all_changed || word_space_changed || draw_scale_changed) if(all_changed || word_space_changed || draw_text_scale_changed)
{ {
double new_word_space = state->getWordSpace(); double new_word_space = state->getWordSpace();
if(!_equal(cur_word_space, new_word_space)) if(!_equal(cur_word_space, new_word_space))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_word_space = new_word_space; cur_word_space = new_word_space;
cur_ws_id = install_word_space(cur_word_space * draw_scale); cur_ws_id = install_word_space(cur_word_space * draw_text_scale);
} }
} }
@ -273,22 +275,22 @@ void HTMLRenderer::check_state_change(GfxState * state)
state->getFillRGB(&new_color); state->getFillRGB(&new_color);
if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b)))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_color = new_color; cur_color = new_color;
cur_color_id = install_color(&new_color); cur_color_id = install_color(&new_color);
} }
} }
// rise // rise
// depends draw_scale // depends draw_text_scale
if(all_changed || rise_changed || draw_scale_changed) if(all_changed || rise_changed || draw_text_scale_changed)
{ {
double new_rise = state->getRise(); double new_rise = state->getRise();
if(!_equal(cur_rise, new_rise)) if(!_equal(cur_rise, new_rise))
{ {
new_line_state = max(new_line_state, NLS_SPAN); new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_rise = new_rise; cur_rise = new_rise;
cur_rise_id = install_rise(new_rise * draw_scale); cur_rise_id = install_rise(new_rise * draw_text_scale);
} }
} }
@ -312,7 +314,7 @@ void HTMLRenderer::reset_state_change()
color_changed = false; color_changed = false;
} }
void HTMLRenderer::prepare_line(GfxState * state) void HTMLRenderer::prepare_text_line(GfxState * state)
{ {
if(!line_opened) if(!line_opened)
{ {
@ -321,7 +323,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
if(new_line_state == NLS_DIV) if(new_line_state == NLS_DIV)
{ {
close_line(); close_text_line();
line_buf.reset(state); line_buf.reset(state);
@ -333,7 +335,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
{ {
// align horizontal position // align horizontal position
// try to merge with the last line if possible // try to merge with the last line if possible
double target = (cur_tx - draw_tx) * draw_scale; double target = (cur_tx - draw_tx) * draw_text_scale;
if(abs(target) < param->h_eps) if(abs(target) < param->h_eps)
{ {
// ignore it // ignore it
@ -341,7 +343,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
else else
{ {
line_buf.append_offset(target); line_buf.append_offset(target);
draw_tx += target / draw_scale; draw_tx += target / draw_text_scale;
} }
} }
@ -353,7 +355,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
line_opened = true; line_opened = true;
} }
void HTMLRenderer::close_line() void HTMLRenderer::close_text_line()
{ {
if(line_opened) if(line_opened)
{ {

View File

@ -310,7 +310,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
memset(width_list, -1, 0x10000 * sizeof(*width_list)); memset(width_list, -1, 0x10000 * sizeof(*width_list));
if(code2GID) if(code2GID)
maxcode = min(maxcode, code2GID_len - 1); maxcode = min<int>(maxcode, code2GID_len - 1);
bool is_truetype = is_truetype_suffix(suffix); bool is_truetype = is_truetype_suffix(suffix);
int max_key = maxcode; int max_key = maxcode;
@ -484,7 +484,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
// see if the line has to be closed due to state change // see if the line has to be closed due to state change
check_state_change(state); check_state_change(state);
prepare_line(state); prepare_text_line(state);
// Now ready to output // Now ready to output
// get the unicodes // get the unicodes
@ -522,7 +522,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
if(is_space && (param->space_as_offset)) if(is_space && (param->space_as_offset))
{ {
// ignore horiz_scaling, as it's merged in CTM // ignore horiz_scaling, as it's merged in CTM
line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
} }
else else
{ {

View File

@ -82,8 +82,8 @@ void Preprocessor::drawChar(GfxState *state, double x, double y,
void Preprocessor::startPage(int pageNum, GfxState *state) void Preprocessor::startPage(int pageNum, GfxState *state)
{ {
max_width = max(max_width, state->getPageWidth()); max_width = max<double>(max_width, state->getPageWidth());
max_height = max(max_height, state->getPageHeight()); max_height = max<double>(max_height, state->getPageHeight());
} }
const char * Preprocessor::get_code_map (long long font_id) const const char * Preprocessor::get_code_map (long long font_id) const

View File

@ -0,0 +1,40 @@
/*
* SplashBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#include <PDFDoc.h>
#include "SplashBackgroundRenderer.h"
namespace pdf2htmlEX {
using std::string;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
// Character-drawing hook of the Splash output device, overridden as a no-op:
// the call that would forward to SplashOutputDev::drawChar is commented out,
// so this override draws nothing for characters.
// NOTE(review): presumably text is left out of the background image because
// it is emitted elsewhere as HTML - confirm.
void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}
// Annotation callback handed to PDFDoc::displayPage() by render_page().
// It ignores both arguments and always answers false.
// NOTE(review): presumably this tells poppler not to draw any annotation on
// the background image - confirm against the annotDisplayDecideCbk contract.
static GBool annot_cb(Annot *, void *) {
    return false;
}
// Render page `pageno` of `doc` through this output device, then save the
// resulting bitmap to `filename` as a PNG.
// Both steps use the resolutions stored in param (h_dpi / v_dpi).
void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename)
{
    // NOTE(review): the positional flags are presumably rotate = 0,
    // useMediaBox = true, crop = false, printing = false, followed by the
    // abort callback pair (unused) and the annotation callback pair -
    // confirm against the PDFDoc::displayPage signature of the poppler in use.
    doc->displayPage(this, pageno, param->h_dpi, param->v_dpi,
            0, true, false, false,
            nullptr, nullptr, &annot_cb, nullptr);

    // The file name is passed as a non-const char *; const_cast makes the
    // dropped const explicit instead of hiding it in a C-style cast.
    getBitmap()->writeImgFile(splashFormatPng,
            const_cast<char*>(filename.c_str()),
            param->h_dpi, param->v_dpi);
}
} // namespace pdf2htmlEX

View File

@ -332,15 +332,11 @@ void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int s
SplineChar * sc = sf->glyphs[j]; SplineChar * sc = sf->glyphs[j];
if(sc == NULL) continue; if(sc == NULL) continue;
DBounds bb; if(((sc->width > EPS)
SplineCharFindBounds(sc, &bb); && (((sc->width > width_list[i] + EPS) && (squeeze_wide))
|| ((sc->width < width_list[i] - EPS) && (stretch_narrow)))))
double glyph_width = bb.maxx - bb.minx;
if((glyph_width > EPS)
&& (((glyph_width > width_list[i] + EPS) && (squeeze_wide))
|| ((glyph_width < width_list[i] - EPS) && (stretch_narrow))))
{ {
real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width; real transform[6]; transform[0] = ((double)width_list[i]) / (sc->width);
transform[3] = 1.0; transform[3] = 1.0;
transform[1] = transform[2] = transform[4] = transform[5] = 0; transform[1] = transform[2] = transform[4] = transform[5] = 0;
FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth); FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth);

View File

@ -2,37 +2,31 @@
* Background renderer * Background renderer
* Render all those things not supported as Image * Render all those things not supported as Image
* *
* by WangLu * Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* 2012.08.06
*/ */
#ifndef BACKGROUND_RENDERER_H__ #ifndef BACKGROUND_RENDERER_H__
#define BACKGROUND_RENDERER_H__ #define BACKGROUND_RENDERER_H__
#include <SplashOutputDev.h> #include "pdf2htmlEX-config.h"
#if HAVE_CAIRO
#include "CairoBackgroundRenderer.h"
namespace pdf2htmlEX { namespace pdf2htmlEX {
typedef CairoBackgroundRenderer BackgroundRenderer;
// Based on BackgroundRenderer from poppler
class BackgroundRenderer : public SplashOutputDev {
public:
BackgroundRenderer(SplashColorMode colorModeA, int bitmapRowPadA,
GBool reverseVideoA, SplashColorPtr paperColorA,
GBool bitmapTopDownA = gTrue,
GBool allowAntialiasA = gTrue)
: SplashOutputDev(colorModeA,
bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA,
allowAntialiasA)
{ }
virtual ~BackgroundRenderer() { }
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
};
} }
#else
#include "SplashBackgroundRenderer.h"
namespace pdf2htmlEX {
typedef SplashBackgroundRenderer BackgroundRenderer;
}
#endif // HAVE_CAIRO
#endif //BACKGROUND_RENDERER_H__ #endif //BACKGROUND_RENDERER_H__

View File

@ -0,0 +1,42 @@
/*
* Cairo Background renderer
* Render all those things not supported as Image, with Cairo
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#ifndef CAIRO_BACKGROUND_RENDERER_H__
#define CAIRO_BACKGROUND_RENDERER_H__
#include <CairoOutputDev.h>
#include "Param.h"
namespace pdf2htmlEX {
// Based on BackgroundRenderer from poppler
// A CairoOutputDev that also keeps a pointer to the conversion parameters
// and declares a helper to render a single page into a file.
class CairoBackgroundRenderer : public CairoOutputDev
{
public:
// Stores the given pointer as-is; the Param object is not copied.
// NOTE(review): presumably the Param must outlive this renderer - confirm with the caller.
CairoBackgroundRenderer(const Param * param)
:CairoOutputDev()
, param(param)
{ }
virtual ~CairoBackgroundRenderer() { }
// Character-drawing hook of the output device; defined out of line.
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
// Takes a document, a page number and an output file name; defined out of line.
// NOTE(review): the implementation is not visible here - presumably it
// renders page `pageno` of `doc` into `filename`; confirm.
void render_page(PDFDoc * doc, int pageno, const std::string & filename);
protected:
// Conversion parameters supplied at construction (not owned).
const Param * param;
};
}
#endif //CAIRO_BACKGROUND_RENDERER_H__

View File

@ -42,6 +42,8 @@
* j - Js data * j - Js data
* p - Page * p - Page
* *
* Cd - CSS Draw
*
* Reusable CSS classes * Reusable CSS classes
* *
* t<hex> - Transform matrix * t<hex> - Transform matrix
@ -77,12 +79,18 @@ class HTMLRenderer : public OutputDev
// Does this device use drawChar() or drawString()? // Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; } virtual GBool useDrawChar() { return gFalse; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
// Only shading type 2 is claimed here.
// NOTE(review): type 2 is presumably the axial shading handled by the
// axialShadedFill() override of this class - confirm against the PDF
// shading-type numbering.
virtual GBool useShadedFills(int type) { return type == 2; }
// Does this device use beginType3Char/endType3Char? Otherwise, // Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString. // text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; } virtual GBool interpretType3Chars() { return gFalse; }
// Does this device need non-text content? // Does this device need non-text content?
virtual GBool needNonText() { return gFalse; } virtual GBool needNonText() { return gTrue; }
virtual void setDefaultCTM(double *ctm); virtual void setDefaultCTM(double *ctm);
@ -121,6 +129,10 @@ class HTMLRenderer : public OutputDev
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
// Path stroking is forwarded to css_draw() with fill = false.
virtual void stroke(GfxState *state) { css_draw(state, false); }
// Path filling is forwarded to css_draw() with fill = true.
virtual void fill(GfxState *state) { css_draw(state, true); }
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void processLink(AnnotLink * al); virtual void processLink(AnnotLink * al);
protected: protected:
@ -190,8 +202,25 @@ class HTMLRenderer : public OutputDev
void reset_state_change(); void reset_state_change();
// prepare the line context, (close old tags, open new tags) // prepare the line context, (close old tags, open new tags)
// make sure the current HTML style consistent with PDF // make sure the current HTML style consistent with PDF
void prepare_line(GfxState * state); void prepare_text_line(GfxState * state);
void close_line(); void close_text_line();
////////////////////////////////////////////////////
// CSS drawing
////////////////////////////////////////////////////
void css_draw(GfxState *state, bool fill);
/*
* coordinates are to be transformed by state->getCTM()
* (x,y) should be the bottom-left corner INCLUDING border
* w,h should be the metrics WITHOUT border
*
* line_color & fill_color may be specified as nullptr to indicate none
* style_function & style_function_data may be provided to provide more styles
*/
void css_draw_rectangle(double x, double y, double w, double h, const double * tm,
double * line_width_array, int line_width_count,
const GfxRGB * line_color, const GfxRGB * fill_color,
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
@ -216,9 +245,9 @@ class HTMLRenderer : public OutputDev
* factor1 & factor 2 are determined according to zoom and font-size-multiplier * factor1 & factor 2 are determined according to zoom and font-size-multiplier
* *
*/ */
double zoom_factor (void) const { return scale_factor1 * scale_factor2; } double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; }
double scale_factor1; double text_scale_factor1;
double scale_factor2; double text_scale_factor2;
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
@ -246,16 +275,15 @@ class HTMLRenderer : public OutputDev
bool font_changed; bool font_changed;
// transform matrix // transform matrix
long long cur_tm_id; long long cur_ttm_id;
bool ctm_changed; bool ctm_changed;
bool text_mat_changed; bool text_mat_changed;
// horizontal scaling // horizontal scaling
bool hori_scale_changed; bool hori_scale_changed;
// this is CTM * TextMAT in PDF, not only CTM // this is CTM * TextMAT in PDF
// [4] and [5] are ignored, // [4] and [5] are ignored,
// as we'll calculate the position of the origin separately // as we'll calculate the position of the origin separately
// TODO: changed this for images double cur_text_tm[6]; // unscaled
double cur_ctm[6]; // unscaled
// letter spacing // letter spacing
long long cur_ls_id; long long cur_ls_id;
@ -281,11 +309,11 @@ class HTMLRenderer : public OutputDev
// we try to render the final font size directly // we try to render the final font size directly
// to reduce the effect of ctm as much as possible // to reduce the effect of ctm as much as possible
// draw_ctm is cur_ctm scaled by 1/draw_scale, // draw_ctm is cur_ctm scaled by 1/draw_text_scale,
// so everything redenered should be multiplied by draw_scale // so everything redenered should be multiplied by draw_text_scale
double draw_ctm[6]; double draw_text_tm[6];
double draw_font_size; double draw_font_size;
double draw_scale; double draw_text_scale;
// the position of next char, in text coords // the position of next char, in text coords
// this is actual position (in HTML), which might be different from cur_tx/ty (in PDF) // this is actual position (in HTML), which might be different from cur_tx/ty (in PDF)
@ -376,7 +404,7 @@ class HTMLRenderer : public OutputDev
std::unordered_map<long long, FontInfo> font_name_map; std::unordered_map<long long, FontInfo> font_name_map;
std::map<double, long long> font_size_map; std::map<double, long long> font_size_map;
std::map<TM, long long> transform_matrix_map; std::map<Matrix, long long, Matrix_less> transform_matrix_map;
std::map<double, long long> letter_space_map; std::map<double, long long> letter_space_map;
std::map<double, long long> word_space_map; std::map<double, long long> word_space_map;
std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> color_map; std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> color_map;

View File

@ -55,6 +55,9 @@ struct Param
int debug; int debug;
int clean_tmp; int clean_tmp;
// experimental
int css_draw;
}; };
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -0,0 +1,48 @@
/*
* Splash Background renderer
* Render all those things not supported as Image, with Splash
*
* by WangLu
* 2012.08.06
*/
#ifndef SPLASH_BACKGROUND_RENDERER_H__
#define SPLASH_BACKGROUND_RENDERER_H__
#include <string>
#include <splash/SplashBitmap.h>
#include <SplashOutputDev.h>
#include "Param.h"
namespace pdf2htmlEX {
// Based on BackgroundRenderer from poppler
// A SplashOutputDev with a fixed configuration that also keeps a pointer to
// the conversion parameters and can render a single page into a file.
class SplashBackgroundRenderer : public SplashOutputDev
{
public:
// Paper colour handed to the SplashOutputDev constructor below.
static const SplashColor white;
// Configures the base device with: RGB8 colour mode, bitmap row padding 4,
// reverse video off, `white` as paper colour, top-down bitmap and
// antialiasing allowed. The Param pointer is stored as-is, not copied.
// NOTE(review): presumably the Param must outlive this renderer - confirm with the caller.
SplashBackgroundRenderer(const Param * param)
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue)
, param(param)
{ }
virtual ~SplashBackgroundRenderer() { }
// Character-drawing hook of the output device; defined out of line.
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
// Renders page `pageno` of `doc` and writes the result to `filename`;
// defined out of line.
void render_page(PDFDoc * doc, int pageno, const std::string & filename);
protected:
// Conversion parameters supplied at construction (not owned).
const Param * param;
};
} // namespace pdf2htmlEX
#endif // SPLASH_BACKGROUND_RENDERER_H__

View File

@ -0,0 +1,24 @@
/*
* config.h
* Compile time constants
*
* by WangLu
*/
#ifndef PDF2HTMLEX_CONFIG_H__
#define PDF2HTMLEX_CONFIG_H__
#include <string>
// 0 means the Cairo backend is not compiled in.
#define HAVE_CAIRO 0
namespace pdf2htmlEX {
// Compile-time constants of this build: version, install prefix and data directory.
static const std::string PDF2HTMLEX_VERSION = "0.5";
static const std::string PDF2HTMLEX_PREFIX = "/usr/local";
// The two adjacent string literals are concatenated by the compiler,
// yielding "/usr/local/share/pdf2htmlEX".
static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX";
} // namespace pdf2htmlEX
#endif //PDF2HTMLEX_CONFIG_H__

View File

@ -11,6 +11,8 @@
#include <string> #include <string>
#define HAVE_CAIRO @HAVE_CAIRO@
namespace pdf2htmlEX { namespace pdf2htmlEX {
static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@"; static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@";

View File

@ -47,6 +47,9 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size =
return true; return true;
} }
void _tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
void _tm_multiply(double * tm_left, const double * tm_right);
static inline long long hash_ref(const Ref * id) static inline long long hash_ref(const Ref * id)
{ {
return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen); return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen);
@ -102,28 +105,21 @@ public:
bool has_space; // whether space is included in the font bool has_space; // whether space is included in the font
}; };
// wrapper of the transform matrix double[6] class Matrix_less
// Transform Matrix
class TM
{ {
public: public:
TM() {} bool operator () (const Matrix & m1, const Matrix & m2) const
TM(const double * m) {memcpy(_, m, sizeof(_));} {
bool operator < (const TM & m) const {
// Note that we only care about the first 4 elements // Note that we only care about the first 4 elements
for(int i = 0; i < 4; ++i) for(int i = 0; i < 4; ++i)
{ {
if(_[i] < m._[i] - EPS) if(m1.m[i] < m2.m[i] - EPS)
return true; return true;
if(_[i] > m._[i] + EPS) if(m1.m[i] > m2.m[i] + EPS)
return false; return false;
} }
return false; return false;
} }
bool operator == (const TM & m) const {
return _tm_equal(_, m._, 4);
}
double _[6];
}; };
class base64stream class base64stream
@ -201,7 +197,7 @@ public:
va_end(vlist); va_end(vlist);
if(l >= (int)buf.capacity()) if(l >= (int)buf.capacity())
{ {
buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); buf.reserve(std::max<long>((long)(l+1), (long)buf.capacity() * 2));
va_start(vlist, format); va_start(vlist, format);
l = vsnprintf(&buf.front(), buf.capacity(), format, vlist); l = vsnprintf(&buf.front(), buf.capacity(), format, vlist);
va_end(vlist); va_end(vlist);
@ -223,5 +219,17 @@ bool is_truetype_suffix(const std::string & suffix);
std::string get_filename(const std::string & path); std::string get_filename(const std::string & path);
std::string get_suffix(const std::string & path); std::string get_suffix(const std::string & path);
/*
* In PDF, edges of the rectangle are in the middle of the borders
* In HTML, edges are completely outside the rectangle
*/
void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2,
double border_width,
double & x, double & y, double & w, double & h,
double & border_top_bottom_width,
double & border_left_right_width);
std::ostream & operator << (std::ostream & out, const GfxRGB & rgb);
} // namespace util } // namespace util
#endif //UTIL_H__ #endif //UTIL_H__

View File

@ -82,7 +82,7 @@ void parse_options (int argc, char **argv)
.add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") .add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled")
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("stretch_narrow_glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") .add("stretch_narrow_glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space")
.add("squeeze_wide_glyph", &param.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating")
.add("css-filename", &param.css_filename, "", "Specify the file name of the generated css file") .add("css-filename", &param.css_filename, "", "Specify the file name of the generated css file")
.add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files") .add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files")
@ -91,6 +91,7 @@ void parse_options (int argc, char **argv)
.add("debug", &param.debug, 0, "output debug information") .add("debug", &param.debug, 0, "output debug information")
.add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing") .add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing")
.add("css-draw", &param.css_draw, 0, "[Experimental and Unsupported] CSS Drawing")
.add("", &param.input_filename, "", "") .add("", &param.input_filename, "", "")
.add("", &param.output_filename, "", "") .add("", &param.output_filename, "", "")
; ;
@ -182,8 +183,8 @@ int main(int argc, char **argv)
throw "Copying of text from this document is not allowed."; throw "Copying of text from this document is not allowed.";
} }
param.first_page = min(max(param.first_page, 1), doc->getNumPages()); param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
if(param.output_filename == "") if(param.output_filename == "")
{ {

View File

@ -54,6 +54,31 @@ const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string>
{{".js", 1}, {"<script type=\"text/javascript\">", "</script>"}} {{".js", 1}, {"<script type=\"text/javascript\">", "</script>"}}
}); });
// Apply the affine matrix tm (6 doubles: a b c d e f) to the point (x, y) in place.
// The linear part maps (x, y) to (a*x + c*y, b*x + d*y).
// Unless is_delta is set, the translation (e, f) is added afterwards;
// with is_delta the pair is treated as a displacement and is not translated.
void _tm_transform(const double * tm, double & x, double & y, bool is_delta)
{
    // keep the inputs: x is overwritten before y is computed
    const double in_x = x;
    const double in_y = y;

    x = tm[0] * in_x + tm[2] * in_y;
    y = tm[1] * in_x + tm[3] * in_y;

    if(is_delta)
        return;

    x += tm[4];
    y += tm[5];
}
// Compose two affine matrices in place: tm_left = tm_left * tm_right,
// i.e. the result applies tm_right first and then the old tm_left.
// Both arguments are arrays of 6 doubles (a b c d e f).
//
// The product is built in a temporary and copied back at the end, so the
// call is also correct when tm_left and tm_right point to the same array.
void _tm_multiply(double * tm_left, const double * tm_right)
{
    const double * const l = tm_left;
    const double * const r = tm_right;

    const double product[6] = {
        l[0] * r[0] + l[2] * r[1],
        l[1] * r[0] + l[3] * r[1],
        l[0] * r[2] + l[2] * r[3],
        l[1] * r[2] + l[3] * r[3],
        // translation: the old offset plus the right offset mapped by the old linear part
        l[4] + (l[0] * r[4] + l[2] * r[5]),
        l[5] + (l[1] * r[4] + l[3] * r[5]),
    };

    memcpy(tm_left, product, sizeof(product));
}
bool isLegalUnicode(Unicode u) bool isLegalUnicode(Unicode u)
{ {
/* /*
@ -249,4 +274,49 @@ string get_suffix(const string & path)
} }
} }
// Convert a stroked rectangle (x1,y1)-(x2,y2) whose edges run through the
// MIDDLE of its border into a box whose borders lie entirely OUTSIDE the
// content area.
//
// Outputs:
//   x, y  - bottom-left corner of the box INCLUDING the border
//   w, h  - content size WITHOUT the border
//   border_left_right_width / border_top_bottom_width - width of each border
//
// Per axis the outer extent is always (x2 - x1) + border_width. When the
// rectangle is not wider than the border, the content collapses to 0 and the
// two opposite borders share that extent equally.
void css_fix_rectangle_border_width(double x1, double y1,
        double x2, double y2,
        double border_width,
        double & x, double & y, double & w, double & h,
        double & border_top_bottom_width,
        double & border_left_right_width)
{
    w = x2 - x1;
    if(w > border_width)
    {
        w -= border_width;
        border_left_right_width = border_width;
    }
    else
    {
        // two borders must cover w + border_width in total
        border_left_right_width = (border_width + w) / 2;
        w = 0;
    }
    x = x1 - border_width / 2;

    h = y2 - y1;
    if(h > border_width)
    {
        h -= border_width;
        border_top_bottom_width = border_width;
    }
    else
    {
        // two borders must cover h + border_width in total
        border_top_bottom_width = (border_width + h) / 2;
        h = 0;
    }
    y = y1 - border_width / 2;
}
// Write a colour in CSS notation, "rgb(R,G,B)", with each channel converted
// by colToByte() and printed as a decimal integer.
// The stream's format flags are saved first and restored before returning.
ostream & operator << (ostream & out, const GfxRGB & rgb)
{
    const auto saved_flags = out.flags();

    out << std::dec;
    out << "rgb(" << (int)colToByte(rgb.r);
    out << "," << (int)colToByte(rgb.g);
    out << "," << (int)colToByte(rgb.b);
    out << ")";

    out.flags(saved_flags);
    return out;
}
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
DIR = 'pdf' DIR = 'pdf'
DIR = '../../pdf.js/test/pdfs'
import os import os
@ -11,7 +12,8 @@ with open('out.html','w') as outf:
if not f.lower().endswith('.pdf'): if not f.lower().endswith('.pdf'):
continue continue
print f print f
os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) #os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f))
os.system('pdf2htmlEX --dest-dir html --process-nontext 0 --css-draw 1 "%s/%s"' % (DIR,f))
ff = f[:-3] ff = f[:-3]
outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff)) outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff))
outf.flush(); outf.flush();