1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

Merge branch 'devv'

This commit is contained in:
Lu Wang 2012-10-03 12:54:24 +08:00
commit 0eb1f962f7
28 changed files with 894 additions and 253 deletions

View File

@ -20,6 +20,22 @@ include_directories(${POPPLER_INCLUDE_DIRS})
link_directories(${POPPLER_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES})
# disable CAIRO for now
if(0)
pkg_check_modules(POPPLER_CAIRO poppler-cairo>=0.20.0)
if(POPPLER_CAIRO_FOUND)
set(HAVE_CAIRO 1)
include_directories(${POPPLER_CAIRO_INCLUDE_DIRS})
link_directories(${POPPLER_CAIRO_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_CAIRO_LIBRARIES})
else()
set(HAVE_CAIRO 0)
endif()
else()
set(HAVE_CAIRO 0)
endif()
# fontforge starts using pkg-config 'correctly' since 2.0.0
pkg_check_modules(FONTFORGE libfontforge>=2.0.0)
if(FONTFORGE_FOUND)
@ -127,13 +143,17 @@ add_executable(pdf2htmlEX
src/HTMLRenderer/export.cc
src/HTMLRenderer/text.cc
src/HTMLRenderer/image.cc
src/HTMLRenderer/draw.cc
src/HTMLRenderer/link.cc
src/include/namespace.h
src/HTMLRenderer/LineBuffer.cc
src/include/ffw.h
src/ffw.c
src/include/BackgroundRenderer.h
src/BackgroundRenderer.cc
src/include/SplashBackgroundRenderer.h
src/SplashBackgroundRenderer.cc
src/include/CairoBackgroundRenderer.h
src/CairoBackgroundRenderer.cc
src/include/Preprocessor.h
src/Preprocessor.cc
src/include/util.h

View File

@ -38,6 +38,7 @@ This program is designed for scientific papers with complicate formulas and figu
* Proper styling (Color, Transformation...)
* Links
* Optimization for Web
* [EXPERIMENTAL] Path drawing with CSS
### Not supported yet
@ -89,6 +90,12 @@ I have tested with CYGWIN without any problem, and I believe it also works on Mi
pdf2htmlEX --help
### For Geeks
* Experimental and unsupported
pdf2htmlEX --process-nontext 0 --css-draw 0 /path/to/foobar.pdf
## FAQ
[here](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)

5
TODO
View File

@ -1,3 +1,6 @@
Integrate splash/cairo
native support for image
native support for draw
about glyph width:
- IE
@ -29,5 +32,3 @@ combine lines (unwarp)
Printing
multiple charcodes mapped to the same glyph
check if we can add information to the font, and let browsers show ligatures automatically
native support for image
native support for draw

View File

@ -81,4 +81,12 @@ span {
}
.a {
}
/* Cd - CSS Draw: absolutely positioned box whose transform pivots on its bottom-left corner (0% 100%) */
.Cd {
    position:absolute;
    -ms-transform-origin:0% 100%;
    -moz-transform-origin:0% 100%;
    -webkit-transform-origin:0% 100%;
    -o-transform-origin:0% 100%;
    transform-origin:0% 100%;
}
/* Base CSS END */

View File

@ -1,23 +0,0 @@
/*
* BackgroundRenderer.cc
*
* Copyright (C) 2012 by Lu Wang coolwanglu<at>gmail.com
*/
#include <algorithm>
#include "GfxFont.h"
#include "BackgroundRenderer.h"
#include "util.h"
using namespace pdf2htmlEX;
void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}

View File

@ -0,0 +1,30 @@
/*
* CairoBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#include "pdf2htmlEX-config.h"
#if HAVE_CAIRO
#include "CairoBackgroundRenderer.h"
namespace pdf2htmlEX {
/*
 * Deliberately does nothing: the forwarding call to CairoOutputDev::drawChar
 * is commented out below, so this override draws no characters.
 * NOTE(review): presumably so that text does not end up in the background
 * image and is handled by HTMLRenderer instead - confirm.
 */
void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}
/*
 * Placeholder: the body is empty, so nothing is rendered and `filename`
 * is never written.
 * TODO: implement page rendering for the Cairo backend.
 */
void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const std::string & filename)
{
}
} // namespace pdf2htmlEX
#endif // HAVE_CAIRO

View File

@ -22,7 +22,7 @@ using std::ostream;
void HTMLRenderer::LineBuffer::reset(GfxState * state)
{
state->transform(state->getCurX(), state->getCurY(), &x, &y);
tm_id = renderer->cur_tm_id;
tm_id = renderer->cur_ttm_id;
}
void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l)
@ -75,7 +75,7 @@ void HTMLRenderer::LineBuffer::flush(void)
for(auto iter = states.begin(); iter != states.end(); ++iter)
{
const auto & s = *iter;
max_ascent = max(max_ascent, s.ascent * s.draw_font_size);
max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
}
ostream & out = renderer->html_fout;
@ -155,7 +155,7 @@ void HTMLRenderer::LineBuffer::flush(void)
++ cur_offset_iter;
}
size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx);
size_t next_text_idx = min<size_t>(cur_state_iter->start_idx, cur_offset_iter->start_idx);
outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx);
cur_text_idx = next_text_idx;

399
src/HTMLRenderer/draw.cc Normal file
View File

@ -0,0 +1,399 @@
/*
* draw.cc
*
* Handling path drawing
*
* by WangLu
* 2012.10.01
*/
#include <algorithm>
#include <cmath>
#include <sstream>
#include <vector>
#include <iostream>
#include "HTMLRenderer.h"
#include "util.h"
#include "namespace.h"
namespace pdf2htmlEX {
using std::swap;
using std::min;
using std::max;
using std::acos;
using std::asin;
using std::ostringstream;
using std::sqrt;
using std::vector;
using std::ostream;
/*
 * True when the subpath is exactly two points joined by a straight
 * (non-curve) segment whose end points share the same y coordinate.
 */
static bool is_horizontal_line(GfxSubpath * path)
{
    if(path->getNumPoints() != 2)
        return false;

    // the segment ending at point 1 must not be a curve
    if(path->getCurve(1))
        return false;

    return _equal(path->getY(0), path->getY(1));
}
/*
 * True when the subpath is exactly two points joined by a straight
 * (non-curve) segment whose end points share the same x coordinate.
 */
static bool is_vertical_line(GfxSubpath * path)
{
    if(path->getNumPoints() != 2)
        return false;

    // the segment ending at point 1 must not be a curve
    if(path->getCurve(1))
        return false;

    return _equal(path->getX(0), path->getX(1));
}
/*
 * True when the subpath describes an axis-aligned rectangle.
 *
 * Two encodings are accepted:
 *  - four points on a subpath flagged as closed
 *  - five points where the last one coincides with the first
 * All segments must be straight, and consecutive edges must alternate
 * between horizontal and vertical (either orientation first).
 *
 * The point count is checked before any of points 0..3 is read, so a
 * closed subpath with fewer (or more) points is rejected instead of
 * being classified from points it does not have.
 */
static bool is_rectangle(GfxSubpath * path)
{
    const int n = path->getNumPoints();

    const bool closed_quad = (n == 4) && path->isClosed();
    // short-circuits on n, so point 4 is only read when it exists
    const bool explicit_loop = (n == 5)
        && _equal(path->getX(0), path->getX(4))
        && _equal(path->getY(0), path->getY(4));

    if(!(closed_quad || explicit_loop))
        return false;

    // every segment has to be a straight line
    for(int i = 1; i < n; ++i)
        if(path->getCurve(i))
            return false;

    // horizontal edge first ...
    const bool horizontal_first =
           _equal(path->getY(0), path->getY(1))
        && _equal(path->getX(1), path->getX(2))
        && _equal(path->getY(2), path->getY(3))
        && _equal(path->getX(3), path->getX(0));
    if(horizontal_first)
        return true;

    // ... or vertical edge first
    return _equal(path->getX(0), path->getX(1))
        && _equal(path->getY(1), path->getY(2))
        && _equal(path->getX(2), path->getX(3))
        && _equal(path->getY(3), path->getY(0));
}
/*
 * Compute the bounding box of a shading into (x1,y1)-(x2,y2).
 *
 * If the shading has its own bounding box, that one is returned as is.
 * Otherwise the clip box of the state is taken, its four corners are
 * mapped through the inverse of the CTM, and the axis-aligned box
 * enclosing the mapped corners is returned.
 *
 * Adapted from SplashOutputDev.cc in poppler.
 */
static void get_shading_bbox(GfxState * state, GfxShading * shading,
        double & x1, double & y1, double & x2, double & y2)
{
    if(shading->getHasBBox())
    {
        shading->getBBox(&x1, &y1, &x2, &y2);
        return;
    }

    state->getClipBBox(&x1, &y1, &x2, &y2);

    Matrix ctm, ictm;
    state->getCTM(&ctm);
    ctm.invertTo(&ictm);

    // the four corners of the clip box, in the order they are transformed
    const double corner_x[4] = { x1, x2, x1, x2 };
    const double corner_y[4] = { y1, y1, y2, y2 };

    double tx[4], ty[4];
    for(int k = 0; k < 4; ++k)
        ictm.transform(corner_x[k], corner_y[k], &tx[k], &ty[k]);

    // enclosing box of the transformed corners
    x1 = x2 = tx[0];
    y1 = y2 = ty[0];
    for(int k = 1; k < 4; ++k)
    {
        x1 = std::min<double>(x1, tx[k]);
        y1 = std::min<double>(y1, ty[k]);
        x2 = std::max<double>(x2, tx[k]);
        y2 = std::max<double>(y2, ty[k]);
    }
}
/*
 * Angle (in radians) of the vector (dx, dy) measured from the positive
 * x-axis; the result lies in [-pi, pi] and is negative when dy < 0.
 *
 * A zero-length vector has no direction; 0 is returned for it instead of
 * the NaN that dividing by a zero length would produce.
 *
 * Note that the coordinate system in HTML and PDF are different
 */
static double get_angle(double dx, double dy)
{
    const double r = std::hypot(dx, dy);
    if(r == 0.0)
        return 0.0;

    // guard against rounding pushing the cosine just outside [-1, 1]
    double cosine = dx / r;
    if(cosine > 1.0)
        cosine = 1.0;
    else if(cosine < -1.0)
        cosine = -1.0;

    /*
     * acos always returns [0, pi]
     */
    const double ang = std::acos(cosine);

    /*
     * for angle below x-axis
     */
    return (dy < 0) ? -ang : ang;
}
/*
 * A CSS linear gradient sampled from an axial shading.
 *
 * The constructor fills `stops` and `angle`; dumpto() writes the
 * corresponding `background-image` declarations.
 */
class LinearGradient
{
public:
    // samples `shading` over the box (x1,y1)-(x2,y2)
    LinearGradient(GfxAxialShading * shading,
            double x1, double y1, double x2, double y2);

    // write the gradient as CSS declarations to `out`
    void dumpto (ostream & out);

    // plain-function adapter: `p` must point to a LinearGradient,
    // whose dumpto() is invoked with `out`
    static void style_function (void * p, ostream & out)
    {
        LinearGradient * self = static_cast<LinearGradient*>(p);
        self->dumpto(out);
    }

    // one color sample along the gradient
    // TODO, add alpha
    struct ColorStop
    {
        GfxRGB rgb;
        double pos; // [0,1]
    };

    vector<ColorStop> stops;
    double angle;
};
/*
 * Build the gradient for `shading` restricted to the box (x1,y1)-(x2,y2).
 *
 * - `angle` is the direction of the shading axis (t = 0 towards t = 1).
 * - `stops` receives color samples taken at evenly spaced values of the
 *   shading parameter t, over the part of the shading domain that falls
 *   inside the box. Each stop position is t mapped to [0,1] relative to
 *   the range of t covered by the box.
 *
 * Degenerate inputs are handled explicitly:
 * - a zero-length axis no longer divides by zero
 * - an empty intersection of box range and domain yields no stops
 * - a single-point intersection yields exactly one stop (stepping by a
 *   zero increment would otherwise never terminate)
 */
LinearGradient::LinearGradient (GfxAxialShading * shading,
        double x1, double y1, double x2, double y2)
{
    // coordinate for t = 0 and t = 1
    double t0x, t0y, t1x, t1y;
    shading->getCoords(&t0x, &t0y, &t1x, &t1y);

    angle = get_angle(t1x - t0x, t1y - t0y);

    // get the range of t in the box
    // from GfxState.cc in poppler
    double box_tmin, box_tmax;
    {
        double idx = t1x - t0x;
        double idy = t1y - t0y;
        const double len2 = idx * idx + idy * idy;
        // zero-length axis: every point projects to t = 0
        const double inv_len = (len2 > 0) ? (1.0 / len2) : 0.0;
        idx *= inv_len;
        idy *= inv_len;

        // t of (x1,y1)
        box_tmin = box_tmax = (x1 - t0x) * idx + (y1 - t0y) * idy;

        const double tdx = (x2 - x1) * idx;
        if(tdx < 0)
            box_tmin += tdx;
        else
            box_tmax += tdx;

        const double tdy = (y2 - y1) * idy;
        if(tdy < 0)
            box_tmin += tdy;
        else
            box_tmax += tdy;
    }

    // get the domain of t in the box
    const double domain_tmin = max<double>(box_tmin, shading->getDomain0());
    const double domain_tmax = min<double>(box_tmax, shading->getDomain1());

    // TODO: better sampling
    // TODO: check background color
    stops.clear();

    // empty (or NaN) range: nothing to sample
    if(!(domain_tmax >= domain_tmin))
        return;

    static const int SEGMENTS = 13;
    const double domain_span = domain_tmax - domain_tmin;
    const double box_span = box_tmax - box_tmin;

    // index-based stepping: always terminates and always hits both ends
    const int last = (domain_span > 0) ? SEGMENTS : 0;
    for(int i = 0; i <= last; ++i)
    {
        const double t = (i == SEGMENTS)
            ? domain_tmax
            : (domain_tmin + domain_span * i / SEGMENTS);

        GfxColor color;
        shading->getColor(t, &color);

        ColorStop stop;
        shading->getColorSpace()->getRGB(&color, &stop.rgb);
        stop.pos = (box_span > 0) ? ((t - box_tmin) / box_span) : 0.0;
        stops.push_back(stop);
    }
}
/*
 * Write the gradient to `out` as `background-image` declarations,
 * once unprefixed and once for each vendor prefix. Every declaration
 * carries the angle in radians followed by all color stops, with the
 * stop position expressed as a percentage.
 */
void LinearGradient::dumpto (ostream & out)
{
    const char * const vendor_prefixes[] = {"", "-ms-", "-moz-", "-webkit-", "-o-"};

    for(const char * prefix : vendor_prefixes)
    {
        out << "background-image:" << prefix << "linear-gradient(" << _round(angle) << "rad";

        for(const ColorStop & stop : stops)
        {
            out << "," << (stop.rgb) << " " << _round((stop.pos) * 100) << "%";
        }

        out << ");";
    }
}
/*
 * Draw an axial shading as a CSS rectangle with a linear-gradient
 * background.
 *
 * Returns gFalse without drawing anything when the css_draw option is
 * off; otherwise emits the rectangle covering the shading's bounding box
 * and returns gTrue. tMin and tMax are not used here.
 */
GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax)
{
    if(!(param->css_draw))
        return gFalse;

    double bbox_x1, bbox_y1, bbox_x2, bbox_y2;
    get_shading_bbox(state, shading, bbox_x1, bbox_y1, bbox_x2, bbox_y2);

    LinearGradient gradient(shading, bbox_x1, bbox_y1, bbox_x2, bbox_y2);

    // TODO: check background color
    // no border and no fill color: the gradient is supplied by the style callback
    css_draw_rectangle(bbox_x1, bbox_y1, bbox_x2 - bbox_x1, bbox_y2 - bbox_y1, state->getCTM(),
            nullptr, 0,
            nullptr, nullptr,
            LinearGradient::style_function, &gradient);

    return gTrue;
}
//TODO track state
//TODO connection style
/*
 * Draw the current path of `state` with CSS boxes, one per subpath.
 *
 * Only three shapes are recognized; any other subpath is ignored:
 *  - horizontal line: a box as thick as the line width, centered on the
 *    line, filled with the stroke color
 *  - vertical line: same, rotated
 *  - rectangle: filled with the fill color when `fill` is true,
 *    otherwise outlined with the stroke color
 *
 * Does nothing when the css_draw option is off.
 */
void HTMLRenderer::css_draw(GfxState *state, bool fill)
{
    if(!(param->css_draw))
        return;

    GfxPath * path = state->getPath();
    for(int idx = 0; idx < path->getNumSubpaths(); ++idx)
    {
        GfxSubpath * sp = path->getSubpath(idx);

        if(is_horizontal_line(sp))
        {
            double left = sp->getX(0);
            double right = sp->getX(1);
            const double line_y = sp->getY(0);
            if(right < left)
                swap(left, right);

            GfxRGB stroke_rgb;
            state->getStrokeRGB(&stroke_rgb);

            const double thickness = state->getLineWidth();

            // borderless box, filled with the stroke color
            css_draw_rectangle(left, line_y - thickness/2, right-left, thickness, state->getCTM(),
                    nullptr, 0,
                    nullptr, &stroke_rgb);
            continue;
        }

        if(is_vertical_line(sp))
        {
            const double line_x = sp->getX(0);
            double bottom = sp->getY(0);
            double top = sp->getY(1);
            if(top < bottom)
                swap(bottom, top);

            GfxRGB stroke_rgb;
            state->getStrokeRGB(&stroke_rgb);

            const double thickness = state->getLineWidth();

            // borderless box, filled with the stroke color
            css_draw_rectangle(line_x-thickness/2, bottom, thickness, top-bottom, state->getCTM(),
                    nullptr, 0,
                    nullptr, &stroke_rgb);
            continue;
        }

        if(!is_rectangle(sp))
            continue;

        close_text_line();

        // opposite corners of the rectangle
        double rx1 = sp->getX(0);
        double rx2 = sp->getX(2);
        double ry1 = sp->getY(0);
        double ry2 = sp->getY(2);
        if(rx2 < rx1)
            swap(rx1, rx2);
        if(ry2 < ry1)
            swap(ry1, ry2);

        // a filled rectangle has no border, so its border width is 0
        double bx, by, bw, bh, border_w[2];
        css_fix_rectangle_border_width(rx1, ry1, rx2, ry2, (fill ? 0.0 : state->getLineWidth()),
                bx, by, bw, bh, border_w[0], border_w[1]);

        // only the color that is actually used gets fetched
        GfxRGB stroke_rgb;
        GfxRGB fill_rgb;
        GfxRGB * border_color = nullptr;
        GfxRGB * background_color = nullptr;
        if(fill)
        {
            state->getFillRGB(&fill_rgb);
            background_color = &fill_rgb;
        }
        else
        {
            state->getStrokeRGB(&stroke_rgb);
            border_color = &stroke_rgb;
        }

        const int border_w_count = 2;

        if(_equal(bh, 0) || _equal(bw, 0))
        {
            // orthogonal line
            // TODO: check length
            // draw it as a solid box in the border color instead of a border
            background_color = border_color;
            border_color = nullptr;
            bh += border_w[0];
            bw += border_w[1];
        }

        css_draw_rectangle(bx, by, bw, bh, state->getCTM(),
                border_w, border_w_count,
                border_color, background_color);
    }
}
/*
 * Emit one <div class="Cd t<id>"> representing a rectangle.
 *
 * x, y, w, h        rectangle, in the coordinate system described by tm
 * tm                6-element transform matrix; a local copy is modified,
 *                   the caller's matrix is left alone
 * line_width_array  line_width_count border widths, written as the CSS
 *                   border-width list; only read when line_color is set
 * line_color        border color, or nullptr for no border
 * fill_color        background color, or nullptr for a transparent one
 * style_function    optional callback writing extra declarations into
 *                   the style attribute; receives style_function_data
 *
 * The scale found in the matrix is divided out of its first four entries
 * and applied to the width, height and border widths instead. If no
 * positive scale is found, the matrix is used unchanged with scale 1.
 */
void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, const double * tm,
        double * line_width_array, int line_width_count,
        const GfxRGB * line_color, const GfxRGB * fill_color,
        void (*style_function)(void *, ostream &), void * style_function_data)
{
    close_text_line();

    double new_tm[6];
    memcpy(new_tm, tm, sizeof(new_tm));

    _tm_transform(new_tm, x, y);

    // separate the scale from the matrix
    double scale = 1.0;
    {
        static const double sqrt2 = sqrt(2.0);

        const double diag_x = (new_tm[0] + new_tm[2]) / sqrt2;
        const double diag_y = (new_tm[1] + new_tm[3]) / sqrt2;
        scale = hypot(diag_x, diag_y);
        if(!_is_positive(scale))
        {
            scale = 1.0;
        }
        else
        {
            for(int k = 0; k < 4; ++k)
                new_tm[k] /= scale;
        }
    }

    html_fout << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";

    // border
    if(!line_color)
    {
        html_fout << "border:none;";
    }
    else
    {
        html_fout << "border-color:" << *line_color << ";";
        html_fout << "border-width:";
        for(int k = 0; k < line_width_count; ++k)
        {
            if(k > 0)
                html_fout << ' ';

            const double scaled_width = line_width_array[k] * scale;
            html_fout << _round(scaled_width);
            // the unit is only written for widths that are positive
            if(_is_positive(scaled_width))
                html_fout << "px";
        }
        html_fout << ";";
    }

    // background
    if(!fill_color)
    {
        html_fout << "background-color:transparent;";
    }
    else
    {
        html_fout << "background-color:" << (*fill_color) << ";";
    }

    // caller-supplied extra declarations
    if(style_function)
    {
        style_function(style_function_data, html_fout);
    }

    // position and size
    html_fout << "bottom:" << _round(y) << "px;";
    html_fout << "left:" << _round(x) << "px;";
    html_fout << "width:" << _round(w * scale) << "px;";
    html_fout << "height:" << _round(h * scale) << "px;";

    html_fout << "\"></div>";
}
} // namespace pdf2htmlEX

View File

@ -141,9 +141,7 @@ void HTMLRenderer::export_word_space (long long ws_id, double word_space)
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
{
css_fout << ".c" << color_id << "{color:rgb("
<< dec << (int)colToByte(rgb->r) << "," << (int)colToByte(rgb->g) << "," << (int)colToByte(rgb->b) << ");}" << hex
<< endl;
css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
}
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)

View File

@ -3,7 +3,7 @@
*
* Handling general stuffs
*
* by WangLu
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* 2012.08.14
*/
@ -13,8 +13,6 @@
#include <algorithm>
#include <vector>
#include <splash/SplashBitmap.h>
#include "HTMLRenderer.h"
#include "BackgroundRenderer.h"
#include "namespace.h"
@ -64,10 +62,6 @@ HTMLRenderer::~HTMLRenderer()
delete [] width_list;
}
static GBool annot_cb(Annot *, void *) {
return false;
};
void HTMLRenderer::process(PDFDoc *doc)
{
cur_doc = doc;
@ -84,17 +78,17 @@ void HTMLRenderer::process(PDFDoc *doc)
vector<double> zoom_factors;
if(abs(param->zoom) > EPS)
if(_is_positive(param->zoom))
{
zoom_factors.push_back(param->zoom);
}
if(abs(param->fit_width) > EPS)
if(_is_positive(param->fit_width))
{
zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width());
}
if(abs(param->fit_height) > EPS)
if(_is_positive(param->fit_height))
{
zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height());
}
@ -108,8 +102,8 @@ void HTMLRenderer::process(PDFDoc *doc)
zoom = *min_element(zoom_factors.begin(), zoom_factors.end());
}
scale_factor1 = max(zoom, param->font_size_multiplier);
scale_factor2 = zoom / scale_factor1;
text_scale_factor1 = max<double>(zoom, param->font_size_multiplier);
text_scale_factor2 = zoom / text_scale_factor1;
}
@ -117,12 +111,7 @@ void HTMLRenderer::process(PDFDoc *doc)
BackgroundRenderer * bg_renderer = nullptr;
if(param->process_nontext)
{
// Render non-text objects as image
// copied from poppler
SplashColor color;
color[0] = color[1] = color[2] = 255;
bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
bg_renderer = new BackgroundRenderer(param);
bg_renderer->startDoc(doc);
}
@ -141,22 +130,15 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param->process_nontext)
{
doc->displayPage(bg_renderer, i, param->h_dpi, param->v_dpi,
0, true, false, false,
nullptr, nullptr, &annot_cb, nullptr);
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
if(param->single_html)
add_tmp_file((char*)fn);
{
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
if(param->single_html)
add_tmp_file((char*)fn);
bg_renderer->getBitmap()->writeImgFile(splashFormatPng,
(char*)fn,
param->h_dpi, param->v_dpi);
}
bg_renderer->render_page(doc, i, (char*)fn);
}
doc->displayPage(this, i, zoom_factor() * DEFAULT_DPI, zoom_factor() * DEFAULT_DPI,
doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
0, true, false, false,
nullptr, nullptr, nullptr, nullptr);
@ -219,15 +201,15 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
}
html_fout << "\">";
draw_scale = 1.0;
draw_text_scale = 1.0;
cur_font_info = install_font(nullptr);
cur_font_size = draw_font_size = 0;
cur_fs_id = install_font_size(cur_font_size);
memcpy(cur_ctm, id_matrix, sizeof(cur_ctm));
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
cur_tm_id = install_transform_matrix(draw_ctm);
memcpy(cur_text_tm, id_matrix, sizeof(cur_text_tm));
memcpy(draw_text_tm, id_matrix, sizeof(draw_text_tm));
cur_ttm_id = install_transform_matrix(draw_text_tm);
cur_letter_space = cur_word_space = 0;
cur_ls_id = install_letter_space(cur_letter_space);
@ -247,7 +229,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
}
void HTMLRenderer::endPage() {
close_line();
close_text_line();
// process links before the page is closed
cur_doc->processLinks(this, pageNum);
@ -404,7 +386,9 @@ void HTMLRenderer::post_process()
void HTMLRenderer::fix_stream (std::ostream & out)
{
out << hex;
// we output all ID's in hex
// browsers are not happy with scientific notations
out << hex << fixed;
}
void HTMLRenderer::add_tmp_file(const string & fn)

View File

@ -215,9 +215,11 @@ long long HTMLRenderer::install_font_size(double font_size)
long long HTMLRenderer::install_transform_matrix(const double * tm)
{
TM m(tm);
Matrix m;
memcpy(m.m, tm, sizeof(m.m));
auto iter = transform_matrix_map.lower_bound(m);
if((iter != transform_matrix_map.end()) && (m == (iter->first)))
if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4)))
return iter->second;
long long new_tm_id = transform_matrix_map.size();

View File

@ -22,62 +22,6 @@ using std::ostringstream;
using std::min;
using std::max;
static void _transform(const double * ctm, double & x, double & y)
{
double xx = x, yy = y;
x = ctm[0] * xx + ctm[2] * yy + ctm[4];
y = ctm[1] * xx + ctm[3] * yy + ctm[5];
}
static void _get_transformed_rect(AnnotLink * link, const double * ctm, double & x1, double & y1, double & x2, double & y2)
{
double _x1, _x2, _y1, _y2;
link->getRect(&_x1, &_y1, &_x2, &_y2);
_transform(ctm, _x1, _y1);
_transform(ctm, _x2, _y2);
x1 = min(_x1, _x2);
x2 = max(_x1, _x2);
y1 = min(_y1, _y2);
y2 = max(_y1, _y2);
}
/*
* In PDF, edges of the rectangle are in the middle of the borders
* In HTML, edges are completely outside the rectangle
*/
static void _fix_border_width(double & x1, double & y1, double & x2, double & y2,
double border_width, double & border_top_bottom_width, double & border_left_right_width)
{
double w = x2 - x1;
if(w > border_width)
{
x1 += border_width / 2;
x2 -= border_width / 2;
border_left_right_width = border_width;
}
else
{
x1 += w / 2;
x2 -= w / 2;
border_left_right_width = border_width + w/2;
}
double h = y2 - y1;
if(h > border_width)
{
y1 += border_width / 2;
y2 -= border_width / 2;
border_top_bottom_width = border_width;
}
else
{
y1 += h / 2;
y2 -= h / 2;
border_top_bottom_width = border_width + h/2;
}
}
/*
* The detailed rectangle area of the link destination
* Will be parsed and performed by Javascript
@ -164,6 +108,7 @@ static string get_dest_detail_str(int pageno, LinkDest * dest)
/*
* Based on pdftohtml from poppler
* TODO: CSS for link rectangles
* TODO: share rectangle draw with css-draw
*/
void HTMLRenderer::processLink(AnnotLink * al)
{
@ -239,10 +184,17 @@ void HTMLRenderer::processLink(AnnotLink * al)
html_fout << ">";
}
html_fout << "<div style=\"";
html_fout << "<div class=\"Cd t"
<< install_transform_matrix(default_ctm)
<< "\" style=\"";
double x,y,w,h;
double x1, y1, x2, y2;
_get_transformed_rect(al, default_ctm, x1, y1, x2, y2);
al->getRect(&x1, &y1, &x2, &y2);
x = min<double>(x1, x2);
y = min<double>(y1, y2);
w = max<double>(x1, x2) - x;
h = max<double>(y1, y2) - y;
double border_width = 0;
double border_top_bottom_width = 0;
@ -250,12 +202,14 @@ void HTMLRenderer::processLink(AnnotLink * al)
auto * border = al->getBorder();
if(border)
{
border_width = border->getWidth() * zoom_factor();
border_width = border->getWidth();
if(border_width > 0)
{
{
_fix_border_width(x1, y1, x2, y1,
border_width, border_top_bottom_width, border_left_right_width);
css_fix_rectangle_border_width(x1, y1, x2, y2, border_width,
x, y, w, h,
border_top_bottom_width, border_left_right_width);
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
html_fout << "border-width:" << _round(border_top_bottom_width) << "px;";
else
@ -313,12 +267,13 @@ void HTMLRenderer::processLink(AnnotLink * al)
html_fout << "border-style:none;";
}
_tm_transform(default_ctm, x, y);
html_fout << "position:absolute;"
<< "left:" << _round(x1- border_left_right_width) << "px;"
<< "bottom:" << _round(y1 - border_top_bottom_width) << "px;"
<< "width:" << _round(x2-x1) << "px;"
<< "height:" << _round(y2-y1) << "px;";
<< "left:" << _round(x) << "px;"
<< "bottom:" << _round(y) << "px;"
<< "width:" << _round(w) << "px;"
<< "height:" << _round(h) << "px;";
// fix for IE
html_fout << "background-color:rgba(255,255,255,0.000001);";

View File

@ -12,6 +12,7 @@
* optimize lines using nested <span> (reuse classes)
*/
#include <cmath>
#include <algorithm>
#include "HTMLRenderer.h"
@ -22,6 +23,7 @@ namespace pdf2htmlEX {
using std::max;
using std::abs;
using std::hypot;
void HTMLRenderer::updateAll(GfxState * state)
{
@ -82,7 +84,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
bool need_recheck_position = false;
bool need_rescale_font = false;
bool draw_scale_changed = false;
bool draw_text_scale_changed = false;
// text position
// we've been tracking the text position positively in the update*** functions
@ -98,7 +100,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!(new_font_info->id == cur_font_info->id))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_font_info = new_font_info;
}
@ -112,7 +114,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
// backup the current ctm for need_recheck_position
double old_ctm[6];
memcpy(old_ctm, cur_ctm, sizeof(old_ctm));
memcpy(old_ctm, cur_text_tm, sizeof(old_ctm));
// ctm & text ctm & hori scale
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
@ -131,52 +133,52 @@ void HTMLRenderer::check_state_change(GfxState * state)
new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5];
//new_ctm[4] = new_ctm[5] = 0;
if(!_tm_equal(new_ctm, cur_ctm))
if(!_tm_equal(new_ctm, cur_text_tm))
{
need_recheck_position = true;
need_rescale_font = true;
memcpy(cur_ctm, new_ctm, sizeof(cur_ctm));
memcpy(cur_text_tm, new_ctm, sizeof(cur_text_tm));
}
}
// draw_ctm, draw_scale
// draw_text_tm, draw_text_scale
// depends: font size & ctm & text_ctm & hori scale
if(need_rescale_font)
{
double new_draw_ctm[6];
memcpy(new_draw_ctm, cur_ctm, sizeof(new_draw_ctm));
double new_draw_text_tm[6];
memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_ctm[2] * new_draw_ctm[2] + new_draw_ctm[3] * new_draw_ctm[3]);
double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]);
double new_draw_font_size = cur_font_size;
if(_is_positive(new_draw_scale))
if(_is_positive(new_draw_text_scale))
{
new_draw_font_size *= new_draw_scale;
new_draw_font_size *= new_draw_text_scale;
for(int i = 0; i < 4; ++i)
new_draw_ctm[i] /= new_draw_scale;
new_draw_text_tm[i] /= new_draw_text_scale;
}
else
{
new_draw_scale = 1.0;
new_draw_text_scale = 1.0;
}
if(!(_equal(new_draw_scale, draw_scale)))
if(!(_equal(new_draw_text_scale, draw_text_scale)))
{
draw_scale_changed = true;
draw_scale = new_draw_scale;
draw_text_scale_changed = true;
draw_text_scale = new_draw_text_scale;
}
if(!(_equal(new_draw_font_size, draw_font_size)))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
draw_font_size = new_draw_font_size;
cur_fs_id = install_font_size(draw_font_size);
}
if(!(_tm_equal(new_draw_ctm, draw_ctm, 4)))
if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4)))
{
new_line_state = max(new_line_state, NLS_DIV);
memcpy(draw_ctm, new_draw_ctm, sizeof(draw_ctm));
cur_tm_id = install_transform_matrix(draw_ctm);
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm));
cur_ttm_id = install_transform_matrix(draw_text_tm);
}
}
@ -198,29 +200,29 @@ void HTMLRenderer::check_state_change(GfxState * state)
*/
bool merged = false;
if(_tm_equal(old_ctm, cur_ctm, 4))
if(_tm_equal(old_ctm, cur_text_tm, 4))
{
double dy = cur_ty - draw_ty;
double tdx = old_ctm[4] - cur_ctm[4] - cur_ctm[2] * dy;
double tdy = old_ctm[5] - cur_ctm[5] - cur_ctm[3] * dy;
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
if(_equal(cur_ctm[0] * tdy, cur_ctm[1] * tdx))
if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
{
if(abs(cur_ctm[0]) > EPS)
if(_is_positive(cur_text_tm[0]))
{
draw_tx += tdx / cur_ctm[0];
draw_tx += tdx / cur_text_tm[0];
draw_ty += dy;
merged = true;
}
else if (abs(cur_ctm[1]) > EPS)
else if (_is_positive(cur_text_tm[1]))
{
draw_tx += tdy / cur_ctm[1];
draw_tx += tdy / cur_text_tm[1];
draw_ty += dy;
merged = true;
}
else
{
if((abs(tdx) < EPS) && (abs(tdy) < EPS))
if((_equal(tdx,0)) && (_equal(tdy,0)))
{
// free
draw_tx = cur_tx;
@ -236,33 +238,33 @@ void HTMLRenderer::check_state_change(GfxState * state)
if(!merged)
{
new_line_state = max(new_line_state, NLS_DIV);
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
}
}
// letter space
// depends: draw_scale
if(all_changed || letter_space_changed || draw_scale_changed)
// depends: draw_text_scale
if(all_changed || letter_space_changed || draw_text_scale_changed)
{
double new_letter_space = state->getCharSpace();
if(!_equal(cur_letter_space, new_letter_space))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_letter_space = new_letter_space;
cur_ls_id = install_letter_space(cur_letter_space * draw_scale);
cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale);
}
}
// word space
// depends draw_scale
if(all_changed || word_space_changed || draw_scale_changed)
// depends draw_text_scale
if(all_changed || word_space_changed || draw_text_scale_changed)
{
double new_word_space = state->getWordSpace();
if(!_equal(cur_word_space, new_word_space))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_word_space = new_word_space;
cur_ws_id = install_word_space(cur_word_space * draw_scale);
cur_ws_id = install_word_space(cur_word_space * draw_text_scale);
}
}
@ -273,22 +275,22 @@ void HTMLRenderer::check_state_change(GfxState * state)
state->getFillRGB(&new_color);
if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b)))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_color = new_color;
cur_color_id = install_color(&new_color);
}
}
// rise
// depends draw_scale
if(all_changed || rise_changed || draw_scale_changed)
// depends draw_text_scale
if(all_changed || rise_changed || draw_text_scale_changed)
{
double new_rise = state->getRise();
if(!_equal(cur_rise, new_rise))
{
new_line_state = max(new_line_state, NLS_SPAN);
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
cur_rise = new_rise;
cur_rise_id = install_rise(new_rise * draw_scale);
cur_rise_id = install_rise(new_rise * draw_text_scale);
}
}
@ -312,7 +314,7 @@ void HTMLRenderer::reset_state_change()
color_changed = false;
}
void HTMLRenderer::prepare_line(GfxState * state)
void HTMLRenderer::prepare_text_line(GfxState * state)
{
if(!line_opened)
{
@ -321,7 +323,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
if(new_line_state == NLS_DIV)
{
close_line();
close_text_line();
line_buf.reset(state);
@ -333,7 +335,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
{
// align horizontal position
// try to merge with the last line if possible
double target = (cur_tx - draw_tx) * draw_scale;
double target = (cur_tx - draw_tx) * draw_text_scale;
if(abs(target) < param->h_eps)
{
// ignore it
@ -341,7 +343,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
else
{
line_buf.append_offset(target);
draw_tx += target / draw_scale;
draw_tx += target / draw_text_scale;
}
}
@ -353,7 +355,7 @@ void HTMLRenderer::prepare_line(GfxState * state)
line_opened = true;
}
void HTMLRenderer::close_line()
void HTMLRenderer::close_text_line()
{
if(line_opened)
{

View File

@ -310,7 +310,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
memset(width_list, -1, 0x10000 * sizeof(*width_list));
if(code2GID)
maxcode = min(maxcode, code2GID_len - 1);
maxcode = min<int>(maxcode, code2GID_len - 1);
bool is_truetype = is_truetype_suffix(suffix);
int max_key = maxcode;
@ -484,7 +484,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
// see if the line has to be closed due to state change
check_state_change(state);
prepare_line(state);
prepare_text_line(state);
// Now ready to output
// get the unicodes
@ -522,7 +522,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
if(is_space && (param->space_as_offset))
{
// ignore horiz_scaling, as it's merged in CTM
line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale);
line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
}
else
{

View File

@ -82,8 +82,8 @@ void Preprocessor::drawChar(GfxState *state, double x, double y,
void Preprocessor::startPage(int pageNum, GfxState *state)
{
max_width = max(max_width, state->getPageWidth());
max_height = max(max_height, state->getPageHeight());
max_width = max<double>(max_width, state->getPageWidth());
max_height = max<double>(max_height, state->getPageHeight());
}
const char * Preprocessor::get_code_map (long long font_id) const

View File

@ -0,0 +1,40 @@
/*
* SplashBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#include <PDFDoc.h>
#include "SplashBackgroundRenderer.h"
namespace pdf2htmlEX {
using std::string;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
/*
 * Deliberately does nothing: the forwarding call to SplashOutputDev::drawChar
 * is commented out below, so this override draws no characters.
 * NOTE(review): presumably so that text does not end up in the background
 * image and is handled by HTMLRenderer instead - confirm.
 */
void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}
/*
 * Callback handed to PDFDoc::displayPage by render_page(); it answers
 * false for every annotation.
 * NOTE(review): presumably this makes poppler skip drawing annotations
 * on the background image - confirm against the displayPage signature.
 */
static GBool annot_cb(Annot *, void *)
{
    return false;
}
/*
 * Render page `pageno` of `doc` through this output device at the
 * resolution configured in param (h_dpi x v_dpi), then write the
 * resulting bitmap to `filename` as a PNG.
 */
void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename)
{
    doc->displayPage(this, pageno, param->h_dpi, param->v_dpi,
            0, true, false, false,
            nullptr, nullptr, &annot_cb, nullptr);

    // writeImgFile takes a non-const char *
    char * target_path = const_cast<char*>(filename.c_str());
    getBitmap()->writeImgFile(splashFormatPng,
            target_path,
            param->h_dpi, param->v_dpi);
}
} // namespace pdf2htmlEX

View File

@ -332,15 +332,11 @@ void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int s
SplineChar * sc = sf->glyphs[j];
if(sc == NULL) continue;
DBounds bb;
SplineCharFindBounds(sc, &bb);
double glyph_width = bb.maxx - bb.minx;
if((glyph_width > EPS)
&& (((glyph_width > width_list[i] + EPS) && (squeeze_wide))
|| ((glyph_width < width_list[i] - EPS) && (stretch_narrow))))
if(((sc->width > EPS)
&& (((sc->width > width_list[i] + EPS) && (squeeze_wide))
|| ((sc->width < width_list[i] - EPS) && (stretch_narrow)))))
{
real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width;
real transform[6]; transform[0] = ((double)width_list[i]) / (sc->width);
transform[3] = 1.0;
transform[1] = transform[2] = transform[4] = transform[5] = 0;
FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth);

View File

@ -2,37 +2,31 @@
* Background renderer
* Render all those things not supported as Image
*
* by WangLu
* 2012.08.06
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#ifndef BACKGROUND_RENDERER_H__
#define BACKGROUND_RENDERER_H__
#include <SplashOutputDev.h>
#include "pdf2htmlEX-config.h"
#if HAVE_CAIRO
#include "CairoBackgroundRenderer.h"
namespace pdf2htmlEX {
// Based on BackgroundRenderer from poppler
class BackgroundRenderer : public SplashOutputDev {
public:
BackgroundRenderer(SplashColorMode colorModeA, int bitmapRowPadA,
GBool reverseVideoA, SplashColorPtr paperColorA,
GBool bitmapTopDownA = gTrue,
GBool allowAntialiasA = gTrue)
: SplashOutputDev(colorModeA,
bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA,
allowAntialiasA)
{ }
virtual ~BackgroundRenderer() { }
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
};
typedef CairoBackgroundRenderer BackgroundRenderer;
}
#else
#include "SplashBackgroundRenderer.h"
namespace pdf2htmlEX {
typedef SplashBackgroundRenderer BackgroundRenderer;
}
#endif // HAVE_CAIRO
#endif //BACKGROUND_RENDERER_H__

View File

@ -0,0 +1,42 @@
/*
* Cairo Background renderer
* Render all those things not supported as Image, with Cairo
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
*/
#ifndef CAIRO_BACKGROUND_RENDERER_H__
#define CAIRO_BACKGROUND_RENDERER_H__
#include <CairoOutputDev.h>
#include "Param.h"
namespace pdf2htmlEX {
/*
 * Background renderer on top of poppler's CairoOutputDev.
 * Based on BackgroundRenderer from poppler.
 *
 * Only the declarations are visible here; drawChar() and render_page() are
 * implemented in the corresponding source file.
 */
class CairoBackgroundRenderer : public CairoOutputDev
{
public:
    // explicit: a bare Param pointer must not convert silently into a renderer.
    // The pointer is stored as-is; no copy of the Param object is made.
    explicit CairoBackgroundRenderer(const Param * param)
        : CairoOutputDev()
        , param(param)
    { }

    virtual ~CairoBackgroundRenderer() { }

    // Per-character drawing hook
    virtual void drawChar(GfxState *state, double x, double y,
                          double dx, double dy,
                          double originX, double originY,
                          CharCode code, int nBytes, Unicode *u, int uLen);

    // Takes a document, a page number and an output file name.
    // NOTE(review): presumably renders that page into the named image file,
    // like the Splash counterpart -- confirm in CairoBackgroundRenderer.cc.
    void render_page(PDFDoc * doc, int pageno, const std::string & filename);

protected:
    // Program options; not owned by this object
    const Param * param;
};
}
#endif //CAIRO_BACKGROUND_RENDERER_H__

View File

@ -42,6 +42,8 @@
* j - Js data
* p - Page
*
* Cd - CSS Draw
*
* Reusable CSS classes
*
* t<hex> - Transform matrix
@ -77,12 +79,18 @@ class HTMLRenderer : public OutputDev
// Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
virtual GBool useShadedFills(int type) { return type == 2; }
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
// Does this device need non-text content?
virtual GBool needNonText() { return gFalse; }
virtual GBool needNonText() { return gTrue; }
virtual void setDefaultCTM(double *ctm);
@ -121,6 +129,10 @@ class HTMLRenderer : public OutputDev
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
virtual void stroke(GfxState *state) { css_draw(state, false); }
virtual void fill(GfxState *state) { css_draw(state, true); }
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void processLink(AnnotLink * al);
protected:
@ -190,8 +202,25 @@ class HTMLRenderer : public OutputDev
void reset_state_change();
// prepare the line context, (close old tags, open new tags)
// make sure the current HTML style consistent with PDF
void prepare_line(GfxState * state);
void close_line();
void prepare_text_line(GfxState * state);
void close_text_line();
////////////////////////////////////////////////////
// CSS drawing
////////////////////////////////////////////////////
void css_draw(GfxState *state, bool fill);
/*
* coordinates are to be transformed by state->getCTM()
* (x,y) should be the bottom-left corner INCLUDING border
* w,h should be the metrics WITHOUT border
*
* line_color & fill_color may be specified as nullptr to indicate none
* style_function & style_function_data may be provided to provide more styles
*/
void css_draw_rectangle(double x, double y, double w, double h, const double * tm,
double * line_width_array, int line_width_count,
const GfxRGB * line_color, const GfxRGB * fill_color,
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
////////////////////////////////////////////////////
@ -216,9 +245,9 @@ class HTMLRenderer : public OutputDev
* factor1 & factor 2 are determined according to zoom and font-size-multiplier
*
*/
double zoom_factor (void) const { return scale_factor1 * scale_factor2; }
double scale_factor1;
double scale_factor2;
double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; }
double text_scale_factor1;
double text_scale_factor2;
////////////////////////////////////////////////////
@ -246,16 +275,15 @@ class HTMLRenderer : public OutputDev
bool font_changed;
// transform matrix
long long cur_tm_id;
long long cur_ttm_id;
bool ctm_changed;
bool text_mat_changed;
// horizontal scaling
bool hori_scale_changed;
// this is CTM * TextMAT in PDF, not only CTM
// this is CTM * TextMAT in PDF
// [4] and [5] are ignored,
// as we'll calculate the position of the origin separately
// TODO: changed this for images
double cur_ctm[6]; // unscaled
double cur_text_tm[6]; // unscaled
// letter spacing
long long cur_ls_id;
@ -281,11 +309,11 @@ class HTMLRenderer : public OutputDev
// we try to render the final font size directly
// to reduce the effect of ctm as much as possible
// draw_ctm is cur_ctm scaled by 1/draw_scale,
// so everything redenered should be multiplied by draw_scale
double draw_ctm[6];
// draw_ctm is cur_ctm scaled by 1/draw_text_scale,
// so everything rendered should be multiplied by draw_text_scale
double draw_text_tm[6];
double draw_font_size;
double draw_scale;
double draw_text_scale;
// the position of next char, in text coords
// this is actual position (in HTML), which might be different from cur_tx/ty (in PDF)
@ -376,7 +404,7 @@ class HTMLRenderer : public OutputDev
std::unordered_map<long long, FontInfo> font_name_map;
std::map<double, long long> font_size_map;
std::map<TM, long long> transform_matrix_map;
std::map<Matrix, long long, Matrix_less> transform_matrix_map;
std::map<double, long long> letter_space_map;
std::map<double, long long> word_space_map;
std::unordered_map<GfxRGB, long long, GfxRGB_hash, GfxRGB_equal> color_map;

View File

@ -55,6 +55,9 @@ struct Param
int debug;
int clean_tmp;
// experimental
int css_draw;
};
} // namespace pdf2htmlEX

View File

@ -0,0 +1,48 @@
/*
* Splash Background renderer
* Render all those things not supported as Image, with Splash
*
* by WangLu
* 2012.08.06
*/
#ifndef SPLASH_BACKGROUND_RENDERER_H__
#define SPLASH_BACKGROUND_RENDERER_H__
#include <string>
#include <splash/SplashBitmap.h>
#include <SplashOutputDev.h>
#include "Param.h"
namespace pdf2htmlEX {
/*
 * Background renderer on top of poppler's SplashOutputDev.
 * Based on BackgroundRenderer from poppler.
 */
class SplashBackgroundRenderer : public SplashOutputDev
{
public:
    // Colour whose address is passed to SplashOutputDev as the paper colour
    static const SplashColor white;

    // explicit: a bare Param pointer must not convert silently into a renderer.
    // The base device is configured with splashModeRGB8, a row padding of 4,
    // no reverse video, `white` as paper colour, and the last two flags
    // (bitmapTopDown, allowAntialias) switched on.
    // The C-style cast adapts the address of the const member `white` to
    // SplashColorPtr. The Param pointer is stored as-is and is not owned.
    explicit SplashBackgroundRenderer(const Param * param)
        : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue)
        , param(param)
    { }

    virtual ~SplashBackgroundRenderer() { }

    // Per-character drawing hook
    virtual void drawChar(GfxState *state, double x, double y,
                          double dx, double dy,
                          double originX, double originY,
                          CharCode code, int nBytes, Unicode *u, int uLen);

    // Render page `pageno` of `doc` and write the bitmap to `filename`
    void render_page(PDFDoc * doc, int pageno, const std::string & filename);

protected:
    // Program options; not owned by this object
    const Param * param;
};
} // namespace pdf2htmlEX
#endif // SPLASH_BACKGROUND_RENDERER_H__

View File

@ -0,0 +1,24 @@
/*
* config.h
* Compile time constants
*
* by WangLu
*/
#ifndef PDF2HTMLEX_CONFIG_H__
#define PDF2HTMLEX_CONFIG_H__
#include <string>
// Compile-time switch tested with `#if HAVE_CAIRO`; 0 here.
// NOTE(review): the values in this file look like the expansion of the
// @...@ placeholders of the matching .in template -- confirm before editing by hand.
#define HAVE_CAIRO 0
namespace pdf2htmlEX {
// Program version string
static const std::string PDF2HTMLEX_VERSION = "0.5";
// Installation prefix
static const std::string PDF2HTMLEX_PREFIX = "/usr/local";
// Data directory; the two adjacent literals are concatenated by the compiler
// into "/usr/local/share/pdf2htmlEX"
static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX";
} // namespace pdf2htmlEX
#endif //PDF2HTMLEX_CONFIG_H__

View File

@ -11,6 +11,8 @@
#include <string>
#define HAVE_CAIRO @HAVE_CAIRO@
namespace pdf2htmlEX {
static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@";

View File

@ -47,6 +47,9 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size =
return true;
}
void _tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
void _tm_multiply(double * tm_left, const double * tm_right);
static inline long long hash_ref(const Ref * id)
{
return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen);
@ -102,28 +105,21 @@ public:
bool has_space; // whether space is included in the font
};
// wrapper of the transform matrix double[6]
// Transform Matrix
class TM
class Matrix_less
{
public:
TM() {}
TM(const double * m) {memcpy(_, m, sizeof(_));}
bool operator < (const TM & m) const {
bool operator () (const Matrix & m1, const Matrix & m2) const
{
// Note that we only care about the first 4 elements
for(int i = 0; i < 4; ++i)
{
if(_[i] < m._[i] - EPS)
if(m1.m[i] < m2.m[i] - EPS)
return true;
if(_[i] > m._[i] + EPS)
if(m1.m[i] > m2.m[i] + EPS)
return false;
}
return false;
}
bool operator == (const TM & m) const {
return _tm_equal(_, m._, 4);
}
double _[6];
};
class base64stream
@ -201,7 +197,7 @@ public:
va_end(vlist);
if(l >= (int)buf.capacity())
{
buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2));
buf.reserve(std::max<long>((long)(l+1), (long)buf.capacity() * 2));
va_start(vlist, format);
l = vsnprintf(&buf.front(), buf.capacity(), format, vlist);
va_end(vlist);
@ -223,5 +219,17 @@ bool is_truetype_suffix(const std::string & suffix);
std::string get_filename(const std::string & path);
std::string get_suffix(const std::string & path);
/*
* In PDF, edges of the rectangle are in the middle of the borders
* In HTML, edges are completely outside the rectangle
*/
void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2,
double border_width,
double & x, double & y, double & w, double & h,
double & border_top_bottom_width,
double & border_left_right_width);
std::ostream & operator << (std::ostream & out, const GfxRGB & rgb);
} // namespace util
#endif //UTIL_H__

View File

@ -82,7 +82,7 @@ void parse_options (int argc, char **argv)
.add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled")
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("stretch_narrow_glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space")
.add("squeeze_wide_glyph", &param.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating")
.add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating")
.add("css-filename", &param.css_filename, "", "Specify the file name of the generated css file")
.add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files")
@ -91,6 +91,7 @@ void parse_options (int argc, char **argv)
.add("debug", &param.debug, 0, "output debug information")
.add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing")
.add("css-draw", &param.css_draw, 0, "[Experimental and Unsupported] CSS Drawing")
.add("", &param.input_filename, "", "")
.add("", &param.output_filename, "", "")
;
@ -182,8 +183,8 @@ int main(int argc, char **argv)
throw "Copying of text from this document is not allowed.";
}
param.first_page = min(max(param.first_page, 1), doc->getNumPages());
param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages());
param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
if(param.output_filename == "")
{

View File

@ -54,6 +54,31 @@ const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string>
{{".js", 1}, {"<script type=\"text/javascript\">", "</script>"}}
});
/*
 * Apply the 6-element transform matrix `tm` to the point (x, y) in place.
 *
 * tm       - matrix as double[6]; tm[0..3] is the linear part,
 *            tm[4] / tm[5] the translation
 * x, y     - input coordinates, overwritten with the result
 * is_delta - when true, (x, y) is treated as a displacement:
 *            the translation part is not added
 */
void _tm_transform(const double * tm, double & x, double & y, bool is_delta)
{
    // keep the inputs: x is overwritten before y is computed
    const double in_x = x;
    const double in_y = y;

    x = tm[0] * in_x + tm[2] * in_y;
    y = tm[1] * in_x + tm[3] * in_y;

    if(is_delta)
        return;

    x += tm[4];
    y += tm[5];
}
/*
 * In-place composition of two 6-element transform matrices:
 *     tm_left = tm_left * tm_right
 * i.e. the resulting transform applies tm_right first and then the old tm_left.
 *
 * tm_left  - double[6], left operand, overwritten with the product
 * tm_right - double[6], right operand; may be the same array as tm_left
 */
void _tm_multiply(double * tm_left, const double * tm_right)
{
    // Snapshot both operands before writing anything: tm_right may alias
    // tm_left (e.g. squaring a matrix), and the elements of tm_left are
    // overwritten one by one below.
    double l[6];
    double r[6];
    memcpy(l, tm_left, sizeof(l));
    memcpy(r, tm_right, sizeof(r));

    // linear part
    tm_left[0] = l[0] * r[0] + l[2] * r[1];
    tm_left[1] = l[1] * r[0] + l[3] * r[1];
    tm_left[2] = l[0] * r[2] + l[2] * r[3];
    tm_left[3] = l[1] * r[2] + l[3] * r[3];

    // translation: the right translation pushed through the left linear part,
    // added to the left translation
    tm_left[4] = l[4] + (l[0] * r[4] + l[2] * r[5]);
    tm_left[5] = l[5] + (l[1] * r[4] + l[3] * r[5]);
}
bool isLegalUnicode(Unicode u)
{
/*
@ -249,4 +274,49 @@ string get_suffix(const string & path)
}
}
/*
 * One axis of css_fix_rectangle_border_width().
 *
 * [lo, hi] is the extent of the rectangle with its edges in the middle of a
 * stroke of width border_width, so the painted extent is
 * [lo - border_width/2, hi + border_width/2].
 * On return `pos` is the outer start, `len` the content length and
 * `css_border_width` the per-side border, with
 *     len + 2 * css_border_width == (hi - lo) + border_width
 */
static void css_fix_border_1d(double lo, double hi, double border_width,
                              double & pos, double & len, double & css_border_width)
{
    len = hi - lo;
    if(len > border_width)
    {
        // the borders eat border_width/2 on each side of the content
        len -= border_width;
        css_border_width = border_width;
    }
    else
    {
        // The two borders meet: no content is left and each side takes half of
        // the painted extent. (This used to be border_width + len/2, which made
        // the box border_width too large and jumped at len == border_width.)
        css_border_width = (border_width + len) / 2;
        len = 0;
    }
    pos = lo - border_width / 2;
}

/*
 * Convert a stroked rectangle into CSS box metrics.
 *
 * In the input, the edges (x1,y1)-(x2,y2) run through the middle of the
 * borders; in the output, borders lie completely outside the w x h content box.
 *
 * x1, y1, x2, y2          - corners of the rectangle, x1 <= x2 and y1 <= y2
 * border_width            - stroke width
 * x, y                    - (out) corner of the box including the border
 * w, h                    - (out) size of the box without the border
 * border_top_bottom_width - (out) width of each of the top and bottom borders
 * border_left_right_width - (out) width of each of the left and right borders
 */
void css_fix_rectangle_border_width(double x1, double y1,
                                    double x2, double y2,
                                    double border_width,
                                    double & x, double & y, double & w, double & h,
                                    double & border_top_bottom_width,
                                    double & border_left_right_width)
{
    css_fix_border_1d(x1, x2, border_width, x, w, border_left_right_width);
    css_fix_border_1d(y1, y2, border_width, y, h, border_top_bottom_width);
}
/*
 * Write a GfxRGB colour as "rgb(R,G,B)".
 * Each component is passed through colToByte() and printed as a decimal
 * integer. The stream's format flags are saved on entry and put back before
 * returning, so the std::dec used here does not leak to the caller.
 */
ostream & operator << (ostream & out, const GfxRGB & rgb)
{
    const auto saved_flags = out.flags();

    const int red   = (int)colToByte(rgb.r);
    const int green = (int)colToByte(rgb.g);
    const int blue  = (int)colToByte(rgb.b);

    out << std::dec;
    out << "rgb(" << red << "," << green << "," << blue << ")";

    out.flags(saved_flags);
    return out;
}
} // namespace pdf2htmlEX

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python
DIR = 'pdf'
DIR = '../../pdf.js/test/pdfs'
import os
@ -11,7 +12,8 @@ with open('out.html','w') as outf:
if not f.lower().endswith('.pdf'):
continue
print f
os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f))
#os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f))
os.system('pdf2htmlEX --dest-dir html --process-nontext 0 --css-draw 1 "%s/%s"' % (DIR,f))
ff = f[:-3]
outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff))
outf.flush();