1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-21 04:30:08 +00:00

Merge branch 'incoming'

This commit is contained in:
Lu Wang 2015-04-29 21:25:46 +08:00
commit 2edf41eaa1
87 changed files with 2301 additions and 5664 deletions

14
.gitignore vendored
View File

@ -3,20 +3,18 @@ CMakeFiles/*
cmake_install.cmake
CTestTestfile.cmake
gmon.out
Makefile
install_manifest.txt
Makefile
pdf2htmlEX
pdf2htmlEX.1
*.pyc
share/base.css
share/base.min.css
share/fancy.css
share/js_src/css_class_names.js
share/fancy.min.css
share/pdf2htmlEX.js
share/pdf2htmlEX.min.js
src/pdf2htmlEX-config.h
src/util/css_const.h
test/*
test export-ignore
Testing/*
wiki/*
doc/*
/"\\"
/share/base.min.css
/share/fancy.min.css

View File

@ -1,25 +1,32 @@
language: cpp
compiler: gcc
addons:
sauce_connect: true
before_install:
- sudo add-apt-repository ppa:fontforge/fontforge --yes
- sudo add-apt-repository ppa:coolwanglu/pdf2htmlex --yes
- sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-dev libspiro-dev libcairo-dev libfreetype6-dev libltdl-dev
install:
- export LIBRARY_PATH=/usr/local/lib
- export LD_LIBRARY_PATH=/usr/local/lib
- pushd ..
- wget 'https://github.com/coolwanglu/fontforge/archive/pdf2htmlEX.tar.gz' -O - | tar -zxf -
- pushd fontforge-pdf2htmlEX && ./autogen.sh && ./configure && make && sudo make install && popd
- sudo apt-get install -qq libpoppler-dev libpoppler-private-dev libspiro-dev libcairo-dev libpango1.0-dev libfreetype6-dev libltdl-dev libfontforge-dev python-imaging python-pip firefox xvfb
- sudo pip install selenium sauceclient
- export DISPLAY=:99.0
- test/start_xvfb.sh
- pushd /
- python -m SimpleHTTPServer 8000 >/dev/null 2>&1 &
- popd
- sleep 5
before_script:
- cmake -DENABLE_SVG=ON .
script:
- make
- make test
- P2H_TEST_REMOTE=1 ctest --output-on-failure
- sudo make install
- /usr/local/bin/pdf2htmlEX -v
branches:
only:
- master
- incoming
- travis
- wl
env:
global:
- secure: V0yGXROTAsRc3ExcECj7X/CrJLbodUeqZyfQGkA6x0iLV7Lh8/hgTjSsvuj7ef/DIWMqJ5cAIzZuXiF0KIxiVllF1v0I3w+LScxynT7B1NsyH16hvGIc7EvrsRmGVeTv8n9I+cCIwQxjtliNKfeZjV4Rk2+u6LioUzTszmW2etc=
- secure: Q5ZSrdFEgN0JvUp90nY5Wh58iukmGZQ2EW7crOibWH2yuUsxAnMELxpY+9yV3+eA7kbjJf/I0NCa5ZY1gkxK60ugUj+zuUDTL+BV1XCbO37e0uwh3ae99iyQWpXc8e8wBp10sthoX7U6Hvypa5tD9r1JJib8jxJV/MzIFpb7H9s=

View File

@ -1,803 +0,0 @@
//========================================================================
//
// CairoFontEngine.cc
//
// Copyright 2003 Glyph & Cog, LLC
// Copyright 2004 Red Hat, Inc
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005-2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Martin Kretzschmar <martink@gnome.org>
// Copyright (C) 2005, 2009, 2012 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006, 2007, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
// Copyright (C) 2008, 2009 Chris Wilson <chris@chris-wilson.co.uk>
// Copyright (C) 2008, 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2009 Darren Kenny <darren.kenny@sun.com>
// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2010 Jan Kümmel <jan+freedesktop@snorc.org>
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#include <poppler-config.h>
#include <string.h>
#include "CairoFontEngine.h"
#include "CairoOutputDev.h"
#include "GlobalParams.h"
#include <fofi/FoFiTrueType.h>
#include <fofi/FoFiType1C.h>
#include "goo/gfile.h"
#include "Error.h"
#include "XRef.h"
#include "Gfx.h"
#include "Page.h"
#if HAVE_FCNTL_H && HAVE_SYS_MMAN_H && HAVE_SYS_STAT_H
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/mman.h>
#define CAN_CHECK_OPEN_FACES 1
#endif
#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif
//------------------------------------------------------------------------
// CairoFont
//------------------------------------------------------------------------
// Records the font identity (ref), the cairo face used for rendering and the
// character-code -> glyph-index table. The cairo face and the table are
// released again by ~CairoFont().
CairoFont::CairoFont(Ref ref,
                     cairo_font_face_t *cairo_font_face,
                     int *codeToGID,
                     Guint codeToGIDLen,
                     GBool substitute,
                     GBool printing)
  : ref(ref),
    cairo_font_face(cairo_font_face),
    codeToGID(codeToGID),
    codeToGIDLen(codeToGIDLen),
    substitute(substitute),
    printing(printing)
{
}
// Gives up this object's reference on the cairo face, then frees the
// code-to-glyph table.
CairoFont::~CairoFont()
{
  cairo_font_face_destroy(this->cairo_font_face);
  gfree(this->codeToGID);
}
// Returns true when `other` names the same PDF object (number and
// generation) as this font. The printing flag is not considered here.
GBool
CairoFont::matches(Ref &other, GBool printingA)
{
  if (other.num != ref.num)
    return gFalse;
  return other.gen == ref.gen;
}
// Accessor for the cairo face held by this font (no reference is added).
cairo_font_face_t *
CairoFont::getFontFace(void)
{
  return this->cairo_font_face;
}
// Translates a character code into a glyph index. Codes covered by the
// codeToGID table are looked up there; when there is no table, or the code
// lies beyond its end, the code itself is used as the glyph index.
// `u` and `uLen` are not used.
unsigned long
CairoFont::getGlyph(CharCode code, Unicode *u, int uLen)
{
  const bool inTable = codeToGID && code < codeToGIDLen;
  return inTable ? (FT_UInt)codeToGID[code] : (FT_UInt)code;
}
// For substituted 8-bit fonts, compares the width of 'm' declared by the
// original font with the advance of "m" in the substituted cairo face.
// Returns the ratio original/substitute when the original is non-symbolic
// and substantially narrower (less than 90% of the substitute); returns 1.0
// in every other case.
double
CairoFont::getSubstitutionCorrection(GfxFont *gfxFont)
{
  // Only substituted, non-CID fonts are candidates for correction.
  if (!isSubstitute() || gfxFont->isCIDFont())
    return 1.0;

  // Find the character code whose glyph name is exactly "m".
  CharCode mCode = 0;
  for (; mCode < 256; ++mCode) {
    char *glyphName = ((Gfx8BitFont *)gfxFont)->getCharName(mCode);
    if (glyphName && glyphName[0] == 'm' && glyphName[1] == '\0')
      break;
  }
  if (mCode >= 256)
    return 1.0;

  double origWidth = ((Gfx8BitFont *)gfxFont)->getWidth(mCode);

  // Measure "m" in the substituted face at identity scale, unhinted.
  cairo_matrix_t identity;
  cairo_matrix_init_identity(&identity);
  cairo_font_options_t *opts = cairo_font_options_create();
  cairo_font_options_set_hint_style(opts, CAIRO_HINT_STYLE_NONE);
  cairo_font_options_set_hint_metrics(opts, CAIRO_HINT_METRICS_OFF);
  cairo_scaled_font_t *scaled =
      cairo_scaled_font_create(cairo_font_face, &identity, &identity, opts);
  cairo_text_extents_t measured;
  cairo_scaled_font_text_extents(scaled, "m", &measured);
  cairo_scaled_font_destroy(scaled);
  cairo_font_options_destroy(opts);
  double substWidth = measured.x_advance;

  if (gfxFont->isSymbolic())
    return 1.0;

  // if the real font is substantially narrower than the substituted
  // font, reduce the font size accordingly
  if (origWidth > 0.01 && origWidth < 0.9 * substWidth)
    return origWidth / substWidth;

  return 1.0;
}
//------------------------------------------------------------------------
// CairoFreeTypeFont
//------------------------------------------------------------------------
// Key used with cairo_font_face_set_user_data() to attach the FreeType
// resources backing a cairo font face.
static cairo_user_data_key_t _ft_cairo_key;

// Destroy callback for faces created by _ft_new_face_uncached():
// `closure` is the FT_Face itself.
static void
_ft_done_face_uncached (void *closure)
{
  FT_Done_Face ((FT_Face) closure);
}
// Creates an FT_Face plus a cairo font face wrapping it, without any
// de-duplication. The face is loaded from `font_data` when that is non-NULL,
// otherwise from `filename`. On success stores both faces through the out
// parameters and returns gTrue; on any failure returns gFalse and leaves the
// out parameters untouched.
static GBool
_ft_new_face_uncached (FT_Library lib,
                       const char *filename,
                       char *font_data,
                       int font_data_len,
                       FT_Face *face_out,
                       cairo_font_face_t **font_face_out)
{
  FT_Face ftFace;
  FT_Error loadError;

  if (font_data != NULL)
    loadError = FT_New_Memory_Face (lib, (unsigned char *)font_data,
                                    font_data_len, 0, &ftFace);
  else
    loadError = FT_New_Face (lib, filename, 0, &ftFace);
  if (loadError)
    return gFalse;

  cairo_font_face_t *cairoFace =
      cairo_ft_font_face_create_for_ft_face (ftFace,
                                             FT_LOAD_NO_HINTING |
                                             FT_LOAD_NO_BITMAP);

  // Tie the FT_Face lifetime to the cairo face; if that cannot be
  // registered, tear both down by hand.
  cairo_status_t attachStatus =
      cairo_font_face_set_user_data (cairoFace, &_ft_cairo_key,
                                     ftFace, _ft_done_face_uncached);
  if (attachStatus != CAIRO_STATUS_SUCCESS) {
    _ft_done_face_uncached (ftFace);
    cairo_font_face_destroy (cairoFace);
    return gFalse;
  }

  *face_out = ftFace;
  *font_face_out = cairoFace;
  return gTrue;
}
#if CAN_CHECK_OPEN_FACES
// One entry in the list of currently open FreeType faces, used by
// _ft_new_face() to detect that the same font bytes are already loaded.
// _ft_open_faces is the head of the doubly linked list.
static struct _ft_face_data {
// list links; NOTE(review): `head` is not read or written anywhere in the visible code
struct _ft_face_data *prev, *next, **head;
// descriptor of the mmapped font file, or -1 when `bytes` came from caller-supplied memory
int fd;
// _djb_hash() of `bytes`, compared before the full memcmp
unsigned long hash;
// length of `bytes`
size_t size;
// the font data (mmapped file contents or caller-supplied buffer)
unsigned char *bytes;
// FreeType library the face was created with
FT_Library lib;
// FreeType face created from `bytes`
FT_Face face;
// cairo font face wrapping `face`
cairo_font_face_t *font_face;
} *_ft_open_faces;
// Hashes `len` bytes starting at `bytes`: the accumulator starts at 5381 and
// for every byte is multiplied by 33 and then XORed with that byte.
static unsigned long
_djb_hash (const unsigned char *bytes, size_t len)
{
  unsigned long acc = 5381;
  const unsigned char *const end = bytes + len;
  for (const unsigned char *cur = bytes; cur != end; ++cur)
    acc = (acc * 33) ^ *cur;
  return acc;
}
// Two records describe the same font when they belong to the same FreeType
// library and have identical size, hash and byte content. The cheap field
// comparisons run first so the memcmp only happens on a likely match.
static GBool
_ft_face_data_equal (struct _ft_face_data *a, struct _ft_face_data *b)
{
  if (a->lib != b->lib || a->size != b->size || a->hash != b->hash)
    return gFalse;
  return memcmp (a->bytes, b->bytes, a->size) == 0;
}
// Destroy callback for faces created by _ft_new_face(): `closure` is the
// _ft_face_data record. Unlinks the record from the open-face list, releases
// the file mapping (if any), destroys the FreeType face and frees the record.
static void
_ft_done_face (void *closure)
{
  struct _ft_face_data *data = (struct _ft_face_data *) closure;

  // unlink from the doubly linked list headed by _ft_open_faces
  if (data->next)
    data->next->prev = data->prev;
  if (data->prev)
    data->prev->next = data->next;
  else
    _ft_open_faces = data->next;

  // Only file-backed entries own a mapping and a descriptor. For in-memory
  // fonts _ft_new_face() leaves fd at -1 and `bytes` points at the caller's
  // buffer, which must not be passed to munmap()/close().
  if (data->fd != -1) {
#if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4)
    munmap ((char*)data->bytes, data->size);
#else
    munmap (data->bytes, data->size);
#endif
    close (data->fd);
  }

  FT_Done_Face (data->face);
  gfree (data);
}
// Creates (or re-uses) an FT_Face and cairo font face for a font given either
// as a file name (font_data == NULL) or as an in-memory buffer.
// Files are mmapped; if opening, stat'ing or mapping fails the request is
// handed to _ft_new_face_uncached(). The font bytes are hashed and compared
// against every entry of _ft_open_faces: on a match the existing face is
// returned with an extra cairo reference, otherwise a new face is created and
// pushed on the list. Returns gTrue on success (out parameters set), gFalse
// otherwise.
static GBool
_ft_new_face (FT_Library lib,
const char *filename,
char *font_data,
int font_data_len,
FT_Face *face_out,
cairo_font_face_t **font_face_out)
{
struct _ft_face_data *l;
struct stat st;
struct _ft_face_data tmpl;
// fd == -1 marks "bytes are caller memory, not an mmap" for the cleanup paths below
tmpl.fd = -1;
if (font_data == NULL) {
/* if we fail to mmap the file, just pass it to FreeType instead */
tmpl.fd = open (filename, O_RDONLY);
if (tmpl.fd == -1)
return _ft_new_face_uncached (lib, filename, font_data, font_data_len, face_out, font_face_out);
if (fstat (tmpl.fd, &st) == -1) {
close (tmpl.fd);
return _ft_new_face_uncached (lib, filename, font_data, font_data_len, face_out, font_face_out);
}
tmpl.bytes = (unsigned char *) mmap (NULL, st.st_size,
PROT_READ, MAP_PRIVATE,
tmpl.fd, 0);
if (tmpl.bytes == MAP_FAILED) {
close (tmpl.fd);
return _ft_new_face_uncached (lib, filename, font_data, font_data_len, face_out, font_face_out);
}
tmpl.size = st.st_size;
} else {
// in-memory font: use the caller's buffer directly
tmpl.bytes = (unsigned char*) font_data;
tmpl.size = font_data_len;
}
/* check to see if this is a duplicate of any of the currently open fonts */
tmpl.lib = lib;
tmpl.hash = _djb_hash (tmpl.bytes, tmpl.size);
for (l = _ft_open_faces; l; l = l->next) {
if (_ft_face_data_equal (l, &tmpl)) {
// duplicate: drop the mapping made above (file case only) and share the existing face
if (tmpl.fd != -1) {
#if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4)
munmap ((char*)tmpl.bytes, tmpl.size);
#else
munmap (tmpl.bytes, tmpl.size);
#endif
close (tmpl.fd);
}
*face_out = l->face;
*font_face_out = cairo_font_face_reference (l->font_face);
return gTrue;
}
}
/* not a dup, open and insert into list */
if (FT_New_Memory_Face (lib,
(FT_Byte *) tmpl.bytes, tmpl.size,
0, &tmpl.face))
{
// FreeType rejected the data: undo the mapping (file case only)
if (tmpl.fd != -1) {
#if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4)
munmap ((char*)tmpl.bytes, tmpl.size);
#else
munmap (tmpl.bytes, tmpl.size);
#endif
close (tmpl.fd);
}
return gFalse;
}
// copy the template into a heap record and push it at the head of the list
l = (struct _ft_face_data *) gmallocn (1, sizeof (struct _ft_face_data));
*l = tmpl;
l->prev = NULL;
l->next = _ft_open_faces;
if (_ft_open_faces)
_ft_open_faces->prev = l;
_ft_open_faces = l;
l->font_face = cairo_ft_font_face_create_for_ft_face (tmpl.face,
FT_LOAD_NO_HINTING |
FT_LOAD_NO_BITMAP);
// register _ft_done_face so the record is torn down together with the cairo face
if (cairo_font_face_set_user_data (l->font_face,
&_ft_cairo_key,
l,
_ft_done_face))
{
// registration failed: destroy the cairo face and tear the record down by hand
cairo_font_face_destroy (l->font_face);
_ft_done_face (l);
return gFalse;
}
*face_out = l->face;
*font_face_out = l->font_face;
return gTrue;
}
#else
#define _ft_new_face _ft_new_face_uncached
#endif
// Forwards everything to CairoFont; FreeType-backed fonts always pass gTrue
// for the base class `printing` flag.
CairoFreeTypeFont::CairoFreeTypeFont(Ref ref,
                                     cairo_font_face_t *cairo_font_face,
                                     int *codeToGID,
                                     Guint codeToGIDLen,
                                     GBool substitute)
  : CairoFont(ref, cairo_font_face, codeToGID, codeToGIDLen, substitute, gTrue)
{
}
// Nothing to release here; the base class destructor frees the resources.
CairoFreeTypeFont::~CairoFreeTypeFont()
{
}
// Builds a CairoFreeTypeFont for `gfxFont`.
//
// The font program is taken from the PDF when it is embedded, otherwise from
// the external file reported by locateFont() (the result is then flagged as
// a substitute). Depending on the font type a code -> glyph-index table is
// built and the FreeType / cairo faces are created through _ft_new_face().
//
// Returns the new font, or NULL on any failure. On failure the glyph table
// and the embedded font buffer allocated so far are released.
CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
                                             FT_Library lib, GBool useCIDs) {
  GooString *fileName;
  char *fileNameC;
  char *font_data;
  int font_data_len;
  int i, n;
  GfxFontType fontType;
  GfxFontLoc *fontLoc;
  char **enc;
  char *name;
  FoFiTrueType *ff;
  FoFiType1C *ff1c;
  Ref ref;
  FT_Face face;
  cairo_font_face_t *font_face;
  int *codeToGID;
  Guint codeToGIDLen;

  codeToGID = NULL;
  codeToGIDLen = 0;
  font_data = NULL;
  font_data_len = 0;
  fileName = NULL;
  fileNameC = NULL;

  GBool substitute = gFalse;

  ref = *gfxFont->getID();
  fontType = gfxFont->getType();

  if (!(fontLoc = gfxFont->locateFont(xref, gFalse))) {
    error(errSyntaxError, -1, "Couldn't find a font for '{0:s}'",
          gfxFont->getName() ? gfxFont->getName()->getCString()
                             : "(unnamed)");
    goto err2;
  }

  // embedded font
  if (fontLoc->locType == gfxFontLocEmbedded) {
    font_data = gfxFont->readEmbFontFile(xref, &font_data_len);
    if (NULL == font_data)
      goto err2;

  // external font
  } else { // gfxFontLocExternal
    fileName = fontLoc->path;
    fontType = fontLoc->fontType;
    substitute = gTrue;
  }

  if (fileName != NULL) {
    fileNameC = fileName->getCString();
  }

  switch (fontType) {
  case fontType1:
  case fontType1C:
  case fontType1COT:
    if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
      error(errSyntaxError, -1, "could not create type1 face");
      goto err2;
    }

    // map each encoded glyph name to the face's glyph index
    enc = ((Gfx8BitFont *)gfxFont)->getEncoding();

    codeToGID = (int *)gmallocn(256, sizeof(int));
    codeToGIDLen = 256;
    for (i = 0; i < 256; ++i) {
      codeToGID[i] = 0;
      if ((name = enc[i])) {
        codeToGID[i] = FT_Get_Name_Index(face, name);
      }
    }
    break;

  case fontCIDType2:
  case fontCIDType2OT:
    codeToGID = NULL;
    n = 0;
    if (((GfxCIDFont *)gfxFont)->getCIDToGID()) {
      // the font carries its own CID -> GID table: take a private copy
      n = ((GfxCIDFont *)gfxFont)->getCIDToGIDLen();
      if (n) {
        codeToGID = (int *)gmallocn(n, sizeof(int));
        memcpy(codeToGID, ((GfxCIDFont *)gfxFont)->getCIDToGID(),
               n * sizeof(int));
      }
    } else {
      if (font_data != NULL) {
        ff = FoFiTrueType::make(font_data, font_data_len);
      } else {
        ff = FoFiTrueType::load(fileNameC);
      }
      if (! ff)
        goto err2;
      codeToGID = ((GfxCIDFont *)gfxFont)->getCodeToGIDMap(ff, &n);
      delete ff;
    }
    codeToGIDLen = n;
    /* Fall through */
  case fontTrueType:
    if (font_data != NULL) {
      ff = FoFiTrueType::make(font_data, font_data_len);
    } else {
      ff = FoFiTrueType::load(fileNameC);
    }
    if (! ff) {
      error(errSyntaxError, -1, "failed to load truetype font\n");
      goto err2;
    }
    /* This might be set already for the CIDType2 case */
    if (fontType == fontTrueType) {
      codeToGID = ((Gfx8BitFont *)gfxFont)->getCodeToGIDMap(ff);
      codeToGIDLen = 256;
    }
    delete ff;
    if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
      error(errSyntaxError, -1, "could not create truetype face\n");
      goto err2;
    }
    break;

  case fontCIDType0:
  case fontCIDType0C:
    codeToGID = NULL;
    codeToGIDLen = 0;

    if (!useCIDs)
    {
      if (font_data != NULL) {
        ff1c = FoFiType1C::make(font_data, font_data_len);
      } else {
        ff1c = FoFiType1C::load(fileNameC);
      }
      if (ff1c) {
        codeToGID = ff1c->getCIDToGIDMap((int *)&codeToGIDLen);
        delete ff1c;
      }
    }

    if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
      error(errSyntaxError, -1, "could not create cid face\n");
      goto err2;
    }
    break;

  default:
    fprintf (stderr, "font type %d not handled\n", (int)fontType);
    goto err2;
    break;
  }

  delete fontLoc;
  return new CairoFreeTypeFont(ref,
                               font_face,
                               codeToGID, codeToGIDLen,
                               substitute);

 err2:
  /* hmm? */
  delete fontLoc;
  // Every jump here happens before a face has been successfully created, so
  // nothing else refers to the glyph table or the embedded font buffer any
  // more; release them instead of leaking. Both may legitimately be NULL.
  gfree (codeToGID);
  gfree (font_data);
  fprintf (stderr, "some font thing failed\n");
  return NULL;
}
//------------------------------------------------------------------------
// CairoType3Font
//------------------------------------------------------------------------
// Key under which a type3_font_info_t is attached to a cairo user font face
// (set in CairoType3Font::create, read back by the glyph callbacks).
static const cairo_user_data_key_t type3_font_key = {0};
// Context the cairo user-font callbacks need to render a Type 3 glyph.
typedef struct _type3_font_info {
// the PDF font; a reference is taken on creation and dropped in _free_type3_font_info
GfxFont *font;
// document handed to Gfx / CairoOutputDev when rendering a glyph
PDFDoc *doc;
// font engine passed to CairoOutputDev::startDoc while rendering a glyph
CairoFontEngine *fontEngine;
// forwarded to CairoOutputDev::setPrinting while rendering a glyph
GBool printing;
} type3_font_info_t;
static void
_free_type3_font_info(void *closure)
{
type3_font_info_t *info = (type3_font_info_t *) closure;
info->font->decRefCnt();
free (info);
}
// cairo user-font init callback: fills in the font extents of a Type 3 font
// from the font's bounding box. `cr` is not used.
//
// The bounding box is laid out as [x1, y1, x2, y2]. The previous code used
// y2 for both ascent and descent (so height was always 0) and subtracted y1
// instead of x1 for the maximum advance, contradicting its own comments.
static cairo_status_t
_init_type3_glyph (cairo_scaled_font_t *scaled_font,
                   cairo_t *cr,
                   cairo_font_extents_t *extents)
{
  type3_font_info_t *info;
  GfxFont *font;
  double *bbox;

  info = (type3_font_info_t *)
    cairo_font_face_get_user_data (cairo_scaled_font_get_font_face (scaled_font),
                                   &type3_font_key);
  font = info->font;

  bbox = font->getFontBBox();
  extents->ascent = bbox[3];                  /* y2 */
  extents->descent = -bbox[1];                /* -y1 */
  extents->height = extents->ascent + extents->descent;
  extents->max_x_advance = bbox[2] - bbox[0]; /* x2 - x1 */
  extents->max_y_advance = 0;

  return CAIRO_STATUS_SUCCESS;
}
// cairo user-font render callback for Type 3 fonts.
// Looks up the type3_font_info_t attached to the font face, sets up the font
// matrix (with the y axis inverted) on `cr`, and runs the glyph's CharProc
// through a temporary Gfx / CairoOutputDev pair drawing onto `cr`.
// Afterwards the advance (and, if the glyph reported one, the bounding box)
// is written to `metrics`.
// Returns CAIRO_STATUS_USER_FONT_ERROR when the font has no CharProcs or
// `glyph` is out of range, CAIRO_STATUS_SUCCESS otherwise.
static cairo_status_t
_render_type3_glyph (cairo_scaled_font_t *scaled_font,
unsigned long glyph,
cairo_t *cr,
cairo_text_extents_t *metrics)
{
Dict *charProcs;
Object charProc;
CairoOutputDev *output_dev;
cairo_matrix_t matrix, invert_y_axis;
double *mat;
double wx, wy;
PDFRectangle box;
type3_font_info_t *info;
GfxFont *font;
Dict *resDict;
Gfx *gfx;
info = (type3_font_info_t *)
cairo_font_face_get_user_data (cairo_scaled_font_get_font_face (scaled_font),
&type3_font_key);
font = info->font;
resDict = ((Gfx8BitFont *)font)->getResources();
charProcs = ((Gfx8BitFont *)(info->font))->getCharProcs();
if (!charProcs)
return CAIRO_STATUS_USER_FONT_ERROR;
// the glyph index is an index into the CharProcs dictionary
if ((int)glyph >= charProcs->getLength())
return CAIRO_STATUS_USER_FONT_ERROR;
// copy the six font matrix entries into a cairo matrix
mat = font->getFontMatrix();
matrix.xx = mat[0];
matrix.yx = mat[1];
matrix.xy = mat[2];
matrix.yy = mat[3];
matrix.x0 = mat[4];
matrix.y0 = mat[5];
// compose with a y-axis flip before applying it to the context
cairo_matrix_init_scale (&invert_y_axis, 1, -1);
cairo_matrix_multiply (&matrix, &matrix, &invert_y_axis);
cairo_transform (cr, &matrix);
output_dev = new CairoOutputDev();
output_dev->setCairo(cr);
output_dev->setPrinting(info->printing);
// the font bounding box [x1, y1, x2, y2] becomes the Gfx box
mat = font->getFontBBox();
box.x1 = mat[0];
box.y1 = mat[1];
box.x2 = mat[2];
box.y2 = mat[3];
gfx = new Gfx(info->doc, output_dev, resDict, &box, NULL);
output_dev->startDoc(info->doc, info->fontEngine);
output_dev->startPage (1, gfx->getState());
output_dev->setInType3Char(gTrue);
// interpret the glyph's content stream
gfx->display(charProcs->getVal(glyph, &charProc));
// advance reported by the glyph, transformed by the font matrix
output_dev->getType3GlyphWidth (&wx, &wy);
cairo_matrix_transform_distance (&matrix, &wx, &wy);
metrics->x_advance = wx;
metrics->y_advance = wy;
// bearing/size are only filled in when the glyph supplied a bounding box
if (output_dev->hasType3GlyphBBox()) {
double *bbox = output_dev->getType3GlyphBBox();
cairo_matrix_transform_point (&matrix, &bbox[0], &bbox[1]);
cairo_matrix_transform_point (&matrix, &bbox[2], &bbox[3]);
metrics->x_bearing = bbox[0];
metrics->y_bearing = bbox[1];
metrics->width = bbox[2] - bbox[0];
metrics->height = bbox[3] - bbox[1];
}
delete gfx;
delete output_dev;
charProc.free();
return CAIRO_STATUS_SUCCESS;
}
// Builds a CairoType3Font for a Type 3 `gfxFont`.
//
// A cairo user font face is created whose callbacks render the font's
// CharProcs; the context they need (font, document, font engine, printing
// flag) is attached to the face as user data and freed together with it.
// The code -> glyph table maps each encoded glyph name to the index of the
// CharProcs entry with that key (0 when there is none).
//
// Returns the new font, or NULL when the callback context cannot be
// allocated or attached to the face.
CairoType3Font *CairoType3Font::create(GfxFont *gfxFont, PDFDoc *doc,
                                       CairoFontEngine *fontEngine,
                                       GBool printing) {
  type3_font_info_t *info;
  cairo_font_face_t *font_face;
  Ref ref;
  int *codeToGID;
  Guint codeToGIDLen;
  int i, j;
  char **enc;
  Dict *charProcs;
  char *name;

  charProcs = ((Gfx8BitFont *)gfxFont)->getCharProcs();
  info = (type3_font_info_t *) malloc(sizeof(*info));
  if (!info) {
    // plain malloc may fail; do not dereference a NULL record below
    return NULL;
  }
  ref = *gfxFont->getID();
  font_face = cairo_user_font_face_create();
  cairo_user_font_face_set_init_func (font_face, _init_type3_glyph);
  cairo_user_font_face_set_render_glyph_func (font_face, _render_type3_glyph);
  gfxFont->incRefCnt();
  info->font = gfxFont;
  info->doc = doc;
  info->fontEngine = fontEngine;
  info->printing = printing;

  if (cairo_font_face_set_user_data (font_face, &type3_font_key, (void *) info, _free_type3_font_info)) {
    // The face did not take ownership of `info`; without it the glyph
    // callbacks would read a NULL context. Undo everything done so far.
    _free_type3_font_info (info);
    cairo_font_face_destroy (font_face);
    return NULL;
  }

  enc = ((Gfx8BitFont *)gfxFont)->getEncoding();
  codeToGID = (int *)gmallocn(256, sizeof(int));
  codeToGIDLen = 256;
  for (i = 0; i < 256; ++i) {
    codeToGID[i] = 0;
    if (charProcs && (name = enc[i])) {
      // no early exit: the last CharProcs key equal to `name` wins
      for (j = 0; j < charProcs->getLength(); j++) {
        if (strcmp(name, charProcs->getKey(j)) == 0) {
          codeToGID[i] = j;
        }
      }
    }
  }

  return new CairoType3Font(ref, doc, font_face, codeToGID, codeToGIDLen, printing);
}
// Type 3 fonts are never substitutes, so gFalse is passed for `substitute`;
// the document pointer is kept alongside the base class state.
CairoType3Font::CairoType3Font(Ref ref,
                               PDFDoc *doc,
                               cairo_font_face_t *cairo_font_face,
                               int *codeToGID,
                               Guint codeToGIDLen,
                               GBool printing)
  : CairoFont(ref, cairo_font_face, codeToGID, codeToGIDLen, gFalse, printing),
    doc(doc)
{
}
// Nothing to release here; the base class destructor frees the resources.
CairoType3Font::~CairoType3Font()
{
}
// Unlike the base class, a Type 3 font only matches when the printing flag
// agrees as well as the object number and generation.
GBool
CairoType3Font::matches(Ref &other, GBool printingA)
{
  if (other.num != ref.num)
    return gFalse;
  if (other.gen != ref.gen)
    return gFalse;
  return printing == printingA;
}
//------------------------------------------------------------------------
// CairoFontEngine
//------------------------------------------------------------------------
// Starts with an empty font cache and records whether the FreeType library
// in use is newer than 2.1.7.
CairoFontEngine::CairoFontEngine(FT_Library libA) {
  lib = libA;

  for (int slot = 0; slot < cairoFontCacheSize; ++slot)
    fontCache[slot] = NULL;

  // as of FT 2.1.8, CID fonts are indexed by CID instead of GID
  FT_Int major, minor, patch;
  FT_Library_Version(lib, &major, &minor, &patch);
  if (major != 2)
    useCIDs = major > 2;
  else if (minor != 1)
    useCIDs = minor > 1;
  else
    useCIDs = patch > 7;
}
// Destroys every cached font. Deleting a NULL slot is a no-op, so empty
// entries need no special handling.
CairoFontEngine::~CairoFontEngine() {
  for (int slot = 0; slot < cairoFontCacheSize; ++slot)
    delete fontCache[slot];
}
// Returns the CairoFont for `gfxFont`, creating it on demand.
//
// The cache is kept in most-recently-used order: a hit is moved to slot 0,
// a newly created font is inserted at slot 0 and the entry in the last slot
// is destroyed. Returns NULL when the font cannot be created; in that case
// the cache is left untouched (a NULL entry could never be matched by the
// lookup and would only push a usable font out of the cache).
CairoFont *
CairoFontEngine::getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing) {
  int i, j;
  Ref ref;
  CairoFont *font;
  GfxFontType fontType;

  ref = *gfxFont->getID();

  // cache lookup; on a hit move the entry to the front
  for (i = 0; i < cairoFontCacheSize; ++i) {
    font = fontCache[i];
    if (font && font->matches(ref, printing)) {
      for (j = i; j > 0; --j) {
        fontCache[j] = fontCache[j-1];
      }
      fontCache[0] = font;
      return font;
    }
  }

  fontType = gfxFont->getType();
  if (fontType == fontType3)
    font = CairoType3Font::create (gfxFont, doc, this, printing);
  else
    font = CairoFreeTypeFont::create (gfxFont, doc->getXRef(), lib, useCIDs);

  // creation failed: report it without evicting anything
  if (!font)
    return NULL;

  // drop the least recently used entry and insert the new font at the front
  delete fontCache[cairoFontCacheSize - 1];
  for (j = cairoFontCacheSize - 1; j > 0; --j) {
    fontCache[j] = fontCache[j-1];
  }
  fontCache[0] = font;
  return font;
}

View File

@ -1,124 +0,0 @@
//========================================================================
//
// CairoFontEngine.h
//
// Copyright 2003 Glyph & Cog, LLC
// Copyright 2004 Red Hat, Inc
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006, 2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2006, 2010 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2008 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifndef CAIROFONTENGINE_H
#define CAIROFONTENGINE_H
#ifdef USE_GCC_PRAGMAS
#pragma interface
#endif
#include "goo/gtypes.h"
#include <cairo-ft.h>
#include "GfxFont.h"
#include "PDFDoc.h"
class CairoFontEngine;
// Base class pairing a PDF font (identified by its Ref) with the cairo font
// face used to draw it and a character-code -> glyph-index table.
class CairoFont {
public:
// Stores the arguments as-is; no copies are made.
CairoFont(Ref ref,
cairo_font_face_t *face,
int *codeToGID,
Guint codeToGIDLen,
GBool substitute,
GBool printing);
virtual ~CairoFont();
// True when `other` identifies the same font object as this one.
virtual GBool matches(Ref &other, GBool printing);
// The cairo font face held by this font.
cairo_font_face_t *getFontFace(void);
// Glyph index to use for character code `code`.
unsigned long getGlyph(CharCode code, Unicode *u, int uLen);
// Scale factor to apply when this font substitutes for `gfxFont` (1.0 = none).
double getSubstitutionCorrection(GfxFont *gfxFont);
// Whether this font stands in for a font that was not embedded.
GBool isSubstitute() { return substitute; }
protected:
// identity of the PDF font object
Ref ref;
// cairo face used for rendering
cairo_font_face_t *cairo_font_face;
// code -> glyph index table, may be NULL
int *codeToGID;
// number of entries in codeToGID
Guint codeToGIDLen;
GBool substitute;
GBool printing;
};
//------------------------------------------------------------------------
// CairoFont backed by a FreeType face. Instances are only obtained through
// create(); the constructor is private.
class CairoFreeTypeFont : public CairoFont {
public:
// Factory: returns a new font for `gfxFont`, or NULL on failure.
static CairoFreeTypeFont *create(GfxFont *gfxFont, XRef *xref, FT_Library lib, GBool useCIDs);
virtual ~CairoFreeTypeFont();
private:
CairoFreeTypeFont(Ref ref, cairo_font_face_t *cairo_font_face,
int *codeToGID, Guint codeToGIDLen, GBool substitute);
};
//------------------------------------------------------------------------
// CairoFont for PDF Type 3 fonts, rendered through a cairo user font face.
// Instances are only obtained through create(); the constructor is private.
class CairoType3Font : public CairoFont {
public:
// Factory: returns a new font for the Type 3 font `gfxFont`.
static CairoType3Font *create(GfxFont *gfxFont, PDFDoc *doc,
CairoFontEngine *fontEngine,
GBool printing);
virtual ~CairoType3Font();
// Also requires the printing flag to be equal, in addition to the Ref.
virtual GBool matches(Ref &other, GBool printing);
private:
CairoType3Font(Ref ref, PDFDoc *doc,
cairo_font_face_t *cairo_font_face,
int *codeToGID, Guint codeToGIDLen,
GBool printing);
// document the font belongs to
PDFDoc *doc;
};
//------------------------------------------------------------------------
#define cairoFontCacheSize 64
//------------------------------------------------------------------------
// CairoFontEngine
//------------------------------------------------------------------------
// Creates CairoFont objects on demand and keeps up to cairoFontCacheSize of
// them in a cache.
class CairoFontEngine {
public:
// Create a font engine.
CairoFontEngine(FT_Library libA);
~CairoFontEngine();
// Returns the cached or newly created font for `gfxFont`; the result may be
// NULL when the font cannot be created.
CairoFont *getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing);
private:
// cached fonts, most recently used first; unused slots are NULL
CairoFont *fontCache[cairoFontCacheSize];
// FreeType library used to create faces
FT_Library lib;
// true when the FreeType version is newer than 2.1.7
GBool useCIDs;
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,503 +0,0 @@
//========================================================================
//
// CairoOutputDev.h
//
// Copyright 2003 Glyph & Cog, LLC
// Copyright 2004 Red Hat, INC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2005-2008 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
// Copyright (C) 2006-2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2008, 2009, 2011, 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
// Copyright (C) 2010-2012 Thomas Freitag <Thomas.Freitag@alfa.de>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifndef CAIROOUTPUTDEV_H
#define CAIROOUTPUTDEV_H
#ifdef USE_GCC_PRAGMAS
#pragma interface
#endif
#include "goo/gtypes.h"
#include <cairo-ft.h>
#include "OutputDev.h"
#include "TextOutputDev.h"
#include "GfxState.h"
class PDFDoc;
class GfxState;
class GfxPath;
class Gfx8BitFont;
struct GfxRGB;
class CairoFontEngine;
class CairoFont;
//------------------------------------------------------------------------
//------------------------------------------------------------------------
// CairoImage
//------------------------------------------------------------------------
// Pairs a cairo image surface with the rectangle (x1, y1)-(x2, y2) it
// occupies.
class CairoImage {
public:
// Constructor.
CairoImage (double x1, double y1, double x2, double y2);
// Destructor.
~CairoImage ();
// Set the image cairo surface
void setImage (cairo_surface_t *image);
// Get the image cairo surface
cairo_surface_t *getImage () const { return image; }
// Get the image rectangle
// (all four output pointers are written unconditionally and must be non-NULL)
void getRect (double *xa1, double *ya1, double *xa2, double *ya2)
{ *xa1 = x1; *ya1 = y1; *xa2 = x2; *ya2 = y2; }
private:
cairo_surface_t *image; // image cairo surface
double x1, y1; // upper left corner
double x2, y2; // lower right corner
};
//------------------------------------------------------------------------
// CairoOutputDev
//------------------------------------------------------------------------
class CairoOutputDev: public OutputDev {
public:
// Constructor.
CairoOutputDev();
// Destructor.
virtual ~CairoOutputDev();
//----- get info about output device
// Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; }
// Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gTrue; }
// Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing
// operations.
virtual GBool useTilingPatternFill() { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool useShadedFills(int type) { return type <= 7; }
#else
virtual GBool useShadedFills(int type) { return type < 4; }
#endif
// Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gTrue; }
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
//----- initialization and control
// Start a page.
virtual void startPage(int pageNum, GfxState *state);
// End a page.
virtual void endPage();
//----- save/restore graphics state
virtual void saveState(GfxState *state);
virtual void restoreState(GfxState *state);
//----- update graphics state
virtual void updateAll(GfxState *state);
virtual void setDefaultCTM(double *ctm);
virtual void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32);
virtual void updateLineDash(GfxState *state);
virtual void updateFlatness(GfxState *state);
virtual void updateLineJoin(GfxState *state);
virtual void updateLineCap(GfxState *state);
virtual void updateMiterLimit(GfxState *state);
virtual void updateLineWidth(GfxState *state);
virtual void updateFillColor(GfxState *state);
virtual void updateStrokeColor(GfxState *state);
virtual void updateFillOpacity(GfxState *state);
virtual void updateStrokeOpacity(GfxState *state);
virtual void updateFillColorStop(GfxState *state, double offset);
virtual void updateBlendMode(GfxState *state);
//----- update text state
virtual void updateFont(GfxState *state);
//----- path painting
virtual void stroke(GfxState *state);
virtual void fill(GfxState *state);
virtual void eoFill(GfxState *state);
virtual void clipToStrokePath(GfxState *state);
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep);
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual GBool axialShadedSupportExtend(GfxState *state, GfxAxialShading *shading);
virtual GBool radialShadedFill(GfxState *state, GfxRadialShading *shading, double sMin, double sMax);
virtual GBool radialShadedSupportExtend(GfxState *state, GfxRadialShading *shading);
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool gouraudTriangleShadedFill(GfxState *state, GfxGouraudTriangleShading *shading);
virtual GBool patchMeshShadedFill(GfxState *state, GfxPatchMeshShading *shading);
#endif
//----- path clipping
virtual void clip(GfxState *state);
virtual void eoClip(GfxState *state);
//----- text drawing
void beginString(GfxState *state, GooString *s);
void endString(GfxState *state);
void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
void beginActualText(GfxState *state, GooString *text);
void endActualText(GfxState *state);
virtual GBool beginType3Char(GfxState *state, double x, double y,
double dx, double dy,
CharCode code, Unicode *u, int uLen);
virtual void endType3Char(GfxState *state);
virtual void beginTextObject(GfxState *state);
virtual GBool deviceHasTextClip(GfxState *state) { return textClipPath; }
virtual void endTextObject(GfxState *state);
//----- image drawing
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
virtual void setSoftMaskFromImageMask(GfxState *state,
Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix);
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix);
void drawImageMaskPrescaled(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
void drawImageMaskRegular(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
virtual void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg);
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate);
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate);
//----- transparency groups and soft masks
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/);
virtual void endTransparencyGroup(GfxState * /*state*/);
void popTransparencyGroup();
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/);
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/);
virtual void clearSoftMask(GfxState * /*state*/);
//----- Type 3 font operators
virtual void type3D0(GfxState *state, double wx, double wy);
virtual void type3D1(GfxState *state, double wx, double wy,
double llx, double lly, double urx, double ury);
//----- special access
// Called to indicate that a new PDF document has been loaded.
void startDoc(PDFDoc *docA, CairoFontEngine *fontEngine = NULL);
GBool isReverseVideo() { return gFalse; }
void setCairo (cairo_t *cr);
void setTextPage (TextPage *text);
void setPrinting (GBool printing) { this->printing = printing; needFontUpdate = gTrue; }
void setInType3Char(GBool inType3Char) { this->inType3Char = inType3Char; }
void getType3GlyphWidth (double *wx, double *wy) { *wx = t3_glyph_wx; *wy = t3_glyph_wy; }
GBool hasType3GlyphBBox () { return t3_glyph_has_bbox; }
double *getType3GlyphBBox () { return t3_glyph_bbox; }
protected:
void doPath(cairo_t *cairo, GfxState *state, GfxPath *path);
cairo_surface_t *downscaleSurface(cairo_surface_t *orig_surface);
void getScaledSize(int orig_width, int orig_height,
int *scaledWidth, int *scaledHeight);
cairo_filter_t getFilterForSurface(cairo_surface_t *image,
GBool interpolate);
GBool getStreamData (Stream *str, char **buffer, int *length);
void setMimeData(Stream *str, Object *ref, cairo_surface_t *image);
void fillToStrokePathClip(GfxState *state);
void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y);
GfxRGB fill_color, stroke_color;
cairo_pattern_t *fill_pattern, *stroke_pattern;
double fill_opacity;
double stroke_opacity;
GBool stroke_adjust;
GBool adjusted_stroke_width;
GBool align_stroke_coords;
CairoFont *currentFont;
struct StrokePathClip {
GfxPath *path;
cairo_matrix_t ctm;
double line_width;
double *dashes;
int dash_count;
double dash_offset;
cairo_line_cap_t cap;
cairo_line_join_t join;
double miter;
} *strokePathClip;
PDFDoc *doc; // the current document
static FT_Library ft_lib;
static GBool ft_lib_initialized;
CairoFontEngine *fontEngine;
GBool fontEngine_owner;
cairo_t *cairo;
cairo_matrix_t orig_matrix;
GBool needFontUpdate; // set when the font needs to be updated
GBool printing;
GBool use_show_text_glyphs;
cairo_surface_t *surface;
cairo_glyph_t *glyphs;
int glyphCount;
cairo_text_cluster_t *clusters;
int clusterCount;
char *utf8;
int utf8Count;
int utf8Max;
cairo_path_t *textClipPath;
GBool inType3Char; // inside a Type 3 CharProc
double t3_glyph_wx, t3_glyph_wy;
GBool t3_glyph_has_bbox;
double t3_glyph_bbox[4];
GBool prescaleImages;
TextPage *text; // text for the current page
ActualText *actualText;
cairo_pattern_t *group;
cairo_pattern_t *shape;
cairo_pattern_t *mask;
cairo_matrix_t mask_matrix;
cairo_surface_t *cairo_shape_surface;
cairo_t *cairo_shape;
int knockoutCount;
struct ColorSpaceStack {
GBool knockout;
GfxColorSpace *cs;
cairo_matrix_t group_matrix;
struct ColorSpaceStack *next;
} * groupColorSpaceStack;
struct MaskStack {
cairo_pattern_t *mask;
cairo_matrix_t mask_matrix;
struct MaskStack *next;
} *maskStack;
};
//------------------------------------------------------------------------
// CairoImageOutputDev
//------------------------------------------------------------------------
//XXX: this should ideally not inherit from CairoOutputDev but use it instead perhaps
// Output device derived from CairoOutputDev in which almost every
// graphics-state, path, clipping and transparency callback is overridden
// with an empty body.  Only the image-drawing callbacks are declared
// without an inline body (they are defined outside this class), and the
// class keeps an array of CairoImage pointers that can be walked with
// getNumImages()/getImage().
class CairoImageOutputDev: public CairoOutputDev {
public:
// Constructor.
CairoImageOutputDev();
// Destructor.
virtual ~CairoImageOutputDev();
//----- get info about output device
// Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; }
// Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; }
// Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing
// operations.
virtual GBool useTilingPatternFill() { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
// The accepted shading types depend on the cairo version this is
// compiled against: types up to 7 from cairo 1.11.2 on, types below 4
// otherwise.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 11, 2)
virtual GBool useShadedFills(int type) { return type <= 7; }
#else
virtual GBool useShadedFills(int type) { return type < 4; }
#endif
// Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gFalse; }
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
// Does this device need non-text content?
virtual GBool needNonText() { return gTrue; }
//----- save/restore graphics state
// Both are deliberately empty in this class.
virtual void saveState(GfxState *state) { }
virtual void restoreState(GfxState *state) { }
//----- update graphics state
// All of the update callbacks below are empty: this class ignores
// graphics-state changes.
virtual void updateAll(GfxState *state) { }
virtual void setDefaultCTM(double *ctm) { }
virtual void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32) { }
virtual void updateLineDash(GfxState *state) { }
virtual void updateFlatness(GfxState *state) { }
virtual void updateLineJoin(GfxState *state) { }
virtual void updateLineCap(GfxState *state) { }
virtual void updateMiterLimit(GfxState *state) { }
virtual void updateLineWidth(GfxState *state) { }
virtual void updateFillColor(GfxState *state) { }
virtual void updateStrokeColor(GfxState *state) { }
virtual void updateFillOpacity(GfxState *state) { }
virtual void updateStrokeOpacity(GfxState *state) { }
virtual void updateBlendMode(GfxState *state) { }
//----- update text state
virtual void updateFont(GfxState *state) { }
//----- path painting
// Path painting is a no-op; the tiling and shaded fills below return
// gTrue without doing any work.
virtual void stroke(GfxState *state) { }
virtual void fill(GfxState *state) { }
virtual void eoFill(GfxState *state) { }
virtual void clipToStrokePath(GfxState *state) { }
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep) { return gTrue; }
virtual GBool axialShadedFill(GfxState *state,
GfxAxialShading *shading,
double tMin, double tMax) { return gTrue; }
virtual GBool radialShadedFill(GfxState *state,
GfxRadialShading *shading,
double sMin, double sMax) { return gTrue; }
//----- path clipping
virtual void clip(GfxState *state) { }
virtual void eoClip(GfxState *state) { }
//----- image drawing
// These are the only drawing callbacks that are not stubbed out inline;
// their definitions live outside this class declaration.
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg);
virtual void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg);
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate);
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate);
virtual void setSoftMaskFromImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix);
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) {}
//----- transparency groups and soft masks
// All empty: transparency groups and soft masks are ignored here.
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/) {}
virtual void endTransparencyGroup(GfxState * /*state*/) {}
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) {}
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/) {}
virtual void clearSoftMask(GfxState * /*state*/) {}
//----- Image list
// By default images are not rendered.
// Stores the callback and the opaque data pointer that accompanies it.
// NOTE(review): presumably the callback is asked, per image id, whether
// that image should be drawn -- confirm against the implementation.
void setImageDrawDecideCbk(GBool (*cbk)(int img_id, void *data),
void *data) { imgDrawCbk = cbk; imgDrawCbkData = data; }
// Iterate through list of images.
int getNumImages() const { return numImages; }
// Returns images[i] without any range check on i.
CairoImage *getImage(int i) const { return images[i]; }
private:
// Defined outside this declaration; presumably stores `image` in the
// `images` array -- TODO confirm.
void saveImage(CairoImage *image);
// Array of image pointers exposed through getImage().
CairoImage **images;
// Value reported by getNumImages().
int numImages;
// Presumably the allocated capacity of `images` -- TODO confirm.
int size;
// Callback and user data installed by setImageDrawDecideCbk().
GBool (*imgDrawCbk)(int img_id, void *data);
void *imgDrawCbkData;
};
#endif

View File

@ -1,377 +0,0 @@
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
/*
* Copyright © 2009 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
* SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*
* Author: Jeff Muizelaar, Mozilla Corp.
*/
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
/* This implements a box filter that supports non-integer box sizes */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdint.h>
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include "goo/gmem.h"
#include "CairoRescaleBox.h"
/* Fixed-point convention: the value 1.0 is stored as 1 << FIXED_SHIFT. */
#define FIXED_SHIFT 24
/*
 * Horizontally box-filter one row of packed 32-bit pixels.
 *
 * Every destination pixel x in [start, start + width) is a weighted sum of
 * consecutive source pixels; the weights of one box add up to
 * 1 << FIXED_SHIFT:
 *   - the first source pixel of the box is weighted by coverage[x],
 *   - each following whole pixel is weighted by pixel_coverage,
 *   - whatever weight is still missing is taken from the next source pixel,
 *     which is read but not consumed, so it also opens the following box.
 * The four 8-bit channels (bit offsets 24, 16, 8 and 0) are accumulated
 * independently and shifted back down by FIXED_SHIFT before repacking.
 *
 * Destination columns before `start` are never written; their boxes are
 * only walked so that `src` points at the first source pixel that matters.
 */
static void downsample_row_box_filter (
        int start, int width,
        uint32_t *src, uint32_t *dest,
        int coverage[], int pixel_coverage)
{
    const int unit = 1 << FIXED_SHIFT;
    const int stop = start + width;
    int col;

    /* Step over the source pixels that belong to the skipped columns. */
    for (col = 0; col < start; col++)
    {
        int remaining = unit - coverage[col];
        src++;
        for (; remaining >= pixel_coverage; remaining -= pixel_coverage)
            src++;
    }

    for (; col < stop; col++)
    {
        const int lead = coverage[col];
        int remaining = unit - lead;

        /* Left-most pixel of the box: partially covered. */
        uint32_t px = *src++;
        uint32_t alpha = ((px >> 24) & 0xff) * lead;
        uint32_t red   = ((px >> 16) & 0xff) * lead;
        uint32_t green = ((px >> 8) & 0xff) * lead;
        uint32_t blue  = ((px >> 0) & 0xff) * lead;

        /* Pixels that lie completely inside the box. */
        for (; remaining >= pixel_coverage; remaining -= pixel_coverage)
        {
            px = *src++;
            alpha += ((px >> 24) & 0xff) * pixel_coverage;
            red   += ((px >> 16) & 0xff) * pixel_coverage;
            green += ((px >> 8) & 0xff) * pixel_coverage;
            blue  += ((px >> 0) & 0xff) * pixel_coverage;
        }

        /* Right-most pixel gets the leftover weight so the total is exactly
         * 1 << FIXED_SHIFT and the result is not biased downwards.  The
         * pointer is not advanced: this pixel starts the next box too. */
        if (remaining > 0)
        {
            px = *src;
            alpha += ((px >> 24) & 0xff) * remaining;
            red   += ((px >> 16) & 0xff) * remaining;
            green += ((px >> 8) & 0xff) * remaining;
            blue  += ((px >> 0) & 0xff) * remaining;
        }

        *dest++ = ((alpha >> FIXED_SHIFT) << 24)
                | ((red >> FIXED_SHIFT) << 16)
                | ((green >> FIXED_SHIFT) << 8)
                | (blue >> FIXED_SHIFT);
    }
}
/*
 * Vertically box-filter a stack of rows into a single output row.
 *
 * `src` points at rows that are each `n` pixels wide and stored back to
 * back, so stepping by `n` moves one row down within a column.  For each of
 * the `n` columns the top pixel is weighted by start_coverage, every
 * following whole pixel by pixel_coverage, and the weight still missing
 * from 1 << FIXED_SHIFT is taken from the next pixel down.  The four 8-bit
 * channels are accumulated separately, shifted down by FIXED_SHIFT and
 * repacked into dest[column].
 */
static void downsample_columns_box_filter (
        int n,
        int start_coverage,
        int pixel_coverage,
        uint32_t *src, uint32_t *dest)
{
    const int stride = n;
    const int unit = 1 << FIXED_SHIFT;

    while (n--)
    {
        /* Cursor that walks down the current column; src moves on to the
         * next column for the following iteration. */
        const uint32_t *cell = src++;
        int remaining = unit - start_coverage;

        /* Top pixel of the box: partially covered. */
        uint32_t px = *cell;
        uint32_t alpha = ((px >> 24) & 0xff) * start_coverage;
        uint32_t red   = ((px >> 16) & 0xff) * start_coverage;
        uint32_t green = ((px >> 8) & 0xff) * start_coverage;
        uint32_t blue  = ((px >> 0) & 0xff) * start_coverage;
        cell += stride;

        /* Pixels that lie completely inside the box. */
        for (; remaining >= pixel_coverage; remaining -= pixel_coverage)
        {
            px = *cell;
            alpha += ((px >> 24) & 0xff) * pixel_coverage;
            red   += ((px >> 16) & 0xff) * pixel_coverage;
            green += ((px >> 8) & 0xff) * pixel_coverage;
            blue  += ((px >> 0) & 0xff) * pixel_coverage;
            cell += stride;
        }

        /* Bottom pixel receives whatever weight is left over. */
        if (remaining > 0)
        {
            px = *cell;
            alpha += ((px >> 24) & 0xff) * remaining;
            red   += ((px >> 16) & 0xff) * remaining;
            green += ((px >> 8) & 0xff) * remaining;
            blue  += ((px >> 0) & 0xff) * remaining;
        }

        *dest++ = ((alpha >> FIXED_SHIFT) << 24)
                | ((red >> FIXED_SHIFT) << 16)
                | ((green >> FIXED_SHIFT) << 8)
                | (blue >> FIXED_SHIFT);
    }
}
/*
 * Fill coverage[0 .. dest_length-1] with the fixed-point weight (1.0 ==
 * 1 << 24) of the left-most source pixel in each destination pixel's box,
 * and return the weight of one whole source pixel.
 *
 * With scale = src_length / dest_length, destination pixel i covers the
 * source interval [i*scale, (i+1)*scale).  One whole source pixel
 * contributes `unit_weight` = (1 << 24) * dest_length / src_length.  The
 * number of source pixels counted for the box is
 * floor((i+1)*scale) - ceil(i*scale), which can never exceed floor(scale);
 * when the box starts exactly on a pixel boundary one of those is the
 * left-most pixel itself and is removed from the count.  The right-most,
 * partially covered pixel contributes `unit_weight` scaled by the fractional
 * part of the right edge.  The left-most pixel gets everything that remains
 * of 1 << 24, which also absorbs the rounding error of the integer division.
 */
static int compute_coverage (int coverage[], int src_length, int dest_length)
{
    /* average contribution of a single source pixel */
    int unit_weight = ((1 << 24)*(long long int)dest_length)/src_length;
    double scale = (double)src_length/dest_length;
    int dst = 0;

    while (dst < dest_length)
    {
        /* box edges in source coordinates (deliberately kept as float) */
        float left_edge = dst*scale;
        float right_edge = (dst+1)*scale;
        float right_part = right_edge - floor (right_edge);
        float left_gap = ceil (left_edge) - left_edge;
        /* source pixels counted for this box */
        int whole = floor (right_edge) - ceil (left_edge);
        int spill;

        /* box starts on a pixel boundary: the left-most pixel was counted */
        if (left_gap == 0.)
            whole -= 1;

        /* share of the partially covered right-most pixel */
        spill = unit_weight*(right_part);

        /* the left-most pixel takes what is left of 1.0 */
        coverage[dst] = (1<<24) - (whole * unit_weight + spill);
        dst++;
    }
    return unit_weight;
}
/*
 * Box-filter downscale of a sub-rectangle of the scaled image into
 * dest_surface.
 *
 * orig_width/orig_height     size of the source image; rows are obtained
 *                            one at a time through getRow().
 * scaled_width/scaled_height size of the complete scaled image.
 * start_column/start_row,
 * width/height               the rectangle of the scaled image to produce.
 * dest_surface               cairo image surface whose pixel data is written
 *                            directly, one 32-bit word per pixel.
 *
 * For every requested destination row the contributing source rows are
 * fetched with getRow(), shrunk horizontally into temp_buf by
 * downsample_row_box_filter(), and then merged vertically into the surface
 * by downsample_columns_box_filter().
 *
 * Returns gTrue once all rows have been written, gFalse when one of the
 * scratch buffer pointers is null.
 */
GBool CairoRescaleBox::downScaleImage(unsigned orig_width, unsigned orig_height,
signed scaled_width, signed scaled_height,
unsigned short int start_column, unsigned short int start_row,
unsigned short int width, unsigned short int height,
cairo_surface_t *dest_surface) {
int pixel_coverage_x, pixel_coverage_y;
int dest_y;
int src_y = 0;
uint32_t *scanline;
int *x_coverage = NULL;
int *y_coverage = NULL;
uint32_t *temp_buf = NULL;
GBool retval = gFalse;
unsigned int *dest;
int dst_stride;
dest = (unsigned int *)cairo_image_surface_get_data (dest_surface);
dst_stride = cairo_image_surface_get_stride (dest_surface);
/* scratch buffers: one source row, plus one coverage table per axis.
 * Note the scanline is sized with sizeof(int) although it holds uint32_t. */
scanline = (uint32_t*)gmallocn3 (orig_width, 1, sizeof(int));
x_coverage = (int *)gmallocn3 (orig_width, 1, sizeof(int));
y_coverage = (int *)gmallocn3 (orig_height, 1, sizeof(int));
/* we need to allocate enough room for ceil(src_height/dest_height)+1
Example:
src_height = 140
dest_height = 50
src_height/dest_height = 2.8
|-------------| 2.8 pixels
|----|----|----|----| 4 pixels
need to sample 3 pixels
|-------------| 2.8 pixels
|----|----|----|----| 4 pixels
need to sample 4 pixels
*/
temp_buf = (uint32_t *)gmallocn3 ((orig_height + scaled_height-1)/scaled_height+1, scaled_width, sizeof(uint32_t));
if (!x_coverage || !y_coverage || !scanline || !temp_buf)
goto cleanup;
/* per-axis weights: the tables hold the leading weight of each destination
 * pixel, the return values the weight of one whole source pixel */
pixel_coverage_x = compute_coverage (x_coverage, orig_width, scaled_width);
pixel_coverage_y = compute_coverage (y_coverage, orig_height, scaled_height);
assert (width + start_column <= scaled_width);
/* skip the rows at the beginning */
for (dest_y = 0; dest_y < start_row; dest_y++)
{
int box = 1 << FIXED_SHIFT;
int start_coverage_y = y_coverage[dest_y];
box -= start_coverage_y;
src_y++;
while (box >= pixel_coverage_y)
{
box -= pixel_coverage_y;
src_y++;
}
}
for (; dest_y < start_row + height; dest_y++)
{
/* `columns` counts the horizontally downsampled rows stacked in temp_buf
 * for this destination row; each of them is `width` pixels wide */
int columns = 0;
int box = 1 << FIXED_SHIFT;
int start_coverage_y = y_coverage[dest_y];
/* top row of the box: partially covered */
getRow(src_y, scanline);
downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x);
columns++;
src_y++;
box -= start_coverage_y;
/* rows that lie completely inside the box */
while (box >= pixel_coverage_y)
{
getRow(src_y, scanline);
downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x);
columns++;
src_y++;
box -= pixel_coverage_y;
}
/* downsample any leftovers */
/* src_y is not advanced here: this row also opens the next box */
if (box > 0)
{
getRow(src_y, scanline);
downsample_row_box_filter (start_column, width, scanline, temp_buf + width * columns, x_coverage, pixel_coverage_x);
columns++;
}
/* now scale the rows we just downsampled in the y direction */
downsample_columns_box_filter (width, start_coverage_y, pixel_coverage_y, temp_buf, dest);
/* dest is a pointer to 32-bit words, so the stride is divided by 4 to
 * advance one surface row */
dest += dst_stride / 4;
// assert(width*columns <= ((orig_height + scaled_height-1)/scaled_height+1) * width);
}
// assert (src_y<=orig_height);
retval = gTrue;
cleanup:
/* NOTE(review): these buffers come from gmallocn3() but are released with
 * free(); this assumes the gmem allocator is malloc-compatible -- confirm. */
free (x_coverage);
free (y_coverage);
free (temp_buf);
free (scanline);
return retval;
}

View File

@ -1,61 +0,0 @@
/*
* Copyright © 2009 Mozilla Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* MOZILLA CORPORATION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
* SHALL MOZILLA CORPORATION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
* CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
* DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
* TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
* OF THIS SOFTWARE.
*
* Author: Jeff Muizelaar, Mozilla Corp.
*/
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2012 Adrian Johnson <ajohnson@redneon.com>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifndef CAIRO_RESCALE_BOX_H
#define CAIRO_RESCALE_BOX_H
#include "goo/gtypes.h"
#include <cairo.h>
// Base class for box-filter image downscaling.  The scaling algorithm is
// provided by downScaleImage(); a subclass only has to supply the source
// pixels through getRow().
class CairoRescaleBox {
public:
  CairoRescaleBox() {}
  virtual ~CairoRescaleBox() {}

  // Downscales an orig_width x orig_height source to
  // scaled_width x scaled_height and writes the rectangle that starts at
  // (start_column, start_row) and is width x height pixels large into
  // dest_surface.
  virtual GBool downScaleImage(unsigned int orig_width, unsigned int orig_height,
                               int scaled_width, int scaled_height,
                               unsigned short start_column, unsigned short start_row,
                               unsigned short width, unsigned short height,
                               cairo_surface_t *dest_surface);

  // Must be implemented by subclasses: receives a source row number and a
  // buffer for that row's 32-bit pixel values.
  virtual void getRow(int row_num, uint32_t *row_data) = 0;
};
#endif /* CAIRO_RESCALE_BOX_H */

View File

@ -18,6 +18,7 @@ Marc Sanfacon <marc.sanfacon@gmail.com>
Michele Redolfi <michele@tecnicaict.com>
Mick Giles <mick@mickgiles.com>
Ryan Morlok <ryan.morlok@morlok.com>
Simon Chenard <chenard.simon@gmail.com>
Wanmin Liu <wanminliu@gmail.com>
Packagers:

View File

@ -5,11 +5,11 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build configuration (Debug, Release,
project(pdf2htmlEX)
cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR)
option(ENABLE_SVG "Enable SVG support, for generating SVG background images and converting Type 3 fonts" OFF)
option(ENABLE_SVG "Enable SVG support, for generating SVG background images and converting Type 3 fonts" ON)
include_directories(${CMAKE_SOURCE_DIR}/src)
set(PDF2HTMLEX_VERSION "0.12")
set(PDF2HTMLEX_VERSION "0.13.6")
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
add_custom_target(dist
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
@ -19,21 +19,7 @@ add_custom_target(dist
find_package(PkgConfig)
pkg_check_modules(POPPLER poppler>=0.25.0)
if(POPPLER_FOUND)
set(POPPLER_OLDER_THAN_0_25_0 0)
set(POPPLER_OLDER_THAN_0_23_0 0)
else()
set(POPPLER_OLDER_THAN_0_25_0 1)
pkg_check_modules(POPPLER poppler>=0.23.0)
if(POPPLER_FOUND)
set(POPPLER_OLDER_THAN_0_23_0 0)
else()
set(POPPLER_OLDER_THAN_0_23_0 1)
pkg_check_modules(POPPLER REQUIRED poppler>=0.20.0)
endif()
endif()
pkg_check_modules(POPPLER REQUIRED poppler>=0.25.0)
include_directories(${POPPLER_INCLUDE_DIRS})
link_directories(${POPPLER_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES})
@ -47,11 +33,7 @@ if(ENABLE_SVG)
link_directories(${CAIRO_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${CAIRO_LIBRARIES})
set(ENABLE_SVG 1)
if(POPPLER_OLDER_THAN_0_23_0)
set(CAIROOUTPUTDEV_PATH 3rdparty/poppler/0.22.5)
else()
set(CAIROOUTPUTDEV_PATH 3rdparty/poppler/git)
endif()
set(CAIROOUTPUTDEV_PATH 3rdparty/poppler/git)
include_directories(${CAIROOUTPUTDEV_PATH})
set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
${CAIROOUTPUTDEV_PATH}/CairoFontEngine.h
@ -102,10 +84,20 @@ else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
endif()
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT)
# check the C++11 features we need
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <vector>
int main()
{
char * ptr = nullptr;
std::vector<int> v;
auto f = [&](){ for(auto & i : v) ++i; };
f();
}
" CXX0X_SUPPORT)
if(NOT CXX0X_SUPPORT)
message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.")
message(FATAL_ERROR "Error: your compiler does not support C++0x/C++11, please update it.")
endif()
@ -127,6 +119,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
src/HTMLRenderer/general.cc
src/HTMLRenderer/image.cc
src/HTMLRenderer/font.cc
src/HTMLRenderer/form.cc
src/HTMLRenderer/link.cc
src/HTMLRenderer/outline.cc
src/HTMLRenderer/state.cc
@ -221,5 +214,4 @@ install (FILES ${PDF2HTMLEX_RESOURCE} DESTINATION share/pdf2htmlEX)
install (FILES pdf2htmlEX.1 DESTINATION share/man/man1)
enable_testing()
add_test(test_naming
python ${CMAKE_SOURCE_DIR}/test/test_naming.py)
add_test(test python ${CMAKE_SOURCE_DIR}/test/test.py)

View File

@ -1,3 +1,10 @@
Developing
* Do not support Poppler < 0.25.0 any more
* ENABLE_SVG is enabled by default
* Improved DrawingTracer
* Workarounds for chrome/webkit
v0.12
2014.07.24

View File

@ -24,7 +24,7 @@ Learn more about [who](https://github.com/coolwanglu/pdf2htmlEX/wiki/Use-Cases)
* Native HTML text with precise font and location.
* Flexible output: all-in-one HTML or on demand page loading (needs JavaScript).
* Moderate file size, sometimes even smaller than PDF.
* Support for links, outlines (bookmarks), printing, SVG background, Type 3 fonts and [more...](https://github.com/coolwanglu/pdf2htmlEX/wiki/Feature-List)
* Supporting links, outlines (bookmarks), printing, SVG background, Type 3 fonts and [more...](https://github.com/coolwanglu/pdf2htmlEX/wiki/Feature-List)
[Compare to others](https://github.com/coolwanglu/pdf2htmlEX/wiki/Comparison)
@ -52,7 +52,7 @@ Chat with the main author: 王璐 (Lu Wang)
* <coolwanglu@gmail.com>
* [@coolwanglu](https://twitter.com/coolwanglu)
* :bangbang:Questions about pdf2htmlEX? Use the mailing list instead.:bangbang:
* Accepting messages in :cn::us::gb::jp:.
* Accepting messages in :cn::us::gb::jp: (languages).
Want to help without coding? Thank you!
* [:moneybag:Make a donation](http://coolwanglu.github.io/pdf2htmlEX/donate.html)

3
TODO
View File

@ -1,3 +1,5 @@
Redesign textline
save width in Textline, create new <div> in the middle
link in outline: dest-detail vs hashtag
@ -16,7 +18,6 @@ more information on demo page:
- browser requirements
pdf:miui
tmp dir: use pid
view hash
- dots issue

4
debian/control vendored
View File

@ -6,11 +6,11 @@ Build-Depends: cmake (>= 2.6.0),
debhelper (>= 8),
default-jre-headless (>= 1.6),
libcairo2-dev,
libfontforge-dev,
libfontforge-dev (>= 20140000),
libjpeg-dev,
libpango1.0-dev,
libpng12-dev,
libpoppler-dev (>= 0.20.3),
libpoppler-dev (>= 0.25.0),
libspiro-dev,
pkg-config,
python-dev

View File

@ -17,7 +17,7 @@ pdf2htmlEX is a utility that converts PDF files to HTML files.
pdf2htmlEX tries its best to render the PDF precisely, maintain proper styling, while retaining text and optimizing for Web.
Fonts are extracted from PDF and then embedded into HTML (Type 3 fonts are not supported). Text in the converted HTML file is usually selectable and copyable.
Fonts are extracted from PDF and then embedded into HTML. Text in the converted HTML file is usually selectable and copyable.
Other objects are rendered as images and also embedded.
@ -138,6 +138,10 @@ Whether to show outline in the generated HTML
.B \-\-process-annotation <0|1> (Default: 0)
Whether to show annotation in the generated HTML
.TP
.B \-\-process-form <0|1> (Default: 0)
Whether to include text fields and radio buttons in the generated HTML
.TP
.B \-\-printing <0|1> (Default: 1)
Enable printing support. Disabling this option may reduce the size of CSS.
@ -261,7 +265,11 @@ This option is only useful when '\-\-bg\-format svg' is specified. Note that nod
.TP
.B \-\-svg\-embed\-bitmap <0|1> (Default: 1)
Whether to embed bitmaps in the SVG background image. 1: embed bitmaps in the SVG background; 0: dump bitmaps to external files if possible.
JPEG images in a PDF are the most likely to be dumped. This option is only useful when '\-\-bg\-format svg' is specified.
This option is only useful when '\-\-bg\-format svg' is specified and '\-\-embed\-image' is off.
Currently, RGB or Gray JPEG bitmaps in a PDF can be dumped, while those in other formats or colorspaces are still embedded.
If bitmaps are not dumped as expected, try pre-processing your PDF by ghostscript or acrobat and make sure bitmaps in it are converted to RGB/Gray JPEG format. See the project wiki for more details.
.SS PDF Protection

View File

@ -157,19 +157,23 @@
unicode-bidi:bidi-override;/* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
-moz-font-feature-settings:"liga" 0;/* We don't want Firefox to recognize ligatures */
}
.@CSS_LINE_CN@:after {
/* Workaround for https://bugs.webkit.org/show_bug.cgi?id=35443 */
.@CSS_LINE_CN@:after { /* webkit #35443 */
content: '';
}
.@CSS_LINE_CN@:before { /* Workaround Blink(up to 41)/Webkit bug of word-spacing with leading spaces (chromium #404444 and pdf2htmlEX #412) */
content: '';
display: inline-block;
}
.@CSS_LINE_CN@ span { /* text blocks within a line */
/* Blink(up to 41)/Webkit have bug with negative word-spacing and inline-block (pdf2htmlEX #416), so keep normal span inline. */
position:relative;
/* _<id> for spaces may need display:inline, which will override this */
display:inline-block;
unicode-bidi:bidi-override; /* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
}
.@CSS_WHITESPACE_CN@ { /* text shift */
color:transparent;
z-index:-1;
/* Blink(up to 41)/Webkit have bug with inline element, continuous spaces and word-spacing. Workaround by inline-block. */
display: inline-block;
color: transparent;
z-index: -1;
}
/* selection background should not be opaque, for fallback mode */
::selection{
@ -190,4 +194,14 @@
-ms-transform-origin:0% 100%;
-webkit-transform-origin:0% 100%;
}
/* for the forms */
.@CSS_INPUT_TEXT_CN@ {
border: none;
background-color: rgba(255, 255, 255, 0.0);
}
.@CSS_INPUT_RADIO_CN@:hover {
cursor: pointer;
}
/* Base CSS END */

View File

@ -81,5 +81,8 @@
-webkit-animation: swing 1.5s ease-in-out 0.01s infinite alternate none;
animation: swing 1.5s ease-in-out 0.01s infinite alternate none;
}
.@CSS_RADIO_CHECKED_CN@ {
background: no-repeat url();
}
}
/* Fancy CSS END */

View File

@ -11,6 +11,7 @@
#
# Special
# If a line contains """ only, all text until next """ will be included
# #TEST_IGNORE_BEGIN & #TEST_IGNORE_END are used for unittest
#############
# Declaration - Do not modify
@ -28,8 +29,10 @@
# Styles
# base CSS styles - Do not modify
@base.min.css
# fancy CSS styles - Optional
@fancy.min.css
# PDF specific CSS styles - Do not modify
$css
@ -39,7 +42,9 @@ $css
# compatibility.min.js, extracted from PDF.js
# To support old browsers like IE9
#TEST_IGNORE_BEGIN
@compatibility.min.js
#TEST_IGNORE_END
# entry point of pdf2htmlEX.Viewer
# You can override default configuration by passing an object to the constructor of Viewer
@ -48,6 +53,7 @@ $css
# pdf2htmlEX.defaultViewer = new pdf2htmlEX.Viewer({
# 'key_handler' : false
# });
#TEST_IGNORE_BEGIN
@pdf2htmlEX.min.js
"""
<script>
@ -56,6 +62,8 @@ try{
}catch(e){}
</script>
"""
#TEST_IGNORE_END
#############
# Do not modify
@ -71,6 +79,8 @@ try{
# You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js
# e.g.
# <div id="sidebar" class="opened">
#TEST_IGNORE_BEGIN
"""
<div id="sidebar">
"""
@ -83,6 +93,7 @@ $outline
</div>
</div>
"""
#TEST_IGNORE_END
#############
# The container of PDF pages
@ -100,6 +111,8 @@ $pages
# shown when loading a page via ajax
# The default appearance should be invisible
# The 'active' class will be added when it is used
#TEST_IGNORE_BEGIN
"""
<div class="loading-indicator">
"""
@ -107,6 +120,7 @@ $pages
"""
</div>
"""
#TEST_IGNORE_END
#############
# Do not modify

View File

@ -260,6 +260,18 @@ Viewer.prototype = {
this.pre_hide_pages();
},
initialize_radio_button : function() {
var elements = document.getElementsByClassName('ir');
for(var i = 0; i < elements.length; i++) {
var r = elements[i];
r.addEventListener('click', function() {
this.classList.toggle("checked");
});
}
},
init_after_loading_content : function() {
this.sidebar = document.getElementById(this.config['sidebar_id']);
this.outline = document.getElementById(this.config['outline_id']);
@ -312,6 +324,7 @@ Viewer.prototype = {
ele.addEventListener('click', self.link_handler.bind(self), false);
});
this.initialize_radio_button();
this.render();
},

View File

@ -143,9 +143,9 @@ void ArgParser::parse(int argc, char ** argv) const
void ArgParser::show_usage(ostream & out) const
{
for(auto iter = arg_entries.begin(); iter != arg_entries.end(); ++iter)
for(auto & entry : arg_entries)
{
(*iter)->show_usage(out);
entry->show_usage(out);
}
}

View File

@ -18,34 +18,34 @@
namespace pdf2htmlEX {
BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
std::unique_ptr<BackgroundRenderer> BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
{
#ifdef ENABLE_LIBPNG
if(format == "png")
{
return new SplashBackgroundRenderer(format, html_renderer, param);
return std::unique_ptr<BackgroundRenderer>(new SplashBackgroundRenderer(format, html_renderer, param));
}
#endif
#ifdef ENABLE_LIBJPEG
if(format == "jpg")
{
return new SplashBackgroundRenderer(format, html_renderer, param);
return std::unique_ptr<BackgroundRenderer>(new SplashBackgroundRenderer(format, html_renderer, param));
}
#endif
#if ENABLE_SVG
if (format == "svg")
{
return new CairoBackgroundRenderer(html_renderer, param);
return std::unique_ptr<BackgroundRenderer>(new CairoBackgroundRenderer(html_renderer, param));
}
#endif
return nullptr;
}
BackgroundRenderer * BackgroundRenderer::getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param)
std::unique_ptr<BackgroundRenderer> BackgroundRenderer::getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param)
{
if (param.bg_format == "svg" && param.svg_node_count_limit >= 0)
return new SplashBackgroundRenderer("", html_renderer, param);
return std::unique_ptr<BackgroundRenderer>(new SplashBackgroundRenderer("", html_renderer, param));
return nullptr;
}

View File

@ -24,10 +24,10 @@ class BackgroundRenderer
{
public:
// return nullptr upon failure
static BackgroundRenderer * getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param);
static std::unique_ptr<BackgroundRenderer> getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param);
// Return a fallback bg renderer according to param.bg_format.
// Currently only svg bg format might need a bitmap fallback.
static BackgroundRenderer * getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param);
static std::unique_ptr<BackgroundRenderer> getFallbackBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param);
BackgroundRenderer() {}
virtual ~BackgroundRenderer() {}

View File

@ -34,11 +34,11 @@ CairoBackgroundRenderer::CairoBackgroundRenderer(HTMLRenderer * html_renderer, c
CairoBackgroundRenderer::~CairoBackgroundRenderer()
{
for(auto itr = bitmaps_ref_count.begin(); itr != bitmaps_ref_count.end(); ++itr)
for(auto const& p : bitmaps_ref_count)
{
if (itr->second == 0)
if (p.second == 0)
{
html_renderer->tmp_files.add(this->build_bitmap_path(itr->first));
html_renderer->tmp_files.add(this->build_bitmap_path(p.first));
}
}
}
@ -53,10 +53,12 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font while param.process_type3 is not enabled
// - OR the text is used as path
if((param.fallback || param.proof)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
)
)
@ -184,8 +186,8 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
}
// the svg file is actually used, so add its bitmaps' ref count.
for (auto itr = bitmaps_in_current_page.begin(); itr != bitmaps_in_current_page.end(); itr++)
++bitmaps_ref_count[*itr];
for (auto id : bitmaps_in_current_page)
++bitmaps_ref_count[id];
return true;
}
@ -246,6 +248,34 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac
if (ref == nullptr || !ref->isRef())
return;
// We only dump rgb or gray jpeg without /Decode array.
//
// Although JPEG supports CMYK, PDF readers do color conversion incompatibly with most other
// programs (including browsers): other programs invert CMYK colors if an 'Adobe' marker (APP14) is present
// in a JPEG file, while PDF readers don't; they rely solely on the /Decode array to invert colors.
// It's a bit complicated to decide whether a CMYK JPEG is safe to dump, so we don't dump them at all.
// See also:
// JPEG file embedded in PDF (CMYK) https://forums.adobe.com/thread/975777
// http://stackoverflow.com/questions/3123574/how-to-convert-from-cmyk-to-rgb-in-java-correctly
//
// In PDF, JPEG stream objects can also specify other color spaces such as DeviceN and Separation;
// it is not safe to dump them directly either.
Object obj;
str->getDict()->lookup("ColorSpace", &obj);
if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) )
{
obj.free();
return;
}
obj.free();
str->getDict()->lookup("Decode", &obj);
if (obj.isArray())
{
obj.free();
return;
}
obj.free();
int imgId = ref->getRef().num;
auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId));
auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI,

View File

@ -29,7 +29,7 @@ using std::unique_ptr;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTMLRenderer * html_renderer, const Param & param)
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white), gTrue, gTrue)
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white), gTrue)
, html_renderer(html_renderer)
, param(param)
, format(imgFormat)
@ -56,15 +56,9 @@ SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTM
* And thus have modified region set to the whole page area
* We do not want that.
*/
#if POPPLER_OLDER_THAN_0_23_0
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state)
{
SplashOutputDev::startPage(pageNum, state);
#else
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA)
{
SplashOutputDev::startPage(pageNum, state, xrefA);
#endif
clearModRegion();
}
@ -78,10 +72,12 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font while param.process_type3 is not enabled
// - OR the text is used as path
if((param.fallback || param.proof)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
)
)

View File

@ -39,11 +39,7 @@ public:
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return !param.process_type3; }
#if POPPLER_OLDER_THAN_0_23_0
virtual void startPage(int pageNum, GfxState *state);
#else
virtual void startPage(int pageNum, GfxState *state, XRef *xrefA);
#endif
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,

View File

@ -10,13 +10,12 @@
#include "util/math.h"
#include "DrawingTracer.h"
//#define DT_DEBUG(x) (x)
#define DT_DEBUG(x)
#if !ENABLE_SVG
#warning "Cairo is disabled because ENABLE_SVG is off, --correct-text-visibility has limited functionality."
#endif
static constexpr bool DT_DEBUG = false;
namespace pdf2htmlEX
{
@ -39,9 +38,18 @@ void DrawingTracer::reset(GfxState *state)
finish();
#if ENABLE_SVG
cairo_rectangle_t page_box {0, 0, width:state->getPageWidth(), height:state->getPageHeight()};
// pbox is defined in device space, which is affected by zooming;
// We want to trace in page space which is stable, so invert pbox by ctm.
double pbox[] { 0, 0, state->getPageWidth(), state->getPageHeight() };
Matrix ctm, ictm;
state->getCTM(&ctm);
ctm.invertTo(&ictm);
tm_transform_bbox(ictm.m, pbox);
cairo_rectangle_t page_box { pbox[0], pbox[1], pbox[2] - pbox[0], pbox[3] - pbox[1] };
cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
cairo = cairo_create(surface);
if (DT_DEBUG)
printf("DrawingTracer::reset:page bbox:[%f,%f,%f,%f]\n",pbox[0], pbox[1], pbox[2], pbox[3]);
#endif
}
@ -73,6 +81,13 @@ void DrawingTracer::update_ctm(GfxState *state, double m11, double m12, double m
matrix.x0 = m31;
matrix.y0 = m32;
cairo_transform(cairo, &matrix);
if (DT_DEBUG)
{
cairo_matrix_t mat;
cairo_get_matrix(cairo, &mat);
printf("DrawingTracer::update_ctm:ctm:[%f,%f,%f,%f,%f,%f]\n", mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0);
}
#endif
}
@ -84,6 +99,13 @@ void DrawingTracer::clip(GfxState * state, bool even_odd)
do_path(state, state->getPath());
cairo_set_fill_rule(cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
cairo_clip (cairo);
if (DT_DEBUG)
{
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
printf("DrawingTracer::clip:extents:[%f,%f,%f,%f]\n", cbox[0],cbox[1],cbox[2],cbox[3]);
}
#endif
}
@ -100,6 +122,8 @@ void DrawingTracer::save()
return;
#if ENABLE_SVG
cairo_save(cairo);
if (DT_DEBUG)
printf("DrawingTracer::save\n");
#endif
}
void DrawingTracer::restore()
@ -108,6 +132,8 @@ void DrawingTracer::restore()
return;
#if ENABLE_SVG
cairo_restore(cairo);
if (DT_DEBUG)
printf("DrawingTracer::restore\n");
#endif
}
@ -119,12 +145,16 @@ void DrawingTracer::do_path(GfxState * state, GfxPath * path)
int i, j;
double x, y;
cairo_new_path(cairo);
if (DT_DEBUG)
printf("DrawingTracer::do_path:new_path\n");
for (i = 0; i < path->getNumSubpaths(); ++i) {
subpath = path->getSubpath(i);
if (subpath->getNumPoints() > 0) {
x = subpath->getX(0);
y = subpath->getY(0);
cairo_move_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:move_to[%f,%f]\n",x,y);
j = 1;
while (j < subpath->getNumPoints()) {
if (subpath->getCurve(j)) {
@ -134,16 +164,22 @@ void DrawingTracer::do_path(GfxState * state, GfxPath * path)
subpath->getX(j), subpath->getY(j),
subpath->getX(j+1), subpath->getY(j+1),
x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:curve_to[%f,%f]\n",x,y);
j += 3;
} else {
x = subpath->getX(j);
y = subpath->getY(j);
cairo_line_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:line_to[%f,%f]\n",x,y);
++j;
}
}
if (subpath->isClosed()) {
cairo_close_path (cairo);
if (DT_DEBUG)
printf("DrawingTracer::do_path:close\n");
}
}
}
@ -156,7 +192,8 @@ void DrawingTracer::stroke(GfxState * state)
if (!param.correct_text_visibility)
return;
DT_DEBUG(printf("DrawingTracer::stroke\n"));
if (DT_DEBUG)
printf("DrawingTracer::stroke\n");
cairo_set_line_width(cairo, state->getLineWidth());
@ -197,13 +234,14 @@ void DrawingTracer::stroke(GfxState * state)
++p;
}
DT_DEBUG(printf("DrawingTracer::stroke:new box:\n"));
if (DT_DEBUG)
printf("DrawingTracer::stroke:new box:\n");
double sbox[4];
cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3);
if (sbox[0] != sbox[2] && sbox[1] != sbox[3])
draw_non_char_bbox(state, sbox);
else
DT_DEBUG(printf("DrawingTracer::stroke:zero box!\n"));
else if (DT_DEBUG)
printf("DrawingTracer::stroke:zero box!\n");
if (p == n)
{
@ -243,7 +281,8 @@ void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox)
#endif
{
transform_bbox_by_ctm(bbox, state);
DT_DEBUG(printf("DrawingTracer::draw_non_char_bbox:[%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]));
if (DT_DEBUG)
printf("DrawingTracer::draw_non_char_bbox:[%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
if (on_non_char_drawn)
on_non_char_drawn(bbox);
}
@ -296,7 +335,8 @@ void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox)
if (on_char_drawn)
on_char_drawn(bbox);
#endif
DT_DEBUG(printf("DrawingTracer::draw_char_bbox:[%f,%f,%f,%f]\n",bbox[0],bbox[1],bbox[2],bbox[3]));
if (DT_DEBUG)
printf("DrawingTracer::draw_char_bbox:[%f,%f,%f,%f]\n",bbox[0],bbox[1],bbox[2],bbox[3]);
}
void DrawingTracer::draw_image(GfxState *state)

View File

@ -10,6 +10,7 @@
#include <unordered_map>
#include <cstdint>
#include <fstream>
#include <memory>
#include <OutputDev.h>
#include <GfxState.h>
@ -20,6 +21,10 @@
#include <GfxFont.h>
#include <Annot.h>
// for form.cc
#include <Page.h>
#include <Form.h>
#include "pdf2htmlEX-config.h"
#include "Param.h"
@ -40,9 +45,8 @@
namespace pdf2htmlEX {
class HTMLRenderer : public OutputDev
struct HTMLRenderer : OutputDev
{
public:
HTMLRenderer(const Param & param);
virtual ~HTMLRenderer();
@ -78,11 +82,7 @@ public:
virtual void setDefaultCTM(double *ctm);
// Start a page.
#if POPPLER_OLDER_THAN_0_23_0
virtual void startPage(int pageNum, GfxState *state);
#else
virtual void startPage(int pageNum, GfxState *state, XRef * xref);
#endif
// End a page.
virtual void endPage();
@ -153,7 +153,7 @@ public:
// Does not fail on out-of-bound conditions, but return false.
bool is_char_covered(int index);
// Currently drawn char (glyph) count in current page.
int get_char_count() { return (int)covered_text_detecor.get_chars_covered().size(); }
int get_char_count() { return (int)covered_text_detector.get_chars_covered().size(); }
protected:
////////////////////////////////////////////////////
@ -165,6 +165,8 @@ protected:
void process_outline(void);
void process_outline_items(GooList * items);
void process_form(std::ofstream & out);
void set_stream_flags (std::ostream & out);
void dump_css(void);
@ -308,9 +310,9 @@ protected:
} new_line_state;
// for font reencoding
int32_t * cur_mapping;
char ** cur_mapping2;
int * width_list;
std::vector<int32_t> cur_mapping;
std::vector<char*> cur_mapping2;
std::vector<int> width_list; // width of each char
Preprocessor preprocessor;
@ -325,8 +327,8 @@ protected:
#if ENABLE_SVG
friend class CairoBackgroundRenderer; // ugly!
#endif
BackgroundRenderer * bg_renderer;
BackgroundRenderer * fallback_bg_renderer;
std::unique_ptr<BackgroundRenderer> bg_renderer, fallback_bg_renderer;
struct {
std::ofstream fs;
@ -337,7 +339,7 @@ protected:
static const std::string MANIFEST_FILENAME;
CoveredTextDetector covered_text_detecor;
CoveredTextDetector covered_text_detector;
DrawingTracer tracer;
};

View File

@ -192,11 +192,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
FT_Library ft_lib;
FT_Init_FreeType(&ft_lib);
CairoFontEngine font_engine(ft_lib);
#if POPPLER_OLDER_THAN_0_23_0
auto * cur_font = font_engine.getFont(font, cur_doc, true);
#else
auto * cur_font = font_engine.getFont(font, cur_doc, true, xref);
#endif
auto used_map = preprocessor.get_code_map(hash_ref(font->getID()));
//calculate transformed metrics
@ -398,8 +394,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
GfxCIDFont * font_cid = nullptr;
string suffix = get_suffix(filepath);
for(auto iter = suffix.begin(); iter != suffix.end(); ++iter)
*iter = tolower(*iter);
for(auto & c : suffix)
c = tolower(c);
/*
* if parm->tounicode is 0, try the provided tounicode map first
@ -487,7 +483,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
unordered_set<string> nameset;
bool name_conflict_warned = false;
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
std::fill(cur_mapping2.begin(), cur_mapping2.end(), (char*)nullptr);
for(int i = 0; i < 256; ++i)
{
@ -516,7 +512,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
}
}
ffw_reencode_raw2(cur_mapping2, 256, 0);
ffw_reencode_raw2(cur_mapping2.data(), 256, 0);
}
}
else
@ -580,8 +576,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
bool name_conflict_warned = false;
auto ctu = font->getToUnicode();
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
memset(width_list, -1, 0x10000 * sizeof(*width_list));
std::fill(cur_mapping.begin(), cur_mapping.end(), -1);
std::fill(width_list.begin(), width_list.end(), -1);
if(code2GID)
maxcode = min<int>(maxcode, code2GID_len - 1);
@ -643,9 +639,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
retried = true;
codeset.clear();
info.use_tounicode = false;
//TODO: constant for the length
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
memset(width_list, -1, 0x10000 * sizeof(*width_list));
std::fill(cur_mapping.begin(), cur_mapping.end(), -1);
std::fill(width_list.begin(), width_list.end(), -1);
cur_code = -1;
if(param.debug)
{
@ -704,9 +699,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
}
}
ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph);
ffw_set_widths(width_list.data(), max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph);
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
ffw_reencode_raw(cur_mapping.data(), max_key + 1, 1);
// In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste
// We need to make sure that ' ' is in the font, otherwise it would be very ugly if you select the text
@ -887,7 +882,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
* which does not make much sense in our case
* If we specify gFalse here, font_loc->locaType cannot be gfxFontLocResident
*/
if(auto * font_loc = font->locateFont(xref, gFalse))
if(auto * font_loc = font->locateFont(xref, nullptr))
{
switch(font_loc -> locType)
{
@ -942,7 +937,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
cerr << "Warning: workaround for font names in bad encodings." << endl;
}
GfxFontLoc * localfontloc = font->locateFont(xref, gFalse);
GfxFontLoc * localfontloc = font->locateFont(xref, nullptr);
if(param.embed_external_font)
{
@ -1065,8 +1060,8 @@ void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, cons
f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
string fn = original_font_name;
for(auto iter = fn.begin(); iter != fn.end(); ++iter)
*iter = tolower(*iter);
for(auto & c : fn)
c = tolower(c);
if(font->isBold() || (fn.find("bold") != string::npos))
f_css.fs << "font-weight:bold;";

76
src/HTMLRenderer/form.cc Normal file
View File

@ -0,0 +1,76 @@
/*
* form.cc
*
* Handling Forms
*
* by Simon Chenard
* 2014.07.25
*/
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include "HTMLRenderer.h"
#include "util/namespace.h"
#include "util/misc.h"
namespace pdf2htmlEX {
using std::ofstream;
using std::cerr;
/*
 * Emit HTML controls for the form widgets of the current page (pageNum).
 *
 * out: stream the controls are appended to.
 *
 * Every widget rectangle is scaled by param.zoom and written as an absolutely
 * positioned element (left/bottom offsets in px):
 *  - text fields become <input type="text"> elements,
 *  - all button-type fields become a <div> carrying the radio-button class,
 *  - any other field type only produces a warning on stderr.
 */
void HTMLRenderer::process_form(ofstream & out)
{
    auto * page = cur_catalog->getPage(pageNum);
    if(!page)
        return;

    // Page::getFormWidgets() hands back a freshly allocated list that the caller
    // must release (see poppler's Page.cc); keep it in a unique_ptr so it is freed
    // on every path instead of being leaked once per page.
    std::unique_ptr<FormPageWidgets> widgets(page->getFormWidgets());
    if(!widgets)
        return;

    const int num = widgets->getNumWidgets();
    for(int i = 0; i < num; i++)
    {
        FormWidget * w = widgets->getWidget(i);

        // widget rectangle, scaled to the zoom level of the output
        double x1, y1, x2, y2;
        w->getRect(&x1, &y1, &x2, &y2);
        x1 = x1 * param.zoom;
        x2 = x2 * param.zoom;
        y1 = y1 * param.zoom;
        y2 = y2 * param.zoom;

        double width = x2 - x1;
        double height = y2 - y1;

        if(w->getType() == formText)
        {
            // the font size is derived from the box: half of its height
            double font_size = height / 2;

            out << "<input id=\"text-" << pageNum << "-" << i
                << "\" class=\"" << CSS::INPUT_TEXT_CN
                << "\" type=\"text\" value=\"\""
                << " style=\"position: absolute; left: " << x1
                << "px; bottom: " << y1 << "px;"
                << " width: " << width << "px; height: " << std::to_string(height)
                << "px; line-height: " << std::to_string(height) << "px; font-size: "
                << font_size << "px;\" />" << endl;
        }
        else if(w->getType() == formButton)
        {
            //Ideally would check w->getButtonType()
            //for more specific rendering

            // the box is enlarged by 3px in both directions
            width += 3;
            height += 3;

            out << "<div id=\"cb-" << pageNum << "-" << i
                << "\" class=\"" << CSS::INPUT_RADIO_CN
                << "\" style=\"position: absolute; left: " << x1
                << "px; bottom: " << y1 << "px;"
                << " width: " << width << "px; height: "
                << std::to_string(height) << "px; background-size: cover;\" ></div>" << endl;
        }
        else
        {
            cerr << "Unsupported form field detected" << endl;
        }
    }
}
}

View File

@ -3,7 +3,7 @@
*
* Handling general stuffs
*
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
* Copyright (C) 2012,2013,2014 Lu Wang <coolwanglu@gmail.com>
*/
#include <cstdio>
@ -56,9 +56,10 @@ HTMLRenderer::HTMLRenderer(const Param & param)
}
ffw_init(param.debug);
cur_mapping = new int32_t [0x10000];
cur_mapping2 = new char* [0x100];
width_list = new int [0x10000];
cur_mapping.resize(0x10000);
cur_mapping2.resize(0x100);
width_list.resize(0x10000);
/*
* For these states, usually the error will not be accumulated
@ -80,19 +81,16 @@ HTMLRenderer::HTMLRenderer(const Param & param)
all_manager.bottom .set_eps(EPS);
tracer.on_char_drawn =
[this](double * box) { covered_text_detecor.add_char_bbox(box); };
[this](double * box) { covered_text_detector.add_char_bbox(box); };
tracer.on_char_clipped =
[this](double * box, bool partial) { covered_text_detecor.add_char_bbox_clipped(box, partial); };
[this](double * box, bool partial) { covered_text_detector.add_char_bbox_clipped(box, partial); };
tracer.on_non_char_drawn =
[this](double * box) { covered_text_detecor.add_non_char_bbox(box); };
[this](double * box) { covered_text_detector.add_non_char_bbox(box); };
}
HTMLRenderer::~HTMLRenderer()
{
ffw_finalize();
delete [] cur_mapping;
delete [] cur_mapping2;
delete [] width_list;
}
void HTMLRenderer::process(PDFDoc *doc)
@ -106,8 +104,6 @@ void HTMLRenderer::process(PDFDoc *doc)
///////////////////
// Process pages
bg_renderer = nullptr;
fallback_bg_renderer = nullptr;
if(param.process_nontext)
{
bg_renderer = BackgroundRenderer::getBackgroundRenderer(param.bg_format, this, param);
@ -132,6 +128,7 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param.split_pages)
{
// copy the string out, since we will reuse the buffer soon
string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i);
auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str());
f_curpage = new ofstream((char*)page_fn, ofstream::binary);
@ -167,16 +164,8 @@ void HTMLRenderer::process(PDFDoc *doc)
post_process();
if(bg_renderer)
{
delete bg_renderer;
bg_renderer = nullptr;
}
if(fallback_bg_renderer)
{
delete fallback_bg_renderer;
fallback_bg_renderer = nullptr;
}
bg_renderer = nullptr;
fallback_bg_renderer = nullptr;
cerr << endl;
}
@ -186,13 +175,9 @@ void HTMLRenderer::setDefaultCTM(double *ctm)
memcpy(default_ctm, ctm, sizeof(default_ctm));
}
#if POPPLER_OLDER_THAN_0_23_0
void HTMLRenderer::startPage(int pageNum, GfxState *state)
#else
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
#endif
{
covered_text_detecor.reset();
covered_text_detector.reset();
tracer.reset(state);
this->pageNum = pageNum;
@ -239,8 +224,10 @@ void HTMLRenderer::endPage() {
if(param.process_nontext)
{
if (bg_renderer->render_page(cur_doc, pageNum))
{
bg_renderer->embed_image(pageNum);
else if (fallback_bg_renderer != nullptr)
}
else if (fallback_bg_renderer)
{
if (fallback_bg_renderer->render_page(cur_doc, pageNum))
fallback_bg_renderer->embed_image(pageNum);
@ -252,6 +239,10 @@ void HTMLRenderer::endPage() {
html_text_page.dump_css(f_css.fs);
html_text_page.clear();
// process form
if(param.process_form)
process_form(*f_curpage);
// process links before the page is closed
cur_doc->processLinks(this, pageNum);
@ -261,18 +252,20 @@ void HTMLRenderer::endPage() {
// dump info for js
// TODO: create a function for this
// BE CAREFUL WITH ESCAPES
(*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
//default CTM
(*f_curpage) << "\"ctm\":[";
for(int i = 0; i < 6; ++i)
{
if(i > 0) (*f_curpage) << ",";
(*f_curpage) << round(default_ctm[i]);
}
(*f_curpage) << "]";
(*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
(*f_curpage) << "}'></div>";
//default CTM
(*f_curpage) << "\"ctm\":[";
for(int i = 0; i < 6; ++i)
{
if(i > 0) (*f_curpage) << ",";
(*f_curpage) << round(default_ctm[i]);
}
(*f_curpage) << "]";
(*f_curpage) << "}'></div>";
}
// close page
(*f_curpage) << "</div>" << endl;
@ -391,8 +384,8 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
void HTMLRenderer::post_process(void)
{
dump_css();
// close files if they opened
// it's better to brace even single-line bodies, otherwise LLVM complains
if (param.process_outline)
{
f_outline.fs.close();
@ -546,7 +539,6 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
string fn = get_filename(path);
string suffix = (type == "") ? get_suffix(fn) : type;
// TODO
auto iter = EMBED_STRING_MAP.find(suffix);
if(iter == EMBED_STRING_MAP.end())
{

View File

@ -19,7 +19,7 @@
namespace pdf2htmlEX {
using std::all_of;
using std::none_of;
using std::cerr;
using std::endl;
@ -107,7 +107,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
}
else
{
if((param.decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
if((param.decompose_ligature) && (uLen > 1) && none_of(u, u+uLen, is_illegal_unicode))
{
html_text_page.get_cur_line()->append_unicodes(u, uLen, ddx);
}
@ -153,7 +153,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
bool HTMLRenderer::is_char_covered(int index)
{
auto covered = covered_text_detecor.get_chars_covered();
auto covered = covered_text_detector.get_chars_covered();
if (index < 0 || index >= (int)covered.size())
{
std::cerr << "Warning: HTMLRenderer::is_char_covered: index out of bound: "

View File

@ -36,7 +36,7 @@ HTMLTextLine::HTMLTextLine (const HTMLLineState & line_state, const Param & para
void HTMLTextLine::append_unicodes(const Unicode * u, int l, double width)
{
if (l == 1)
if (l == 1)
text.push_back(min(u[0], (unsigned)INT_MAX));
else if (l > 1)
{
@ -52,12 +52,19 @@ void HTMLTextLine::append_offset(double width)
/*
* If the last offset is very thin, we can ignore it and directly use it
* But this should not happen often, and we will also filter near-zero offsets when outputting them
* So don't check it
* So don't check it.
*
* Offset must be appended immediately after the last real (non-padding) char, or the text optimizing
* algorithm may be confused: it may wrongly convert offsets at the beginning of a line to word-space.
*/
if((!offsets.empty()) && (offsets.back().start_idx == text.size()))
auto offset_idx = text.size();
while (offset_idx > 0 && text[offset_idx - 1] == 0)
--offset_idx;
if((!offsets.empty()) && (offsets.back().start_idx == offset_idx))
offsets.back().width += width;
else
offsets.emplace_back(text.size(), width);
offsets.emplace_back(offset_idx, width);
this->width += width;
}

View File

@ -23,10 +23,8 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
HTMLTextPage::~HTMLTextPage()
{
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
{
delete (*iter);
}
for(auto p : text_lines)
delete p;
}
void HTMLTextPage::dump_text(ostream & out)
@ -35,12 +33,12 @@ void HTMLTextPage::dump_text(ostream & out)
{
// text lines may be split during optimization, collect them
std::vector<HTMLTextLine*> new_text_lines;
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
(*iter)->optimize(new_text_lines);
for(auto p : text_lines)
p->optimize(new_text_lines);
std::swap(text_lines, new_text_lines);
}
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
(*iter)->prepare();
for(auto p : text_lines)
p->prepare();
if(param.optimize_text)
optimize();

View File

@ -38,6 +38,7 @@ struct Param
int process_nontext;
int process_outline;
int process_annotation;
int process_form;
int correct_text_visibility;
int printing;
int fallback;

View File

@ -36,8 +36,8 @@ Preprocessor::Preprocessor(const Param & param)
Preprocessor::~Preprocessor(void)
{
for(auto iter = code_maps.begin(); iter != code_maps.end(); ++iter)
delete [] iter->second;
for(auto & p : code_maps)
delete [] p.second;
}
void Preprocessor::process(PDFDoc * doc)

View File

@ -59,19 +59,19 @@ public:
}
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << imp->get_css_class_name() << iter->second << "{";
imp->dump_value(out, iter->first);
out << "." << imp->get_css_class_name() << p.second << "{";
imp->dump_value(out, p.first);
out << "}" << std::endl;
}
}
void dump_print_css(std::ostream & out, double scale) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << imp->get_css_class_name() << iter->second << "{";
imp->dump_print_value(out, iter->first, scale);
out << "." << imp->get_css_class_name() << p.second << "{";
imp->dump_print_value(out, p.first, scale);
out << "}" << std::endl;
}
}
@ -109,10 +109,10 @@ public:
}
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << imp->get_css_class_name() << iter->second << "{";
imp->dump_value(out, iter->first);
out << "." << imp->get_css_class_name() << p.second << "{";
imp->dump_value(out, p.first);
out << "}" << std::endl;
}
}
@ -166,10 +166,10 @@ public:
imp->dump_transparent(out);
out << "}" << std::endl;
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << imp->get_css_class_name() << iter->second << "{";
imp->dump_value(out, iter->first);
out << "." << imp->get_css_class_name() << p.second << "{";
imp->dump_value(out, p.first);
out << "}" << std::endl;
}
}
@ -245,15 +245,15 @@ public:
static const char * get_css_class_name (void) { return CSS::WHITESPACE_CN; }
double default_value(void) { return 0; }
void dump_value(std::ostream & out, double value) {
out << ((value > 0) ? "display:inline-block;width:"
: "display:inline;margin-left:")
out << ((value > 0) ? "width:"
: "margin-left:")
<< round(value) << "px;";
}
void dump_print_value(std::ostream & out, double value, double scale)
{
value *= scale;
out << ((value > 0) ? "display:inline-block;width:"
: "display:inline;margin-left:")
out << ((value > 0) ? "width:"
: "margin-left:")
<< round(value) << "pt;";
}
};
@ -307,15 +307,15 @@ public:
auto prefixes = {"", "-ms-", "-webkit-"};
if(tm_equal(m, ID_MATRIX, 4))
{
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
out << *iter << "transform:none;";
for(auto & s : prefixes)
out << s << "transform:none;";
}
else
{
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
for(auto & s : prefixes)
{
// PDF use a different coordinate system from Web
out << *iter << "transform:matrix("
out << s << "transform:matrix("
<< round(m[0]) << ','
<< round(-m[1]) << ','
<< round(-m[2]) << ','
@ -332,10 +332,10 @@ public:
static const char * get_css_class_name (void) { return CSS::FILL_COLOR_CN; }
/* override base's method, as we need some workaround in CSS */
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << get_css_class_name() << iter->second
<< "{color:" << iter->first << ";}" << std::endl;
out << "." << get_css_class_name() << p.second
<< "{color:" << p.first << ";}" << std::endl;
}
}
};
@ -348,24 +348,24 @@ public:
void dump_css(std::ostream & out) {
// normal CSS
out << "." << get_css_class_name() << CSS::INVALID_ID << "{text-shadow:none;}" << std::endl;
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
// TODO: take the stroke width from the graphics state,
// currently using 0.015em as a good default
out << "." << get_css_class_name() << iter->second << "{text-shadow:"
<< "-0.015em 0 " << iter->first << ","
<< "0 0.015em " << iter->first << ","
<< "0.015em 0 " << iter->first << ","
<< "0 -0.015em " << iter->first << ";"
out << "." << get_css_class_name() << p.second << "{text-shadow:"
<< "-0.015em 0 " << p.first << ","
<< "0 0.015em " << p.first << ","
<< "0.015em 0 " << p.first << ","
<< "0 -0.015em " << p.first << ";"
<< "}" << std::endl;
}
// webkit
out << CSS::WEBKIT_ONLY << "{" << std::endl;
out << "." << get_css_class_name() << CSS::INVALID_ID << "{-webkit-text-stroke:0px transparent;}" << std::endl;
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
out << "." << get_css_class_name() << iter->second
<< "{-webkit-text-stroke:0.015em " << iter->first << ";text-shadow:none;}" << std::endl;
out << "." << get_css_class_name() << p.second
<< "{-webkit-text-stroke:0.015em " << p.first << ";text-shadow:none;}" << std::endl;
}
out << "}" << std::endl;
}
@ -385,20 +385,20 @@ public:
}
void dump_css(std::ostream & out) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
const auto & s = iter->second;
out << "." << CSS::PAGE_CONTENT_BOX_CN << iter->first << "{";
const auto & s = p.second;
out << "." << CSS::PAGE_CONTENT_BOX_CN << p.first << "{";
out << "background-size:" << round(s.first) << "px " << round(s.second) << "px;";
out << "}" << std::endl;
}
}
void dump_print_css(std::ostream & out, double scale) {
for(auto iter = value_map.begin(); iter != value_map.end(); ++iter)
for(auto & p : value_map)
{
const auto & s = iter->second;
out << "." << CSS::PAGE_CONTENT_BOX_CN << iter->first << "{";
const auto & s = p.second;
out << "." << CSS::PAGE_CONTENT_BOX_CN << p.first << "{";
out << "background-size:" << round(s.first * scale) << "pt " << round(s.second * scale) << "pt;";
out << "}" << std::endl;
}

View File

@ -46,8 +46,9 @@ double TmpFiles::get_total_size() const
{
double total_size = 0;
struct stat st;
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) {
stat(iter->c_str(), &st);
for(auto & fn : tmp_files)
{
stat(fn.c_str(), &st);
total_size += st.st_size;
}
@ -60,9 +61,8 @@ void TmpFiles::clean()
if(!param.clean_tmp)
return;
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter)
for(auto & fn : tmp_files)
{
const string & fn = *iter;
remove(fn.c_str());
if(param.debug)
cerr << "Remove temporary file: " << fn << endl;

View File

@ -34,3 +34,6 @@ set(CSS_WIDTH_CN "w") # Width
set(CSS_BOTTTOM_CN "y") # Y
set(CSS_CSS_DRAW_CN "d") # Draw
set(CSS_LINK_CN "l") # Link
set(CSS_INPUT_TEXT_CN "it") # Text input
set(CSS_INPUT_RADIO_CN "ir") # Radio button
set(CSS_RADIO_CHECKED_CN "checked") # Show picture of checked radio button

View File

@ -11,8 +11,6 @@
#include <string>
#define POPPLER_OLDER_THAN_0_25_0 @POPPLER_OLDER_THAN_0_25_0@
#define POPPLER_OLDER_THAN_0_23_0 @POPPLER_OLDER_THAN_0_23_0@
#define ENABLE_SVG @ENABLE_SVG@
namespace pdf2htmlEX {

View File

@ -164,6 +164,7 @@ void parse_options (int argc, char **argv)
.add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text")
.add("process-outline", &param.process_outline, 1, "show outline in HTML")
.add("process-annotation", &param.process_annotation, 0, "show annotation in HTML")
.add("process-form", &param.process_form, 0, "include text fields and radio buttons")
.add("printing", &param.printing, 1, "enable printing support")
.add("fallback", &param.fallback, 0, "output in fallback mode")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit.")
@ -344,6 +345,12 @@ void check_param()
{
cerr << "Warning: No hint tool is specified for truetype fonts, the result may be rendered poorly in some circumstances." << endl;
}
if (param.embed_image && (param.bg_format == "svg") && !param.svg_embed_bitmap)
{
cerr << "Warning: --svg-embed-bitmap is forced on because --embed-image is on, or the dumped bitmaps can't be loaded." << endl;
param.svg_embed_bitmap = 1;
}
}
int main(int argc, char **argv)

View File

@ -56,6 +56,10 @@ const char * const BOTTOM_CN = "@CSS_BOTTTOM_CN@";
const char * const CSS_DRAW_CN = "@CSS_CSS_DRAW_CN@";
const char * const LINK_CN = "@CSS_LINK_CN@";
const char * const INPUT_TEXT_CN = "@CSS_INPUT_TEXT_CN@";
const char * const INPUT_RADIO_CN = "@CSS_INPUT_RADIO_CN@";
const char * const RADIO_CHECKED_CN = "@CSS_RADIO_CHECKED_CN@";
}
}

View File

@ -131,9 +131,9 @@ void writeURL(ostream & out, const string & s)
void writeJSON(ostream & out, const string & s)
{
for(auto iter = s.begin(); iter != s.end(); ++iter)
for(auto c : s)
{
switch (*iter)
switch (c)
{
case '\\': out << "\\\\"; break;
case '"': out << "\\\""; break;
@ -144,16 +144,15 @@ void writeJSON(ostream & out, const string & s)
case '\n': out << "\\n"; break;
case '\r': out << "\\r"; break;
case '\t': out << "\\t"; break;
default: out << *iter; break;
default: out << c; break;
}
}
}
void writeAttribute(std::ostream & out, const std::string & s)
{
for (auto iter = s.begin(); iter != s.end(); ++iter)
for (auto c : s)
{
char c = *iter;
switch(c)
{
case '&':

View File

@ -131,8 +131,8 @@ string get_suffix(const string & path)
else
{
string s = fn.substr(idx);
for(auto iter = s.begin(); iter != s.end(); ++iter)
*iter = tolower(*iter);
for(auto & c : s)
c = tolower(c);
return s;
}
}

View File

@ -18,81 +18,6 @@ using std::cerr;
using std::endl;
using std::ostream;
/*
 * Decide whether a unicode value can be written to HTML as-is.
 *
 * Browsers give some code points a special interpretation that PDF does not,
 * so those code points are reported as not legal here.
 * The list is known to be incomplete and is meant to be extended.
 */
bool isLegalUnicode(Unicode u)
{
    // Code points below this bound are answered from a lookup table
    // that is filled in on the first call.
    const Unicode table_size = 1024;
    static bool is_legal[table_size];
    static bool table_ready = false;

    if(!table_ready)
    {
        table_ready = true;
        for(Unicode cp = 0; cp < table_size; ++cp)
        {
            /*
             * 0-31: 9, 10 and 13 are interpreted as white-spaces in HTML,
             * `word-spacing` may be applied on them and the browser may not
             * use the actual glyphs in the font.
             * The drawback is that the correct value can not be copied out.
             */
            const bool low_control = (cp <= 31);

            /*
             * 127-160: 160 (0xa0) is NBSP, which is legal in HTML, but some
             * browsers use the glyph for ' ' in the font, if there is one,
             * instead of the glyph for NBSP, and `word-spacing` is applied.
             * Same drawback as above: the character can no longer be copied out.
             */
            const bool del_through_nbsp = (cp >= 127) && (cp <= 160);

            /*
             * 173 (0xad): the soft hyphen, which can be ignored by the
             * browser in the middle of a line.
             */
            const bool soft_hyphen = (cp == 173);

            is_legal[cp] = !(low_control || del_through_nbsp || soft_hyphen);
        }
    }

    if(u < table_size)
        return is_legal[u];

    /*
     * U+2029: Paragraph Separator
     * TODO: check U+2028 etc
     */
    if(u == 0x2029)
        return false;

    /*
     * Reserved code for utf-16
     */
    const bool is_surrogate = (u >= 0xd800) && (u <= 0xdfff);
    return !is_surrogate;
}
Unicode map_to_private(CharCode code)
{
Unicode private_mapping = (Unicode)(code + 0xE000);
@ -119,12 +44,8 @@ Unicode unicode_from_font (CharCode code, GfxFont * font)
// may be untranslated ligature
if(cname)
{
#if POPPLER_OLDER_THAN_0_25_0
Unicode ou = globalParams->mapNameToUnicode(cname);
#else
Unicode ou = globalParams->mapNameToUnicodeText(cname);
#endif
if(isLegalUnicode(ou))
if(!is_illegal_unicode(ou))
return ou;
}
}
@ -139,7 +60,7 @@ Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
if(len == 1)
{
if(isLegalUnicode(*u))
if(!is_illegal_unicode(*u))
return *u;
}

View File

@ -13,11 +13,58 @@
namespace pdf2htmlEX {
/*
* Check if the unicode is valid for HTML
* http://en.wikipedia.org/wiki/HTML_decimal_character_rendering
/**
* Check whether a unicode character is illegal for the output HTML.
* Unlike PDF readers, browsers give such characters special treatment (normally rendering them
* as zero-width spaces), regardless of the metrics and glyphs provided by fonts. So these characters
* should be mapped to the Unicode private use area to "cheat" browsers, at the cost of losing the
* actual Unicode values in the HTML.
*
* The following chart shows illegal characters in HTML by webkit, mozilla, and pdf2htmlEX (p2h).
* pdf2htmlEX's illegal character set is the union of webkit's and mozilla's, plus illegal unicode
* characters. "[" and ")" surrounding ranges denote "inclusive" and "exclusive", respectively.
*
* 00(NUL)--09(\t)--0A(\n)--0D(\r)--20(SP)--7F(DEL)--9F(APC)--A0(NBSP)--AD(SHY)--061C(ALM)--1361(Ethiopic word space)
* webkit: [--------------------------------) [------------------) [-]
* moz: [--------------------------------) [---------] [-]
* p2h: [--------------------------------) [------------------] [-] [-] [-]
*
* 200B(ZWSP)--200C(ZWNJ)--200D(ZWJ)--200E(LRM)--200F(RLM)--2028(LSEP)--2029(PSEP)--202A(LRE)--202E(RLO)--2066(LRI)--2069(PDI)
* webkit: [-----------------------------------------------] [----------]
* moz: [-] [----------] [-] [-] [----------] [------------]
* p2h: [-----------------------------------------------] [-] [-] [----------] [------------]
*
* D800(surrogate)--DFFF(surrogate)--FEFF(ZWNBSP)--FFFC(ORC)--FFFE(non-char)--FFFF(non-char)
* webkit: [-] [-]
* moz:
* p2h: [------------------] [-] [-] [-----------------]
*
* Note: 0xA0 (no-break space) affects word-spacing; and if "white-space:pre" is specified,
* \n and \r can break line, \t can shift text, so they are considered illegal.
*
* Resources (retrieved at 2015-03-16)
* * webkit
* * Avoid querying the font cache for the zero-width space glyph ( https://bugs.webkit.org/show_bug.cgi?id=90673 )
* * treatAsZeroWidthSpace( https://github.com/WebKit/webkit/blob/17bbff7400393e9389b40cc84ce005f7cc954680/Source/WebCore/platform/graphics/FontCascade.h#L272 )
* * mozilla
* * IsInvalidChar( http://mxr.mozilla.org/mozilla-central/source/gfx/thebes/gfxTextRun.cpp#1973 )
* * IsBidiControl( http://mxr.mozilla.org/mozilla-central/source/intl/unicharutil/util/nsBidiUtils.h#114 )
* * Character encodings in HTML ( http://en.wikipedia.org/wiki/Character_encodings_in_HTML#HTML_character_references )
* * CSS Text Spec ( http://dev.w3.org/csswg/css-text/ )
* * unicode table ( http://unicode-table.com )
*
* TODO Web specs? IE?
*
*/
bool isLegalUnicode(Unicode u);
/**
 * Return true when code point `c` is in the set that is treated as illegal
 * for the output HTML, false otherwise.
 */
inline bool is_illegal_unicode(Unicode c)
{
    // Everything below SP, then DEL through NBSP.
    if(c < 0x20)
        return true;
    if(c >= 0x7F && c <= 0xA0)
        return true;

    // SHY, ALM and the Ethiopic word space.
    if(c == 0xAD || c == 0x061C || c == 0x1361)
        return true;

    // ZWSP..RLM, LSEP, PSEP, LRE..RLO and LRI..PDI.
    if(c >= 0x200B && c <= 0x200F)
        return true;
    if(c == 0x2028 || c == 0x2029)
        return true;
    if(c >= 0x202A && c <= 0x202E)
        return true;
    if(c >= 0x2066 && c <= 0x2069)
        return true;

    // Surrogates.
    if(c >= 0xD800 && c <= 0xDFFF)
        return true;

    // ZWNBSP, ORC and the two non-characters.
    return (c == 0xFEFF) || (c == 0xFFFC) || (c == 0xFFFE) || (c == 0xFFFF);
}
Unicode map_to_private(CharCode code);

2
test/.gitattributes vendored Normal file
View File

@ -0,0 +1,2 @@
*.pdf binary
*.woff binary

38
test/README.md Normal file
View File

@ -0,0 +1,38 @@
### Dependencies
- python2 and packages
- Python Imaging Library
- Selenium
- unittest
- Firefox
### Usage
- Run all tests:
- `./test.py`
- Run selected test suites:
- `./test.py test_local_browser`
- Run selected test case:
- `./test.py test_local_browser.test_basic_text`
- Or `./test.py test_basic_text`
- Environment variables:
- Set `P2H_TEST_SAVE_TMP=1` to keep the temporary files in `/tmp/pdf2htmlEX_test`
- Set `P2H_TEST_GEN=1` to generate new reference files
- Set `P2H_TEST_REMOTE=1` to test different browsers using Sauce Labs
- Install `sauceclient` for Python
- Set correct values for `SAUCE_USERNAME` and `SAUCE_ACCESS_KEY`
- Set up an HTTP server serving the filesystem root `/` on port 8000
- Enable Sauce Connect
- See `.travis.yml` as an example
### Add new test cases
- Make sure you have the proper copyrights.
- Use meaningful file names: a short description of the file, or `issueXXX.pdf`.
- Make each test case minimal:
- One page only, unless the test case is about multiple pages.
- Grayscale only, unless the test case is about colors.
- Remove unnecessary elements.
- [Optional] Include the source files that the PDF file is generated from.
- Add the new PDF file to the correct folder in `test/`, and add a new function in the corresponding Python file
- Run `P2H_TEST_GEN=1 test/test.py test_issueXXX` to generate the reference, assuming that the new function is called `test_issueXXX`

99
test/browser_tests.py Normal file
View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
import os
import subprocess
import shutil
import unittest
from PIL import Image, ImageChops
from test import Common
class BrowserTests(Common):
    """Regression tests that compare rendered screenshots of HTML output.

    Each test converts a PDF from ``TEST_DATA_DIR`` with pdf2htmlEX, renders
    both the fresh output and the stored reference HTML to PNG through
    ``self.generate_image``, and fails when the two images differ.

    NOTE(review): ``generate_image``, ``run_pdf2htmlEX``, ``cur_output_dir``,
    ``cur_temp_dir``, ``GENERATING_MODE`` and ``SAVE_TMP`` are not defined in
    this class; presumably they come from ``Common`` or a subclass - confirm.
    """

    TEST_DATA_DIR = os.path.join(Common.TEST_DIR, 'browser_tests')

    # Arguments prepended to every pdf2htmlEX invocation made by these tests.
    DEFAULT_PDF2HTMLEX_ARGS = [
        '--fit-width', 800,
        '--last-page', 1,
        '--embed', 'fi', # avoid base64 to make it faster
    ]

    BROWSER_WIDTH = 800
    BROWSER_HEIGHT = 1200

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def run_test_case(self, filename, pdf2htmlEX_args=(), page_must_load=True):
        """Convert ``filename`` and compare its rendering with the reference.

        :param filename: name of a PDF file inside ``TEST_DATA_DIR``.
        :param pdf2htmlEX_args: extra pdf2htmlEX arguments, appended after
            ``DEFAULT_PDF2HTMLEX_ARGS``.
        :param page_must_load: forwarded to ``generate_image`` when rendering
            the reference HTML only.

        In generating mode the output directory is copied over the reference
        folder and no comparison is made.
        """
        basefilename, extension = os.path.splitext(filename)
        htmlfilename = basefilename + '.html'

        ref_htmlfolder = os.path.join(self.TEST_DATA_DIR, basefilename)
        ref_htmlfilename = os.path.join(ref_htmlfolder, htmlfilename)
        out_htmlfilename = os.path.join(self.cur_output_dir, htmlfilename)

        self.assertEqual(extension.lower(), '.pdf', 'Input file is not PDF')

        pdf2htmlEX_args = self.DEFAULT_PDF2HTMLEX_ARGS \
            + list(pdf2htmlEX_args) + [
                os.path.join(self.TEST_DATA_DIR, filename),
                htmlfilename
            ]

        result = self.run_pdf2htmlEX(pdf2htmlEX_args)
        self.assertIn(htmlfilename, result['output_files'], 'HTML file is not generated')

        if self.GENERATING_MODE:
            # copy generated html files
            shutil.rmtree(ref_htmlfolder, True)
            shutil.copytree(self.cur_output_dir, ref_htmlfolder)
            return

        png_out_dir = os.path.join(self.cur_temp_dir, 'png_out')
        os.mkdir(png_out_dir)

        pngfilename_out_fullpath = os.path.join(png_out_dir, basefilename + '.out.png')
        self.generate_image(out_htmlfilename, pngfilename_out_fullpath)
        out_img = Image.open(pngfilename_out_fullpath)

        pngfilename_ref_fullpath = os.path.join(png_out_dir, basefilename + '.ref.png')
        self.generate_image(ref_htmlfilename, pngfilename_ref_fullpath, page_must_load=page_must_load)
        ref_img = Image.open(pngfilename_ref_fullpath)

        diff_img = ImageChops.difference(ref_img, out_img)
        # getbbox() is None when the difference image has no non-zero pixels
        diff_bbox = diff_img.getbbox()
        if diff_bbox is not None:
            # The area of the bounding box is an upper bound on the number of
            # differing pixels, hence "<=" in the message below.
            diff_size = (diff_bbox[2] - diff_bbox[0]) * (diff_bbox[3] - diff_bbox[1])
            img_size = ref_img.size[0] * ref_img.size[1]
            if self.SAVE_TMP:
                # save the diff image
                # http://stackoverflow.com/questions/15721484/saving-in-png-using-pil-library-after-taking-imagechops-difference-of-two-png
                diff_img.convert('RGB').save(os.path.join(png_out_dir, basefilename + '.diff.png'))
            # Scale the ratio by 100 so the value matches the '%' in the message.
            self.fail('PNG files differ by <= %d pixels, (%f%% of %d pixels in total)' % (diff_size, 100.0*diff_size/img_size, img_size))

    @unittest.skipIf(Common.GENERATING_MODE, 'Do not auto generate reference for test_fail')
    def test_fail(self):
        # The HTML reference is generated manually, which mismatches the PDF
        # To test if the environment can detect any errors
        # E.g. when network is down, 404 message is shown for any HTML message
        with self.assertRaises(AssertionError):
            self.run_test_case('test_fail.pdf', page_must_load=False)

    def test_basic_text(self):
        self.run_test_case('basic_text.pdf')

    def test_geneve_1564(self):
        self.run_test_case('geneve_1564.pdf')

    def test_text_visibility(self):
        self.run_test_case('text_visibility.pdf', ['--correct-text-visibility', 1])

    def test_process_form(self):
        self.run_test_case('with_form.pdf', ['--process-form', 1])

Binary file not shown.

View File

@ -0,0 +1,36 @@
\documentclass{article}
\begin{document}
Normal\hspace{10pt}{\tiny tiny}\hspace{10pt}Text
\pdfliteral{5 Ts}
Rise \\
\pdfliteral{0 Ts}
\pdfliteral{5 Tc}
CharSpace \\
\pdfliteral{0 Tc}
\pdfliteral{200 Tz}
Horizontal\hspace{10pt}Scale \\
\pdfliteral{100 Tz}
\vspace{3cm}
\pdfliteral{q}
\pdfliteral{0.71 0.71 -0.71 0.71 0 0 cm}
Rotated
\pdfliteral{5 Ts}
Rise \\
\pdfliteral{0 Ts}
\pdfliteral{5 Tc}
CharSpace \\
\pdfliteral{0 Tc}
\pdfliteral{200 Tz}
Horizontal\hspace{10pt}Scale \\
\pdfliteral{100 Tz}
\pdfliteral{Q}
\end{document}

View File

@ -0,0 +1,107 @@
<!DOCTYPE html>
<!-- Created by pdf2htmlEX (https://github.com/coolwanglu/pdf2htmlex) -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="generator" content="pdf2htmlEX"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<style type="text/css">
/*!
* Base CSS for pdf2htmlEX
* Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com>
* https://github.com/coolwanglu/pdf2htmlEX/blob/master/share/LICENSE
*/#sidebar{position:absolute;top:0;left:0;bottom:0;width:250px;padding:0;margin:0;overflow:auto}#page-container{position:absolute;top:0;left:0;margin:0;padding:0;border:0}@media screen{#sidebar.opened+#page-container{left:250px}#page-container{bottom:0;right:0;overflow:auto}.loading-indicator{display:none}.loading-indicator.active{display:block;position:absolute;width:64px;height:64px;top:50%;left:50%;margin-top:-32px;margin-left:-32px}.loading-indicator img{position:absolute;top:0;left:0;bottom:0;right:0}}@media print{@page{margin:0}html{margin:0}body{margin:0;-webkit-print-color-adjust:exact}#sidebar{display:none}#page-container{width:auto;height:auto;overflow:visible;background-color:transparent}.d{display:none}}.pf{position:relative;background-color:white;overflow:hidden;margin:0;border:0}.pc{position:absolute;border:0;padding:0;margin:0;top:0;left:0;width:100%;height:100%;overflow:hidden;display:block;transform-origin:0 0;-ms-transform-origin:0 0;-webkit-transform-origin:0 0}.pc.opened{display:block}.bf{position:absolute;border:0;margin:0;top:0;bottom:0;width:100%;height:100%;-ms-user-select:none;-moz-user-select:none;-webkit-user-select:none;user-select:none}.bi{position:absolute;border:0;margin:0;-ms-user-select:none;-moz-user-select:none;-webkit-user-select:none;user-select:none}@media print{.pf{margin:0;box-shadow:none;page-break-after:always;page-break-inside:avoid}@-moz-document url-prefix(){.pf{overflow:visible;border:1px solid #fff}.pc{overflow:visible}}}.c{position:absolute;border:0;padding:0;margin:0;overflow:hidden;display:block}.t{position:absolute;white-space:pre;font-size:1px;transform-origin:0 100%;-ms-transform-origin:0 100%;-webkit-transform-origin:0 100%;unicode-bidi:bidi-override;-moz-font-feature-settings:"liga" 0}.t:after{content:''}.t:before{content:'';display:inline-block}.t 
span{position:relative;unicode-bidi:bidi-override}._{display:inline-block;color:transparent;z-index:-1}::selection{background:rgba(127,255,255,0.4)}::-moz-selection{background:rgba(127,255,255,0.4)}.pi{display:none}.d{position:absolute;transform-origin:0 100%;-ms-transform-origin:0 100%;-webkit-transform-origin:0 100%}.it{border:0;background-color:rgba(255,255,255,0.0)}.ir:hover{cursor:pointer}</style>
<style type="text/css">
/* CSS for test cases */
#page-container {
overflow:hidden;
}
</style>
<style type="text/css">
.ff0{font-family:sans-serif;visibility:hidden;}
@font-face{font-family:ff1;src:url(f1.woff)format("woff");}.ff1{font-family:ff1;line-height:0.899000;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff2;src:url(f2.woff)format("woff");}.ff2{font-family:ff2;line-height:0.882000;font-style:normal;font-weight:normal;visibility:visible;}
.m2{transform:matrix(0.231081,-0.231081,0.231081,0.231081,0,0);-ms-transform:matrix(0.231081,-0.231081,0.231081,0.231081,0,0);-webkit-transform:matrix(0.231081,-0.231081,0.231081,0.231081,0,0);}
.m0{transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);-ms-transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);-webkit-transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);}
.m3{transform:matrix(0.462161,-0.462161,0.231081,0.231081,0,0);-ms-transform:matrix(0.462161,-0.462161,0.231081,0.231081,0,0);-webkit-transform:matrix(0.462161,-0.462161,0.231081,0.231081,0,0);}
.m1{transform:matrix(0.653595,0.000000,0.000000,0.326797,0,0);-ms-transform:matrix(0.653595,0.000000,0.000000,0.326797,0,0);-webkit-transform:matrix(0.653595,0.000000,0.000000,0.326797,0,0);}
.v0{vertical-align:0.000000px;}
.ls0{letter-spacing:0.000000px;}
.ls1{letter-spacing:20.000000px;}
.ls2{letter-spacing:20.081833px;}
.sc_{text-shadow:none;}
.sc0{text-shadow:-0.015em 0 transparent,0 0.015em transparent,0.015em 0 transparent,0 -0.015em transparent;}
@media screen and (-webkit-min-device-pixel-ratio:0){
.sc_{-webkit-text-stroke:0px transparent;}
.sc0{-webkit-text-stroke:0.015em transparent;text-shadow:none;}
}
.ws0{word-spacing:0.000000px;}
._1{margin-left:-3.997949px;}
._2{margin-left:-1.115811px;}
._0{width:39.850619px;}
.fc0{color:rgb(0,0,0);}
.fs1{font-size:19.925200px;}
.fs0{font-size:39.850400px;}
.fs2{font-size:40.013453px;}
.y8{bottom:210.583007px;}
.y7{bottom:628.221281px;}
.y6{bottom:650.413190px;}
.y4{bottom:672.604170px;}
.y5{bottom:715.900248px;}
.y3{bottom:796.620915px;}
.y2{bottom:827.875817px;}
.y0{bottom:859.130719px;}
.y1{bottom:865.666667px;}
.h1{height:28.094532px;}
.h2{height:28.209484px;}
.h0{height:1035.294118px;}
.w0{width:800.000000px;}
.x0{left:194.394771px;}
.x2{left:199.825216px;}
.x4{left:222.016196px;}
.x3{left:233.840248px;}
.x5{left:244.208105px;}
.x1{left:312.996078px;}
.x6{left:396.252288px;}
@media print{
.v0{vertical-align:0.000000pt;}
.ls0{letter-spacing:0.000000pt;}
.ls1{letter-spacing:20.400000pt;}
.ls2{letter-spacing:20.483469pt;}
.ws0{word-spacing:0.000000pt;}
._1{margin-left:-4.077908pt;}
._2{margin-left:-1.138127pt;}
._0{width:40.647631pt;}
.fs1{font-size:20.323704pt;}
.fs0{font-size:40.647408pt;}
.fs2{font-size:40.813722pt;}
.y8{bottom:214.794667pt;}
.y7{bottom:640.785707pt;}
.y6{bottom:663.421453pt;}
.y4{bottom:686.056253pt;}
.y5{bottom:730.218253pt;}
.y3{bottom:812.553333pt;}
.y2{bottom:844.433333pt;}
.y0{bottom:876.313333pt;}
.y1{bottom:882.980000pt;}
.h1{height:28.656423pt;}
.h2{height:28.773674pt;}
.h0{height:1056.000000pt;}
.w0{width:816.000000pt;}
.x0{left:198.282667pt;}
.x2{left:203.821720pt;}
.x4{left:226.456520pt;}
.x3{left:238.517053pt;}
.x5{left:249.092267pt;}
.x1{left:319.256000pt;}
.x6{left:404.177333pt;}
}
</style>
<title></title>
</head>
<body>
<div id="page-container">
<div id="pf1" class="pf w0 h0" data-page-no="1"><div class="pc pc1 w0 h0"><div class="t m0 x0 h1 y0 ff1 fs0 fc0 sc0 ls0 ws0">Normal<span class="_ _0"> </span><span class="ff2 fs1">tiny<span class="_ _0"> </span></span>T<span class="_ _1"></span>ext</div><div class="t m0 x1 h1 y1 ff1 fs0 fc0 sc0 ls0 ws0">Rise</div><div class="t m0 x0 h1 y2 ff1 fs0 fc0 sc0 ls1 ws0">CharSpace</div><div class="t m1 x0 h1 y3 ff1 fs0 fc0 sc0 ls0 ws0">Horizon<span class="_ _2"></span>tal<span class="_ _0"> </span>Scale</div><div class="t m2 x2 h2 y4 ff1 fs2 fc0 sc0 ls0 ws0">Rotated</div><div class="t m2 x3 h2 y5 ff1 fs2 fc0 sc0 ls0 ws0">Rise</div><div class="t m2 x4 h2 y6 ff1 fs2 fc0 sc0 ls2 ws0">CharSpace</div><div class="t m3 x5 h2 y7 ff1 fs2 fc0 sc0 ls0 ws0">Horizon<span class="_ _2"></span>tal<span class="_ _0"> </span>Scale</div><div class="t m0 x6 h1 y8 ff1 fs0 fc0 sc0 ls0 ws0">1</div></div><div class="pi" data-data='{"ctm":[1.307190,0.000000,0.000000,1.307190,0.000000,0.000000]}'></div></div>
</div>
</body>
</html>

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 174 KiB

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@ -0,0 +1,9 @@
\documentclass{article}
\begin{document}
\Huge
The quick brown fox jumps over the lazy dog
The quick brown fox jumps over the lazy dog
The quick brown fox jumps over the lazy dog
\end{document}

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 109 KiB

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,216 @@
<!DOCTYPE html>
<!-- Created by pdf2htmlEX (https://github.com/coolwanglu/pdf2htmlex) -->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8"/>
<meta name="generator" content="pdf2htmlEX"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<style type="text/css">
/*!
* Base CSS for pdf2htmlEX
* Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com>
* https://github.com/coolwanglu/pdf2htmlEX/blob/master/share/LICENSE
*/#sidebar{position:absolute;top:0;left:0;bottom:0;width:250px;padding:0;margin:0;overflow:auto}#page-container{position:absolute;top:0;left:0;margin:0;padding:0;border:0}@media screen{#sidebar.opened+#page-container{left:250px}#page-container{bottom:0;right:0;overflow:auto}.loading-indicator{display:none}.loading-indicator.active{display:block;position:absolute;width:64px;height:64px;top:50%;left:50%;margin-top:-32px;margin-left:-32px}.loading-indicator img{position:absolute;top:0;left:0;bottom:0;right:0}}@media print{@page{margin:0}html{margin:0}body{margin:0;-webkit-print-color-adjust:exact}#sidebar{display:none}#page-container{width:auto;height:auto;overflow:visible;background-color:transparent}.d{display:none}}.pf{position:relative;background-color:white;overflow:hidden;margin:0;border:0}.pc{position:absolute;border:0;padding:0;margin:0;top:0;left:0;width:100%;height:100%;overflow:hidden;display:block;transform-origin:0 0;-ms-transform-origin:0 0;-webkit-transform-origin:0 0}.pc.opened{display:block}.bf{position:absolute;border:0;margin:0;top:0;bottom:0;width:100%;height:100%;-ms-user-select:none;-moz-user-select:none;-webkit-user-select:none;user-select:none}.bi{position:absolute;border:0;margin:0;-ms-user-select:none;-moz-user-select:none;-webkit-user-select:none;user-select:none}@media print{.pf{margin:0;box-shadow:none;page-break-after:always;page-break-inside:avoid}@-moz-document url-prefix(){.pf{overflow:visible;border:1px solid #fff}.pc{overflow:visible}}}.c{position:absolute;border:0;padding:0;margin:0;overflow:hidden;display:block}.t{position:absolute;white-space:pre;font-size:1px;transform-origin:0 100%;-ms-transform-origin:0 100%;-webkit-transform-origin:0 100%;unicode-bidi:bidi-override;-moz-font-feature-settings:"liga" 0}.t:after{content:''}.t:before{content:'';display:inline-block}.t 
span{position:relative;unicode-bidi:bidi-override}._{display:inline-block;color:transparent;z-index:-1}::selection{background:rgba(127,255,255,0.4)}::-moz-selection{background:rgba(127,255,255,0.4)}.pi{display:none}.d{position:absolute;transform-origin:0 100%;-ms-transform-origin:0 100%;-webkit-transform-origin:0 100%}.it{border:0;background-color:rgba(255,255,255,0.0)}.ir:hover{cursor:pointer}</style>
<style type="text/css">
/* CSS for test cases */
#page-container {
overflow:hidden;
}
</style>
<style type="text/css">
.ff0{font-family:sans-serif;visibility:hidden;}
@font-face{font-family:ff1;src:url(f1.woff)format("woff");}.ff1{font-family:ff1;line-height:1.108000;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff2;src:url(f2.woff)format("woff");}.ff2{font-family:ff2;line-height:0.889000;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff3;src:url(f3.woff)format("woff");}.ff3{font-family:ff3;line-height:1.326000;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff4;src:url(f4.woff)format("woff");}.ff4{font-family:ff4;line-height:1.170898;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff5;src:url(f5.woff)format("woff");}.ff5{font-family:ff5;line-height:1.254000;font-style:normal;font-weight:normal;visibility:visible;}
@font-face{font-family:ff6;src:url(f6.woff)format("woff");}.ff6{font-family:ff6;line-height:1.292000;font-style:normal;font-weight:normal;visibility:visible;}
.m1{transform:matrix(0.000000,-0.326797,0.326797,0.000000,0,0);-ms-transform:matrix(0.000000,-0.326797,0.326797,0.000000,0,0);-webkit-transform:matrix(0.000000,-0.326797,0.326797,0.000000,0,0);}
.m0{transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);-ms-transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);-webkit-transform:matrix(0.326797,0.000000,0.000000,0.326797,0,0);}
.v0{vertical-align:0.000000px;}
.ls1{letter-spacing:-0.480000px;}
.ls0{letter-spacing:0.000000px;}
.ls3{letter-spacing:4.326400px;}
.ls2{letter-spacing:105.216000px;}
.sc_{text-shadow:none;}
.sc0{text-shadow:-0.015em 0 transparent,0 0.015em transparent,0.015em 0 transparent,0 -0.015em transparent;}
@media screen and (-webkit-min-device-pixel-ratio:0){
.sc_{-webkit-text-stroke:0px transparent;}
.sc0{-webkit-text-stroke:0.015em transparent;text-shadow:none;}
}
.ws0{word-spacing:0.000000px;}
.ws1{word-spacing:103.742400px;}
.ws2{word-spacing:108.590400px;}
._9{margin-left:-123.403200px;}
._6{margin-left:-118.555200px;}
._8{margin-left:-99.360000px;}
._5{margin-left:-94.512000px;}
._c{margin-left:-13.520000px;}
._7{margin-left:-11.280000px;}
._3{margin-left:-6.240000px;}
._a{margin-left:-4.996000px;}
._0{margin-left:-2.640000px;}
._e{margin-left:-1.560000px;}
._1{width:9.412000px;}
._d{width:31.564000px;}
._2{width:49.400000px;}
._4{width:103.728000px;}
._b{width:108.576000px;}
.fc1{color:rgb(255,255,255);}
.fc0{color:rgb(0,0,0);}
.fs1{font-size:28.000000px;}
.fs0{font-size:48.000000px;}
.fs4{font-size:52.000000px;}
.fs2{font-size:56.000000px;}
.fs3{font-size:116.000000px;}
.y2{bottom:22.652157px;}
.y0{bottom:39.869281px;}
.y1{bottom:51.602614px;}
.y3{bottom:52.972549px;}
.y1b{bottom:74.683660px;}
.y1a{bottom:102.134641px;}
.y19{bottom:135.467974px;}
.y18{bottom:190.369935px;}
.y17{bottom:223.703268px;}
.y16{bottom:278.605229px;}
.y15{bottom:306.056209px;}
.y14{bottom:339.389542px;}
.y13{bottom:394.291503px;}
.y12{bottom:427.624837px;}
.y1c{bottom:449.921569px;}
.y11{bottom:482.526797px;}
.y10{bottom:515.860131px;}
.yf{bottom:598.213072px;}
.ye{bottom:623.703268px;}
.yd{bottom:649.193464px;}
.yc{bottom:674.683660px;}
.yb{bottom:700.173856px;}
.ya{bottom:755.075817px;}
.y9{bottom:780.566013px;}
.y8{bottom:806.056209px;}
.y7{bottom:831.546405px;}
.y6{bottom:857.036601px;}
.y5{bottom:916.539739px;}
.y4{bottom:966.777908px;}
.h3{height:19.124000px;}
.h2{height:42.720000px;}
.h7{height:49.344000px;}
.h6{height:55.640000px;}
.h4{height:59.920000px;}
.h5{height:109.259766px;}
.h1{height:924.836601px;}
.h0{height:1035.294118px;}
.w1{width:682.352941px;}
.w0{width:800.000000px;}
.x0{left:47.058824px;}
.x8{left:89.175817px;}
.x5{left:91.424837px;}
.x6{left:117.568627px;}
.x7{left:156.784314px;}
.x1{left:185.962092px;}
.x2{left:367.098039px;}
.x4{left:438.899346px;}
.x3{left:708.799869px;}
@media print{
.v0{vertical-align:0.000000pt;}
.ls1{letter-spacing:-0.489600pt;}
.ls0{letter-spacing:0.000000pt;}
.ls3{letter-spacing:4.412928pt;}
.ls2{letter-spacing:107.320320pt;}
.ws0{word-spacing:0.000000pt;}
.ws1{word-spacing:105.817248pt;}
.ws2{word-spacing:110.762208pt;}
._9{margin-left:-125.871264pt;}
._6{margin-left:-120.926304pt;}
._8{margin-left:-101.347200pt;}
._5{margin-left:-96.402240pt;}
._c{margin-left:-13.790400pt;}
._7{margin-left:-11.505600pt;}
._3{margin-left:-6.364800pt;}
._a{margin-left:-5.095920pt;}
._0{margin-left:-2.692800pt;}
._e{margin-left:-1.591200pt;}
._1{width:9.600240pt;}
._d{width:32.195280pt;}
._2{width:50.388000pt;}
._4{width:105.802560pt;}
._b{width:110.747520pt;}
.fs1{font-size:28.560000pt;}
.fs0{font-size:48.960000pt;}
.fs4{font-size:53.040000pt;}
.fs2{font-size:57.120000pt;}
.fs3{font-size:118.320000pt;}
.y2{bottom:23.105200pt;}
.y0{bottom:40.666667pt;}
.y1{bottom:52.634667pt;}
.y3{bottom:54.032000pt;}
.y1b{bottom:76.177333pt;}
.y1a{bottom:104.177333pt;}
.y19{bottom:138.177333pt;}
.y18{bottom:194.177333pt;}
.y17{bottom:228.177333pt;}
.y16{bottom:284.177333pt;}
.y15{bottom:312.177333pt;}
.y14{bottom:346.177333pt;}
.y13{bottom:402.177333pt;}
.y12{bottom:436.177333pt;}
.y1c{bottom:458.920000pt;}
.y11{bottom:492.177333pt;}
.y10{bottom:526.177333pt;}
.yf{bottom:610.177333pt;}
.ye{bottom:636.177333pt;}
.yd{bottom:662.177333pt;}
.yc{bottom:688.177333pt;}
.yb{bottom:714.177333pt;}
.ya{bottom:770.177333pt;}
.y9{bottom:796.177333pt;}
.y8{bottom:822.177333pt;}
.y7{bottom:848.177333pt;}
.y6{bottom:874.177333pt;}
.y5{bottom:934.870533pt;}
.y4{bottom:986.113467pt;}
.h3{height:19.506480pt;}
.h2{height:43.574400pt;}
.h7{height:50.330880pt;}
.h6{height:56.752800pt;}
.h4{height:61.118400pt;}
.h5{height:111.444961pt;}
.h1{height:943.333333pt;}
.h0{height:1056.000000pt;}
.w1{width:696.000000pt;}
.w0{width:816.000000pt;}
.x0{left:48.000000pt;}
.x8{left:90.959333pt;}
.x5{left:93.253333pt;}
.x6{left:119.920000pt;}
.x7{left:159.920000pt;}
.x1{left:189.681333pt;}
.x2{left:374.440000pt;}
.x4{left:447.677333pt;}
.x3{left:722.975867pt;}
}
</style>
<title></title>
</head>
<body>
<div id="page-container">
<div id="pf1" class="pf w0 h0" data-page-no="1"><div class="pc pc1 w0 h0"><img class="bi x0 y0 w1 h1" alt="" src="bg1.png"/><div class="t m0 x1 h2 y1 ff1 fs0 fc0 sc0 ls0 ws0">Éditions « <span class="_ _0"></span>À Reproduire » Internet : www<span class="_ _0"></span>.en<span class="_ _0"></span>volee.com</div><div class="t m0 x2 h3 y2 ff2 fs1 fc0 sc0 ls0 ws0">Question de textes 4</div><div class="t m0 x3 h4 y3 ff3 fs2 fc1 sc0 ls0 ws0">7</div><div class="t m0 x4 h2 y4 ff1 fs0 fc0 sc0 ls0 ws0">Nom</div><div class="t m0 x5 h5 y5 ff4 fs3 fc1 sc0 ls0 ws0">Mona veut un chien</div><div class="t m0 x6 h6 y6 ff5 fs4 fc0 sc0 ls0 ws0"> <span class="_ _1"> </span><span class="ff3 fc1">4</span> <span class="_ _2"> </span>Que veut dire la mère de Mona quand elle dit : « Cest beaucoup de travail, avoir un chien » <span class="_ _3"></span>?</div><div class="t m0 x6 h7 y7 ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _4"> </span>a) <span class="_ _1"> </span>Mona doit faire plus de recherche avant dêtre prête à avoir un chien.</div><div class="t m0 x6 h7 y8 ff6 fs0 fc0 sc0 ls0 ws1"> b) <span class="_ _5"></span> <span class="_ _6"></span><span class="ls1 ws0">Les chiens travaillent fort afi<span class="_ _7"></span> <span class="_ _0"></span>n dêtre de bons animaux de compagnie pour les gens.</span></div><div class="t m0 x6 h7 y9 ff6 fs0 fc0 sc0 ls0 ws2"> c) <span class="_ _8"></span> <span class="_ _9"></span><span class="ls1 ws0">Les chiens ont besoin de nourriture et dexercice. 
Il faut aussi ramasser leurs dégâts.</span></div><div class="t m0 x6 h7 ya ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _4"> </span>d) <span class="_ _1"> </span>Mona devra se reposer plus si elle a un chien.</div><div class="t m0 x6 h6 yb ff5 fs4 fc0 sc0 ls0 ws0"> <span class="_ _1"> </span><span class="ff3 fc1">5</span> <span class="_ _2"> </span>Comment Mona a-t-elle acquis autant de connaissances sur les chiens <span class="_ _3"></span>?</div><div class="t m0 x6 h7 yc ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _4"> </span>a) <span class="_ _1"> </span>En écoutant son enseignante à lécole.</div><div class="t m0 x6 h7 yd ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _4"> </span>b) <span class="_ _1"> </span>En visitant un site W<span class="_ _a"></span>eb.</div><div class="t m0 x6 h7 ye ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _b"> </span>c) <span class="_ _1"> </span>En écrivant à la Société protectrice des animaux.</div><div class="t m0 x6 h7 yf ff6 fs0 fc0 sc0 ls0 ws0"> <span class="_ _4"> </span>d) <span class="_ _1"> </span>En regardant une vidéo sur les chiens.</div><div class="t m0 x6 h6 y10 ff5 fs4 fc0 sc0 ls0 ws0"> <span class="_ _1"> </span><span class="ff3 fc1">6</span> <span class="_ _2"> </span>Pourquoi Mona veut-elle un Jack Russell <span class="_ _3"></span>?</div><div class="t m0 x6 h7 y11 ff6 fs0 fc0 sc0 ls2 ws0"> </div><div class="t m0 x6 h6 y12 ff3 fs4 fc1 sc0 ls3 ws0"> 7<span class="_ _a"></span><span class="ff5 fc0 ls0"> <span class="_ _2"> </span>Que devra faire Mona même si elle est fatiguée ou quil pleut <span class="_ _3"></span>?</span></div><div class="t m0 x6 h7 y13 ff6 fs0 fc0 sc0 ls2 ws0"> </div><div class="t m0 x6 h6 y14 ff5 fs4 fc0 sc0 ls0 ws0"> <span class="_ _1"> </span><span class="ff3 fc1">8</span> <span class="_ _2"> </span>Décris physiquement le chien que Mona veut choisir<span class="_ _0"></span>.</div><div class="t m0 x6 h7 y15 ff6 fs0 fc0 sc0 ls2 ws0"> </div><div class="t m0 x7 h7 y16 ff6 fs0 fc0 sc0 ls0 ws0"> </div><div class="t 
m0 x6 h6 y17 ff3 fs4 fc1 sc0 ls3 ws0"> 9<span class="_ _a"></span><span class="ff5 fc0 ls0"> <span class="_ _2"> </span>Quand fait-elle ses recherches sur le Web <span class="_ _3"></span>?</span></div><div class="t m0 x6 h7 y18 ff6 fs0 fc0 sc0 ls2 ws0"> </div><div class="t m0 x6 h6 y19 ff3 fs4 fc1 sc0 ls0 ws0"> <span class="_ _c"></span>10<span class="ff5 fc0"> <span class="_ _d"> </span>Que contient le site W<span class="_ _e"></span>eb préféré de Mona <span class="_ _3"></span>? Nomme deux éléments.</span></div><div class="t m0 x6 h7 y1a ff6 fs0 fc0 sc0 ls2 ws0"> </div><div class="t m0 x7 h7 y1b ff6 fs0 fc0 sc0 ls0 ws0"> </div><div class="t m1 x8 h7 y1c ff6 fs0 fc0 sc0 ls0 ws0">REPÉRAGE</div><input id="text-1-0" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<div id="cb-1-1" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-2" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-3" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-4" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<input id="text-1-5" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<input id="text-1-6" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<input id="text-1-7" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<input id="text-1-8" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<input id="text-1-9" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<input id="text-1-a" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
<div id="cb-1-b" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-c" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-d" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<div id="cb-1-e" class="ir" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 3.000000px; height: 3.000000px; background-size: cover;" ></div>
<input id="text-1-f" class="it" type="text" value="" style="position: absolute; left: 0.000000px; bottom: 0.000000px; width: 0.000000px; height: 0.000000px; line-height: 0.000000px; font-size: 0.000000px;" />
</div><div class="pi" data-data='{"ctm":[1.307190,0.000000,0.000000,1.307190,0.000000,0.000000]}'></div></div>
</div>
</body>
</html>

4
test/fancy.min.css vendored Normal file
View File

@ -0,0 +1,4 @@
/* Stylesheet used by the test cases: clip whatever overflows the page container. */
#page-container { overflow: hidden; }

View File

@ -0,0 +1,3 @@
# Open the font file named by the first script argument.
Open($1);
# Generate a ".woff" file from it. NOTE(review): "$1:r" presumably yields the
# argument with its extension removed - confirm against the FontForge scripting reference.
Generate($1:r+".woff");
# Leave the interpreter with exit status 0.
Quit(0);

24
test/old/test.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python
"""Convert every PDF found in DIR with pdf2htmlEX and build an index page.

``out.html`` shows an <iframe> on the left and, on the right, one link per
converted document that opens the result inside that iframe.  The script
exits with a non-zero status at the first PDF that fails to convert.
"""
DIR = 'pdf'
#DIR = '../../pdf.js/test/pdfs'
import os
import subprocess
import sys

with open('out.html','w') as outf:
    outf.write('<!DOCTYPE html>\n<html><head><meta charset=\"utf-8\"></head><body><div style="position:absolute;top:0;left:0;width:80%;height:100%;"><iframe width="100%" height="100%" name="pdf"></iframe></div><div style="position:absolute;top:0;right:0;width:20%;height:100%;overflow:auto;text-align:right;">')

    for f in os.listdir(DIR):
        if not f.lower().endswith('.pdf'):
            continue
        print(f)
        # Pass an argument vector instead of a shell string: file names that
        # contain quotes, spaces or shell metacharacters are then handed to
        # pdf2htmlEX verbatim instead of being re-interpreted by the shell.
        cmd = ['pdf2htmlEX', '-l', '10', '--no-drm', '1', '--fit-width', '1024',
               '--dest-dir', 'html', '--external-hint-tool=ttfautohint',
               '%s/%s' % (DIR, f)]
        try:
            status = subprocess.call(cmd)
        except OSError:
            # Executable missing or not runnable: treat it like a failed run,
            # as the shell-based invocation did.
            status = -1
        if status != 0:
            print("error on  %s" % f)
            sys.exit(-1)

        # Drop the trailing "pdf" but keep the dot, so "%shtml" yields "name.html".
        ff = f[:-3]
        outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff))
        # Flush after each document so the index is usable while the run continues.
        outf.flush()

    outf.write('</div></body></html>')

2
test/start_xvfb.sh Executable file
View File

@ -0,0 +1,2 @@
#!/bin/sh
# Start Xvfb in the background through start-stop-daemon as X display :99
# (the Travis configuration exports DISPLAY=:99.0), with screen 0 set to
# 1280x1920x16. The daemon's PID file is /tmp/custom_xvfb_99.pid.
/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -ac -screen 0 1280x1920x16

View File

@ -1,24 +1,129 @@
#!/usr/bin/env python
DIR = 'pdf'
#DIR = '../../pdf.js/test/pdfs'
import unittest
import os
import sys
import tempfile
import shutil
import subprocess
with open('out.html','w') as outf:
outf.write('<!DOCTYPE html>\n<html><head><meta charset=\"utf-8\"></head><body><div style="position:absolute;top:0;left:0;width:80%;height:100%;"><iframe width="100%" height="100%" name="pdf"></iframe></div><div style="position:absolute;top:0;right:0;width:20%;height:100%;overflow:auto;text-align:right;">')
class Common(object):
SRC_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
TEST_DIR = os.path.join(SRC_DIR, 'test')
DATA_DIR = os.path.join(SRC_DIR, 'share')
PDF2HTMLEX_PATH = os.path.join(SRC_DIR, 'pdf2htmlEX')
for f in os.listdir(DIR):
if not f.lower().endswith('.pdf'):
continue
print f
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
print "error on ", f
sys.exit(-1)
SAVE_TMP = bool(os.environ.get('P2H_TEST_SAVE_TMP'))
GENERATING_MODE = bool(os.environ.get('P2H_TEST_GEN'))
ff = f[:-3]
outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff))
outf.flush();
CANONICAL_TEMPDIR = '/tmp/pdf2htmlEX_test'
def setUp(self):
if not self.SAVE_TMP:
self.cur_temp_dir = tempfile.mkdtemp(prefix='pdf2htmlEX_test')
else:
shutil.rmtree(self.CANONICAL_TEMPDIR, True)
os.mkdir(self.CANONICAL_TEMPDIR)
self.cur_temp_dir = self.CANONICAL_TEMPDIR
outf.write('</div></body></html>')
self.cur_data_dir = os.path.join(self.cur_temp_dir, 'share')
self.cur_output_dir = os.path.join(self.cur_temp_dir, 'out')
os.mkdir(self.cur_data_dir)
os.mkdir(self.cur_output_dir)
# filter manifest
with open(os.path.join(self.DATA_DIR, 'manifest')) as inf:
with open(os.path.join(self.cur_data_dir, 'manifest'), 'w') as outf:
ignore = False
for line in inf:
if ignore:
if line.startswith('#TEST_IGNORE_END'):
ignore = False
elif line.startswith('#TEST_IGNORE_BEGIN'):
ignore = True
else:
outf.write(line)
# copy files
shutil.copy(os.path.join(self.DATA_DIR, 'base.min.css'),
os.path.join(self.cur_data_dir, 'base.min.css'))
shutil.copy(os.path.join(self.TEST_DIR, 'fancy.min.css'),
os.path.join(self.cur_data_dir, 'fancy.min.css'))
def tearDown(self):
    """Delete this test's temporary directory unless SAVE_TMP is set."""
    if self.SAVE_TMP:
        return
    # Errors during removal are ignored on purpose.
    shutil.rmtree(self.cur_temp_dir, ignore_errors=True)
def run_pdf2htmlEX(self, args):
    """
    Execute the pdf2htmlEX with the specified arguments.

    The executable always receives this test's own ``--data-dir`` and
    ``--dest-dir`` first; ``args`` follow.  Every argument is converted
    with ``str()``, so numbers may be passed as-is.  The child's stderr is
    discarded.  The calling test fails if the exit status is not 0.

    :type args: list of values
    :param args: list of arguments to pass to executable.
    :return: dict with ``'return_code'`` (the exit status) and
             ``'output_files'`` (names found in the output directory after
             the run, in ``os.listdir`` order)
    """
    # list(args) lets callers hand in any sequence, not only a list.
    cmd = [self.PDF2HTMLEX_PATH,
           '--data-dir', self.cur_data_dir,
           '--dest-dir', self.cur_output_dir
           ] + list(args)

    with open(os.devnull, 'w') as fnull:
        return_code = subprocess.call([str(part) for part in cmd], stderr=fnull)

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(return_code, 0, 'cannot execute pdf2htmlEX')

    files = os.listdir(self.cur_output_dir)

    return {
        'return_code' : return_code,
        'output_files' : files
    }
if __name__ == '__main__':
    # Command-line driver.
    #   no arguments      -> run every test of every test module
    #   "Class.test_name" -> run exactly that test
    #   "test_name"       -> try that test in every known test class
    # The remote (Sauce Labs) module is only imported when P2H_TEST_REMOTE is set.
    if not os.path.isfile(Common.PDF2HTMLEX_PATH) or not os.access(Common.PDF2HTMLEX_PATH, os.X_OK):
        sys.stderr.write("Cannot locate pdf2htmlEX executable. Make sure source was built before running this test.\n")
        sys.exit(1)

    suites = []
    loader = unittest.TestLoader()

    all_modules = [__import__('test_output'), __import__('test_local_browser')]
    all_classes = ['test_output', 'test_local_browser']

    if bool(os.environ.get('P2H_TEST_REMOTE')):
        m = __import__('test_remote_browser')
        all_modules.append(m)
        all_classes += m.test_classnames

    # Expand bare test names into "Class.name" for every known class.
    test_names = []
    for name in sys.argv[1:]:
        if '.' in name:
            test_names.append(name)
        else:
            for class_name in all_classes:
                test_names.append(class_name + '.' + name)

    for module in all_modules:
        if test_names:
            for n in test_names:
                try:
                    suites.append(loader.loadTestsFromName(n, module))
                except Exception:
                    # Expected: most names belong to a class of another
                    # module.  Only ordinary errors are skipped, so Ctrl-C
                    # and SystemExit still propagate.
                    pass
        else:
            suites.append(loader.loadTestsFromModule(module))

    if not suites:
        sys.stderr.write('No test found\n')
        sys.exit(1)

    failure_count = 0
    runner = unittest.TextTestRunner(verbosity=2)
    for suite in suites:
        result = runner.run(suite)
        failure_count += len(result.errors) + len(result.failures)

    # Exit statuses are truncated to 8 bits; without the cap, 256 failures
    # would be reported as status 0 (success).
    sys.exit(min(failure_count, 255))

37
test/test_local_browser.py Executable file
View File

@ -0,0 +1,37 @@
#!/usr/bin/env python
# Run browsers tests with a local Firefox
import unittest
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from browser_tests import BrowserTests
class test_local_browser(BrowserTests, unittest.TestCase):
    """Browser tests that take their screenshots with a local Firefox.

    No browser is started when GENERATING_MODE is set.
    """

    @classmethod
    def setUpClass(cls):
        """Start Firefox and size its window to BROWSER_WIDTH x BROWSER_HEIGHT."""
        super(test_local_browser, cls).setUpClass()
        if not cls.GENERATING_MODE:
            cls.browser = webdriver.Firefox()
            try:
                # Maximize first to find out how large the window can get.
                cls.browser.maximize_window()
                size = cls.browser.get_window_size()
                assert ((size['width'] >= cls.BROWSER_WIDTH) and (size['height'] >= cls.BROWSER_HEIGHT)), 'Screen is not large enough'
                cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT)
            except BaseException:
                # unittest does not call tearDownClass when setUpClass
                # raises, so close the browser here to avoid leaking it.
                cls.browser.quit()
                raise

    @classmethod
    def tearDownClass(cls):
        """Close the browser started by setUpClass."""
        if not cls.GENERATING_MODE:
            cls.browser.quit()
        super(test_local_browser, cls).tearDownClass()

    def generate_image(self, html_file, png_file, page_must_load=True):
        """Open ``html_file`` via a file:// URL and save a screenshot to ``png_file``.

        Waits up to 5 seconds for the element with id ``page-container``.
        If it does not appear, the error is re-raised when
        ``page_must_load`` is true; otherwise the screenshot is taken anyway.
        """
        self.browser.get('file://' + html_file)
        try:
            WebDriverWait(self.browser, 5).until(expected_conditions.presence_of_element_located((By.ID, 'page-container')))
        except Exception:
            if page_must_load:
                raise
        self.browser.save_screenshot(png_file)

View File

@ -1,262 +0,0 @@
#!/usr/bin/env python
import unittest
import os
import sys
import tempfile
import shutil
import subprocess
# This file is expected to live in SRC_DIR/test; TEST_DIR is the directory
# part of __file__ as given (it is not made absolute here).
TEST_DIR = os.path.dirname(__file__)
# Directory holding the PDFs used as test input
TEST_DATA_DIR = os.path.join(TEST_DIR, 'test_data')
# Directory holding the base css file etc.; passed to pdf2htmlEX as --data-dir
DATA_DIR = os.path.join(TEST_DIR, '../share')
# Path of the executable produced by the build. It is relative to the current
# working directory, so run the script from the directory containing the binary.
PDF2HTMLEX_PATH = './pdf2htmlEX'
def execute_pdf2htmlex_with_args(args):
    """
    Run pdf2htmlEX with ``--data-dir`` set to DATA_DIR, followed by ``args``.

    A non-zero exit status is reported on stderr together with the full
    command line.

    :type args: list of values
    :param args: arguments appended to the command line; each one is converted with str().
    :rtype: int
    :return: The exit code of the command
    """
    command = [os.path.abspath(PDF2HTMLEX_PATH), '--data-dir', os.path.abspath(DATA_DIR)]
    command.extend(str(arg) for arg in args)

    status = subprocess.call(command)
    if status:
        sys.stderr.write("Command return code %d: %s\n" % (status, ' '.join(command)))
    return status
def execute_pdf2htmlex_and_get_files(args):
    """
    Run pdf2htmlEX into a fresh temporary directory and report what it wrote.

    The temporary directory is passed as ``--dest-dir`` ahead of ``args``
    and is always removed again before returning.

    :type args: list of values
    :param args: arguments to pass to the executable after ``--dest-dir``.
    :rtype: list of str
    :return: Sorted names of the generated files, or None if the command did not exit with status 0.
    """
    out_dir = tempfile.mkdtemp()
    try:
        status = execute_pdf2htmlex_with_args(['--dest-dir', out_dir] + args)
        if status == 0:
            return sorted(os.listdir(out_dir))
        return None
    finally:
        shutil.rmtree(path=out_dir, ignore_errors=True)
def path_to_test_file(filename):
    """
    Build the absolute path of a file inside TEST_DATA_DIR.

    :type filename: str
    :param filename: name of the test file, relative to TEST_DATA_DIR
    :rtype: str
    :returns: the absolute path to that file
    """
    relative = os.path.join(TEST_DATA_DIR, filename)
    return os.path.abspath(relative)
class OutputNamingTests(unittest.TestCase):
    """Checks the names of the files pdf2htmlEX writes.

    Covers the default and explicit output name in single-file mode, and
    --page-filename handling (with and without a %d placeholder) in
    --split-pages mode.
    """

    def _check(self, args, expected):
        """Run pdf2htmlEX with ``args`` and compare the sorted output names with ``expected``."""
        self.assertEqual(execute_pdf2htmlex_and_get_files(args), sorted(expected))

    def _check_single(self, pdf, expected, output_name=None):
        """Single-file mode: the input PDF, optionally followed by an output file name."""
        args = [path_to_test_file(pdf)]
        if output_name is not None:
            args.append(output_name)
        self._check(args, expected)

    def _check_split(self, pdf, expected, page_filename=None):
        """--split-pages mode, optionally with --page-filename, then the input PDF."""
        args = ['--split-pages', 1]
        if page_filename is not None:
            args += ['--page-filename', page_filename]
        args.append(path_to_test_file(pdf))
        self._check(args, expected)

    def test_generate_single_html_default_name_single_page_pdf(self):
        self._check_single('1-page.pdf', ['1-page.html'])

    def test_generate_single_html_default_name_multiple_page_pdf(self):
        self._check_single('2-pages.pdf', ['2-pages.html'])

    def test_generate_single_html_specify_name_single_page_pdf(self):
        self._check_single('1-page.pdf', ['foo.html'], 'foo.html')

    def test_generate_single_html_specify_name_multiple_page_pdf(self):
        self._check_single('2-pages.pdf', ['foo.html'], 'foo.html')

    def test_generate_split_pages_default_name_single_page(self):
        self._check_split('1-page.pdf', ['1-page.html', '1-page1.page'])

    def test_generate_split_pages_default_name_multiple_pages(self):
        self._check_split('3-pages.pdf', ['3-pages.html', '3-pages1.page', '3-pages2.page', '3-pages3.page'])

    def test_generate_split_pages_specify_name_single_page(self):
        self._check_split('1-page.pdf', ['1-page.html', 'foo1.xyz'], 'foo.xyz')

    def test_generate_split_pages_specify_name_multiple_pages(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'foo1.xyz', 'foo2.xyz', 'foo3.xyz'], 'foo.xyz')

    def test_generate_split_pages_specify_name_formatter_multiple_pages(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'fo1o.xyz', 'fo2o.xyz', 'fo3o.xyz'], 'fo%do.xyz')

    def test_generate_split_pages_specify_name_formatter_with_padded_zeros_multiple_pages(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'fo001o.xyz', 'fo002o.xyz', 'fo003o.xyz'], 'fo%03do.xyz')

    def test_generate_split_pages_specify_name_only_first_formatter_gets_taken(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f1o%do.xyz', 'f2o%do.xyz', 'f3o%do.xyz'], 'f%do%do.xyz')

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_s(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%soo1.xyz', 'f%soo2.xyz', 'f%soo3.xyz'], 'f%soo.xyz')

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_p(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%poo1.xyz', 'f%poo2.xyz', 'f%poo3.xyz'], 'f%poo.xyz')

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_n(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%noo1.xyz', 'f%noo2.xyz', 'f%noo3.xyz'], 'f%noo.xyz')

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%%oo1.xyz', 'f%%oo2.xyz', 'f%%oo3.xyz'], 'f%%oo.xyz')

    # This case used to share its name with the next test, so the later
    # definition replaced it and it was never executed.  It now has its own
    # name; NOTE(review): its expectation has therefore never been exercised.
    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_before_actual_placeholder(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%%o1o.xyz', 'f%%o2o.xyz', 'f%%o3o.xyz'], 'f%%o%do.xyz')

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'fo1o%%.xyz', 'fo2o%%.xyz', 'fo3o%%.xyz'], 'fo%do%%.xyz')

    def test_generate_split_pages_specify_name_only_formatter_starts_part_way_through_invalid_formatter(self):
        self._check_split('3-pages.pdf', ['3-pages.html', 'f%021oo.xyz', 'f%022oo.xyz', 'f%023oo.xyz'], 'f%02%doo.xyz')

    def test_generate_split_pages_specify_output_filename_no_formatter_no_extension(self):
        self._check_split('1-page.pdf', ['1-page.html', 'foo1'], 'foo')

    def test_generate_single_html_name_specified_format_characters_percent_d(self):
        self._check_single('2-pages.pdf', ['foo%d.html'], 'foo%d.html')

    def test_generate_single_html_name_specified_format_characters_percent_p(self):
        self._check_single('2-pages.pdf', ['foo%p.html'], 'foo%p.html')

    def test_generate_single_html_name_specified_format_characters_percent_n(self):
        self._check_single('2-pages.pdf', ['foo%n.html'], 'foo%n.html')

    def test_generate_single_html_name_specified_format_characters_percent_percent(self):
        self._check_single('2-pages.pdf', ['foo%%.html'], 'foo%%.html')
if __name__=="__main__":
    # Refuse to start unless the built pdf2htmlEX binary exists and is executable.
    binary = os.path.abspath(PDF2HTMLEX_PATH)
    runnable = os.path.isfile(binary) and os.access(binary, os.X_OK)
    if not runnable:
        sys.stderr.write("Cannot locate pdf2htmlEX executable. Make sure source was built before running this test.\n")
        exit(1)
    unittest.main()

86
test/test_output.py Normal file
View File

@ -0,0 +1,86 @@
#!/usr/bin/env python
# Test output files
import unittest
import os
from test import Common
@unittest.skipIf(Common.GENERATING_MODE, 'Skipping test_output in generating mode')
class test_output(Common, unittest.TestCase):
    """Checks the names of the files pdf2htmlEX writes for the PDFs in test/test_output."""

    def run_test_case(self, input_file, expected_output_files, args=()):
        """Convert ``input_file`` and compare the produced file names.

        :param input_file: PDF name inside TEST_DIR/test_output
        :param expected_output_files: expected names in the output directory (order is ignored)
        :param args: extra command-line arguments, placed after the input path
        """
        args = list(args)
        args.insert(0, os.path.join(self.TEST_DIR, 'test_output', input_file))
        self.assertItemsEqual(self.run_pdf2htmlEX(args)['output_files'], expected_output_files)

    def test_generate_single_html_default_name_single_page_pdf(self):
        self.run_test_case('1-page.pdf', ['1-page.html'])

    def test_generate_single_html_default_name_multiple_page_pdf(self):
        self.run_test_case('2-pages.pdf', ['2-pages.html'])

    def test_generate_single_html_specify_name_single_page_pdf(self):
        self.run_test_case('1-page.pdf', ['foo.html'], ['foo.html'])

    def test_generate_single_html_specify_name_multiple_page_pdf(self):
        self.run_test_case('2-pages.pdf', ['foo.html'], ['foo.html'])

    def test_generate_split_pages_default_name_single_page(self):
        self.run_test_case('1-page.pdf', ['1-page.html', '1-page1.page'], ['--split-pages', 1])

    def test_generate_split_pages_default_name_multiple_pages(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', '3-pages1.page', '3-pages2.page', '3-pages3.page'], ['--split-pages', 1])

    def test_generate_split_pages_specify_name_single_page(self):
        self.run_test_case('1-page.pdf', ['1-page.html', 'foo1.xyz'], ['--split-pages', 1, '--page-filename', 'foo.xyz'])

    def test_generate_split_pages_specify_name_multiple_pages(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'foo1.xyz', 'foo2.xyz', 'foo3.xyz'], ['--split-pages', 1, '--page-filename', 'foo.xyz'])

    def test_generate_split_pages_specify_name_formatter_multiple_pages(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'fo1o.xyz', 'fo2o.xyz', 'fo3o.xyz'], ['--split-pages', 1, '--page-filename', 'fo%do.xyz'])

    def test_generate_split_pages_specify_name_formatter_with_padded_zeros_multiple_pages(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'fo001o.xyz', 'fo002o.xyz', 'fo003o.xyz'], ['--split-pages', 1, '--page-filename', 'fo%03do.xyz'])

    def test_generate_split_pages_specify_name_only_first_formatter_gets_taken(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f1o%do.xyz', 'f2o%do.xyz', 'f3o%do.xyz'], ['--split-pages', 1, '--page-filename', 'f%do%do.xyz'])

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_s(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%soo1.xyz', 'f%soo2.xyz', 'f%soo3.xyz'], ['--split-pages', 1, '--page-filename', 'f%soo.xyz'])

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_p(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%poo1.xyz', 'f%poo2.xyz', 'f%poo3.xyz'], ['--split-pages', 1, '--page-filename', 'f%poo.xyz'])

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_n(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%noo1.xyz', 'f%noo2.xyz', 'f%noo3.xyz'], ['--split-pages', 1, '--page-filename', 'f%noo.xyz'])

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%%oo1.xyz', 'f%%oo2.xyz', 'f%%oo3.xyz'], ['--split-pages', 1, '--page-filename', 'f%%oo.xyz'])

    # This case used to share its name with the next test, so the later
    # definition replaced it and it was never executed.  It now has its own
    # name; NOTE(review): its expectation has therefore never been exercised.
    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_before_actual_placeholder(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%%o1o.xyz', 'f%%o2o.xyz', 'f%%o3o.xyz'], ['--split-pages', 1, '--page-filename', 'f%%o%do.xyz'])

    def test_generate_split_pages_specify_name_only_percent_d_is_used_percent_percent_with_actual_placeholder(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'fo1o%%.xyz', 'fo2o%%.xyz', 'fo3o%%.xyz'], ['--split-pages', 1, '--page-filename', 'fo%do%%.xyz'])

    def test_generate_split_pages_specify_name_only_formatter_starts_part_way_through_invalid_formatter(self):
        self.run_test_case('3-pages.pdf', ['3-pages.html', 'f%021oo.xyz', 'f%022oo.xyz', 'f%023oo.xyz'], ['--split-pages', 1, '--page-filename', 'f%02%doo.xyz'])

    def test_generate_split_pages_specify_output_filename_no_formatter_no_extension(self):
        self.run_test_case('1-page.pdf', ['1-page.html', 'foo1'], ['--split-pages', 1, '--page-filename', 'foo'])

    def test_generate_single_html_name_specified_format_characters_percent_d(self):
        self.run_test_case('2-pages.pdf', ['foo%d.html'], ['foo%d.html'])

    def test_generate_single_html_name_specified_format_characters_percent_p(self):
        self.run_test_case('2-pages.pdf', ['foo%p.html'], ['foo%p.html'])

    def test_generate_single_html_name_specified_format_characters_percent_n(self):
        self.run_test_case('2-pages.pdf', ['foo%n.html'], ['foo%n.html'])

    def test_generate_single_html_name_specified_format_characters_percent_percent(self):
        self.run_test_case('2-pages.pdf', ['foo%%.html'], ['foo%%.html'])

138
test/test_remote_browser.py Executable file
View File

@ -0,0 +1,138 @@
#!/usr/bin/env python
# Run browser tests through Sauce Labs
import unittest
import sys
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
from sauceclient import SauceClient
from browser_tests import BrowserTests
# Sauce Labs credentials, read from the environment. When either one is
# missing, the remote browser tests are skipped.
USERNAME = os.environ.get('SAUCE_USERNAME')
ACCESS_KEY = os.environ.get('SAUCE_ACCESS_KEY')
# Base URL the remote browser loads the test pages from.
# Usually an HTTP server should be set up in the folder containing the test cases,
# and Sauce Connect should be enabled so the remote browser can reach it.
BASEURL='http://localhost:8000/'
# Capabilities added to every browser configuration below.
SAUCE_OPTIONS = {
'record-video': 'false',
}
# (name, capabilities) pairs; one test class named "test_remote_<name>" is
# generated per entry. We want to test the latest stable version of each
# browser, and 'beta' is usually the best approximation of it.
BROWSER_MATRIX = [
('win_ie', {
'platform': 'Windows 8.1',
'browserName': 'internet explorer',
'version': '11',
}),
('win_firefox', {
'platform': 'Windows 8.1',
'browserName': 'firefox',
'version': 'beta',
}),
('win_chrome', {
'platform': 'Windows 8.1',
'browserName': 'chrome',
'version': 'beta',
}),
('mac_firefox', {
'platform': 'OS X 10.9',
'browserName': 'firefox',
'version': 'beta',
}),
('mac_chrome', {
'platform': 'OS X 10.9',
'browserName': 'chrome',
'version': '40.0', # beta is not supported
}),
('linux_firefox', {
'platform': 'Linux',
'browserName': 'firefox',
'version': 'beta',
}),
('linux_chrome', {
'platform': 'Linux',
'browserName': 'chrome',
'version': 'beta',
}),
]
@unittest.skipIf((not (USERNAME and ACCESS_KEY)), 'Sauce Labs is not available')
class test_remote_browser_base(BrowserTests):
    """Browser tests that take their screenshots in a Sauce Labs remote browser.

    Concrete classes supply ``desired_capabilities``.  No remote session is
    opened when GENERATING_MODE is set.
    """

    @classmethod
    def setUpClass(cls):
        """Open the remote browser session and size its window."""
        super(test_remote_browser_base, cls).setUpClass()
        if not cls.GENERATING_MODE:
            cls.sauce = SauceClient(USERNAME, ACCESS_KEY)
            cls.sauce_url = 'http://%s:%s@ondemand.saucelabs.com:80/wd/hub' % (USERNAME, ACCESS_KEY)
            cls.browser = webdriver.Remote(
                desired_capabilities=cls.desired_capabilities,
                command_executor=cls.sauce_url
            )
            cls.browser.implicitly_wait(30)
            # remote screen may not be large enough for the whole page
            cls.browser.set_window_size(cls.BROWSER_WIDTH, cls.BROWSER_HEIGHT)

    @classmethod
    def tearDownClass(cls):
        """Close the remote browser session."""
        if not cls.GENERATING_MODE:
            cls.browser.quit()
        super(test_remote_browser_base, cls).tearDownClass()

    def setUp(self):
        super(test_remote_browser_base, self).setUp()
        # Start from a clean slate so tearDown can tell whether this test raised.
        sys.exc_clear()

    def tearDown(self):
        """Report the test outcome to Sauce Labs, then run the inherited cleanup."""
        try:
            # sauce/browser only exist outside generating mode (see setUpClass).
            if not self.GENERATING_MODE:
                passed = (sys.exc_info() == (None, None, None))
                branch = os.environ.get('TRAVIS_BRANCH', 'manual')
                pull_request = os.environ.get('TRAVIS_PULL_REQUEST', 'false')
                self.sauce.jobs.update_job(self.browser.session_id,
                    build_num=os.environ.get('TRAVIS_BUILD_NUMBER', '0'),
                    name='pdf2htmlEX',
                    passed=passed,
                    public='public restricted',
                    tags = [pull_request] if pull_request != 'false' else [branch]
                )
        finally:
            # setUp chains to the parent class, so tearDown has to as well;
            # otherwise the parent's per-test cleanup never runs.
            super(test_remote_browser_base, self).tearDown()

    def generate_image(self, html_file, png_file, page_must_load=True):
        """Load ``BASEURL + html_file`` remotely and save a screenshot to ``png_file``.

        Waits up to 5 seconds for the element with id ``page-container``.
        If it does not appear, the error is re-raised when
        ``page_must_load`` is true; otherwise the screenshot is taken anyway.
        """
        self.browser.get(BASEURL + html_file)
        try:
            WebDriverWait(self.browser, 5).until(expected_conditions.presence_of_element_located((By.ID, 'page-container')))
        except Exception:
            if page_must_load:
                raise
        self.browser.save_screenshot(png_file)
# Names of the generated test classes, filled in by generate_classes().
test_classnames = []

def generate_classes():
    """Create one ``test_remote_<browser>`` TestCase class per BROWSER_MATRIX entry.

    Each class gets its own ``desired_capabilities``: SAUCE_OPTIONS overlaid
    with the entry's capabilities, plus ``tunnel-identifier`` when
    TRAVIS_JOB_NUMBER is set.  The classes are stored in this module's
    globals and their names are appended to ``test_classnames``.
    """
    namespace = globals()
    tunnel_identifier = os.environ.get('TRAVIS_JOB_NUMBER')
    for browser_name, browser_caps in BROWSER_MATRIX:
        capabilities = dict(SAUCE_OPTIONS)
        capabilities.update(browser_caps)
        if tunnel_identifier:
            capabilities['tunnel-identifier'] = tunnel_identifier

        attrs = dict(test_remote_browser_base.__dict__)
        attrs['desired_capabilities'] = capabilities

        class_name = "test_remote_%s" % (browser_name, )
        namespace[class_name] = type(class_name, (test_remote_browser_base, unittest.TestCase), attrs)
        test_classnames.append(class_name)

generate_classes()