New master (#2)

* Show header in font map files

* fix a usage of unique_ptr with array

* Added '--quiet' argument to hide progress messages (resolves #503)

* Revert cout messages to cerr (see #622)

* bump version

* fix build; fix some coverity warnings

* Many bug fixes and improvements, including:

- Incorporated latest Cairo files from cairo-0.15.2
- Moved build to out-of-source
- Added clean script
- Rewritten correct_text_visibility option to improve accuracy
- Transparent characters drawn on background layer
- Improved bad unicode detection

* Many bug fixes and improvements, including:

- Incorporated latest Cairo files from cairo-0.15.2
- Moved build to out-of-source
- Added clean script
- Rewritten correct_text_visibility option to improve accuracy
- Transparent characters drawn on background layer
- Improved bad unicode detection

* Rationalise DPI to a single number.
Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages

* DPI fixes - increase DPI to covered-text-dpi when text is partially covered
Add font-style italic for oblique fonts
Reduce char bbox for occlusion tests

* Don't shrink bbox - not required if zoom=25 used

* Ignore occlusion from stroke/fill with opacity < 0.5
Better compute char bbox for occlusion
Use 10% inset for char bbox for occlusion
Back out adding font-weight: bold to potentially bold fonts
Fix bug to ensure CID ascent/descent matches subfont values

* Removed zero char logging

* Remove forced italic - missing italic is due to fontforge bug which needs fixing

* Typos fixed, readme updated

* Typos

* Increase maximum background image width
Fix private use range to avoid stupid mobile safari switching to emoji font

* included -pthread switch to link included 3rdparty poppler files.

* Updated files from poppler 0.59.0 and adjusted includes.

* Support updated "Object" class from poppler 0.59.0
This commit is contained in:
Trent Petersen 2018-01-10 13:31:38 -06:00 committed by GitHub
parent f12fc15515
commit 9ed21007e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 1397 additions and 773 deletions

View File

@ -17,16 +17,16 @@
// Copyright (C) 2005-2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Martin Kretzschmar <martink@gnome.org>
// Copyright (C) 2005, 2009, 2012, 2013 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006, 2007, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
// Copyright (C) 2008, 2009 Chris Wilson <chris@chris-wilson.co.uk>
// Copyright (C) 2008, 2012 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008, 2012, 2014, 2016 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2009 Darren Kenny <darren.kenny@sun.com>
// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2010 Jan Kümmel <jan+freedesktop@snorc.org>
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2015, 2016 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@ -58,16 +58,11 @@
#pragma implementation
#endif
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED
# define fontEngineLocker() MutexLocker locker(&mutex)
#else
*/
# define fontEngineLocker()
/*
#endif
*/
//------------------------------------------------------------------------
// CairoFont
@ -116,7 +111,7 @@ CairoFont::getGlyph(CharCode code,
double
CairoFont::getSubstitutionCorrection(GfxFont *gfxFont)
{
double w1, w2;
double w1, w2, w3;
CharCode code;
char *name;
@ -146,7 +141,8 @@ CairoFont::getSubstitutionCorrection(GfxFont *gfxFont)
cairo_font_options_destroy(options);
w2 = extents.x_advance;
}
if (!gfxFont->isSymbolic()) {
w3 = ((Gfx8BitFont *)gfxFont)->getWidth(0);
if (!gfxFont->isSymbolic() && w2 > 0 && w1 > w3) {
// if real font is substantially narrower than substituted
// font, reduce the font size accordingly
if (w1 > 0.01 && w1 < 0.9 * w2) {
@ -260,12 +256,16 @@ _ft_done_face (void *closure)
else
_ft_open_faces = data->next;
if (data->fd != -1) {
#if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4)
munmap ((char*)data->bytes, data->size);
munmap ((char*)data->bytes, data->size);
#else
munmap (data->bytes, data->size);
munmap (data->bytes, data->size);
#endif
close (data->fd);
close (data->fd);
} else {
gfree (data->bytes);
}
FT_Done_Face (data->face);
gfree (data);
@ -322,6 +322,8 @@ _ft_new_face (FT_Library lib,
munmap (tmpl.bytes, tmpl.size);
#endif
close (tmpl.fd);
} else {
gfree (tmpl.bytes);
}
*face_out = l->face;
*font_face_out = cairo_font_face_reference (l->font_face);
@ -399,7 +401,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
GfxFontType fontType;
GfxFontLoc *fontLoc;
char **enc;
char *name;
const char *name;
FoFiTrueType *ff;
FoFiType1C *ff1c;
Ref ref;
@ -408,7 +410,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
int *codeToGID;
Guint codeToGIDLen;
codeToGID = NULL;
codeToGIDLen = 0;
font_data = NULL;
@ -417,12 +419,11 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
fileNameC = NULL;
GBool substitute = gFalse;
ref = *gfxFont->getID();
fontType = gfxFont->getType();
// pdf2htmlEX: changed gFalse to nullptr
if (!(fontLoc = gfxFont->locateFont(xref, nullptr))) {
if (!(fontLoc = gfxFont->locateFont(xref, NULL))) {
error(errSyntaxError, -1, "Couldn't find a font for '{0:s}'",
gfxFont->getName() ? gfxFont->getName()->getCString()
: "(unnamed)");
@ -454,15 +455,26 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
error(errSyntaxError, -1, "could not create type1 face");
goto err2;
}
enc = ((Gfx8BitFont *)gfxFont)->getEncoding();
codeToGID = (int *)gmallocn(256, sizeof(int));
codeToGIDLen = 256;
for (i = 0; i < 256; ++i) {
codeToGID[i] = 0;
if ((name = enc[i])) {
codeToGID[i] = FT_Get_Name_Index(face, name);
codeToGID[i] = FT_Get_Name_Index(face, (char*)name);
if (codeToGID[i] == 0) {
Unicode u;
u = globalParams->mapNameToUnicodeText (name);
codeToGID[i] = FT_Get_Char_Index (face, u);
}
if (codeToGID[i] == 0) {
name = GfxFont::getAlternateName(name);
if (name) {
codeToGID[i] = FT_Get_Name_Index(face, (char*)name);
}
}
}
}
break;
@ -491,6 +503,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
codeToGIDLen = n;
/* Fall through */
case fontTrueType:
case fontTrueTypeOT:
if (font_data != NULL) {
ff = FoFiTrueType::make(font_data, font_data_len);
} else {
@ -501,7 +514,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
goto err2;
}
/* This might be set already for the CIDType2 case */
if (fontType == fontTrueType) {
if (fontType == fontTrueType || fontType == fontTrueTypeOT) {
codeToGID = ((Gfx8BitFont *)gfxFont)->getCodeToGIDMap(ff);
codeToGIDLen = 256;
}
@ -511,7 +524,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
goto err2;
}
break;
case fontCIDType0:
case fontCIDType0C:
@ -532,13 +545,45 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
}
if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
gfree(codeToGID);
codeToGID = NULL;
error(errSyntaxError, -1, "could not create cid face\n");
goto err2;
}
break;
case fontCIDType0COT:
codeToGID = NULL;
n = 0;
if (((GfxCIDFont *)gfxFont)->getCIDToGID()) {
n = ((GfxCIDFont *)gfxFont)->getCIDToGIDLen();
if (n) {
codeToGID = (int *)gmallocn(n, sizeof(int));
memcpy(codeToGID, ((GfxCIDFont *)gfxFont)->getCIDToGID(),
n * sizeof(int));
}
}
codeToGIDLen = n;
if (!codeToGID) {
if (!useCIDs) {
if (font_data != NULL) {
ff = FoFiTrueType::make(font_data, font_data_len);
} else {
ff = FoFiTrueType::load(fileNameC);
}
if (ff) {
if (ff->isOpenTypeCFF()) {
codeToGID = ff->getCIDToGIDMap((int *)&codeToGIDLen);
}
delete ff;
}
}
}
if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
error(errSyntaxError, -1, "could not create cid (OT) face\n");
goto err2;
}
break;
default:
fprintf (stderr, "font type %d not handled\n", (int)fontType);
goto err2;
@ -554,6 +599,8 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
err2:
/* hmm? */
delete fontLoc;
gfree (codeToGID);
gfree (font_data);
fprintf (stderr, "some font thing failed\n");
return NULL;
}
@ -659,7 +706,8 @@ _render_type3_glyph (cairo_scaled_font_t *scaled_font,
output_dev->startDoc(info->doc, info->fontEngine);
output_dev->startPage (1, gfx->getState(), gfx->getXRef());
output_dev->setInType3Char(gTrue);
gfx->display(charProcs->getVal(glyph, &charProc));
charProc = charProcs->getVal(glyph);
gfx->display(&charProc);
output_dev->getType3GlyphWidth (&wx, &wy);
cairo_matrix_transform_distance (&matrix, &wx, &wy);
@ -678,7 +726,6 @@ _render_type3_glyph (cairo_scaled_font_t *scaled_font,
delete gfx;
delete output_dev;
charProc.free();
return CAIRO_STATUS_SUCCESS;
}
@ -762,33 +809,27 @@ CairoFontEngine::CairoFontEngine(FT_Library libA) {
for (i = 0; i < cairoFontCacheSize; ++i) {
fontCache[i] = NULL;
}
FT_Int major, minor, patch;
// as of FT 2.1.8, CID fonts are indexed by CID instead of GID
FT_Library_Version(lib, &major, &minor, &patch);
useCIDs = major > 2 ||
(major == 2 && (minor > 1 || (minor == 1 && patch > 7)));
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED
gInitMutex(&mutex);
#endif
*/
}
CairoFontEngine::~CairoFontEngine() {
int i;
for (i = 0; i < cairoFontCacheSize; ++i) {
if (fontCache[i])
delete fontCache[i];
}
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED
gDestroyMutex(&mutex);
#endif
*/
}
CairoFont *
@ -797,7 +838,7 @@ CairoFontEngine::getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing, XRef *xr
Ref ref;
CairoFont *font;
GfxFontType fontType;
fontEngineLocker();
ref = *gfxFont->getID();
@ -811,7 +852,7 @@ CairoFontEngine::getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing, XRef *xr
return font;
}
}
fontType = gfxFont->getType();
if (fontType == fontType3)
font = CairoType3Font::create (gfxFont, doc, this, printing, xref);

View File

@ -74,7 +74,7 @@ protected:
class CairoFreeTypeFont : public CairoFont {
public:
static CairoFreeTypeFont *create(GfxFont *gfxFont, XRef *xref, FT_Library lib, GBool useCIDs);
virtual ~CairoFreeTypeFont();
~CairoFreeTypeFont();
private:
CairoFreeTypeFont(Ref ref, cairo_font_face_t *cairo_font_face,
@ -88,9 +88,9 @@ public:
static CairoType3Font *create(GfxFont *gfxFont, PDFDoc *doc,
CairoFontEngine *fontEngine,
GBool printing, XRef *xref);
virtual ~CairoType3Font();
~CairoType3Font();
virtual GBool matches(Ref &other, GBool printing);
GBool matches(Ref &other, GBool printing) override;
private:
CairoType3Font(Ref ref, PDFDoc *doc,
@ -121,12 +121,9 @@ private:
CairoFont *fontCache[cairoFontCacheSize];
FT_Library lib;
GBool useCIDs;
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED
GooMutex mutex;
#endif
*/
};
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -18,9 +18,11 @@
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
// Copyright (C) 2006-2011, 2013 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2008, 2009, 2011-2013 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008, 2009, 2011-2016 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
// Copyright (C) 2010-2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2015 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2016 Jason Crain <jason@aquaticape.us>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
@ -63,14 +65,14 @@ public:
// Set the image cairo surface
void setImage (cairo_surface_t *image);
// Get the image cairo surface
cairo_surface_t *getImage () const { return image; }
// Get the image rectangle
void getRect (double *xa1, double *ya1, double *xa2, double *ya2)
{ *xa1 = x1; *ya1 = y1; *xa2 = x2; *ya2 = y2; }
private:
cairo_surface_t *image; // image cairo surface
double x1, y1; // upper left corner
@ -95,114 +97,125 @@ public:
// Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; }
GBool upsideDown() override { return gTrue; }
// Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gTrue; }
GBool useDrawChar() override { return gTrue; }
// Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing
// operations.
virtual GBool useTilingPatternFill() { return gTrue; }
GBool useTilingPatternFill() override { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool useShadedFills(int type) { return type <= 7; }
GBool useShadedFills(int type) override { return type <= 7; }
#else
virtual GBool useShadedFills(int type) { return type < 4; }
GBool useShadedFills(int type) override { return type > 1 && type < 4; }
#endif
// Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gTrue; }
GBool useFillColorStop() override { return gTrue; }
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
GBool interpretType3Chars() override { return gFalse; }
// Does this device need to clip pages to the crop box even when the
// box is the crop box?
GBool needClipToCropBox() override { return gTrue; }
// Does this device need to clip pages to the crop box even when the
// box is the crop box?
virtual GBool needClipToCropBox() { return gTrue; }
//----- initialization and control
// Start a page.
virtual void startPage(int pageNum, GfxState *state, XRef *xref);
void startPage(int pageNum, GfxState *state, XRef *xref) override;
// End a page.
virtual void endPage();
void endPage() override;
//----- save/restore graphics state
virtual void saveState(GfxState *state);
virtual void restoreState(GfxState *state);
void saveState(GfxState *state) override;
void restoreState(GfxState *state) override;
//----- update graphics state
virtual void updateAll(GfxState *state);
virtual void setDefaultCTM(double *ctm);
virtual void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32);
virtual void updateLineDash(GfxState *state);
virtual void updateFlatness(GfxState *state);
virtual void updateLineJoin(GfxState *state);
virtual void updateLineCap(GfxState *state);
virtual void updateMiterLimit(GfxState *state);
virtual void updateLineWidth(GfxState *state);
virtual void updateFillColor(GfxState *state);
virtual void updateStrokeColor(GfxState *state);
virtual void updateFillOpacity(GfxState *state);
virtual void updateStrokeOpacity(GfxState *state);
virtual void updateFillColorStop(GfxState *state, double offset);
virtual void updateBlendMode(GfxState *state);
void updateAll(GfxState *state) override;
void setDefaultCTM(double *ctm) override;
void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32) override;
void updateLineDash(GfxState *state) override;
void updateFlatness(GfxState *state) override;
void updateLineJoin(GfxState *state) override;
void updateLineCap(GfxState *state) override;
void updateMiterLimit(GfxState *state) override;
void updateLineWidth(GfxState *state) override;
void updateFillColor(GfxState *state) override;
void updateStrokeColor(GfxState *state) override;
void updateFillOpacity(GfxState *state) override;
void updateStrokeOpacity(GfxState *state) override;
void updateFillColorStop(GfxState *state, double offset) override;
void updateBlendMode(GfxState *state) override;
//----- update text state
virtual void updateFont(GfxState *state);
void updateFont(GfxState *state) override;
//----- path painting
virtual void stroke(GfxState *state);
virtual void fill(GfxState *state);
virtual void eoFill(GfxState *state);
virtual void clipToStrokePath(GfxState *state);
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep);
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual GBool axialShadedSupportExtend(GfxState *state, GfxAxialShading *shading);
virtual GBool radialShadedFill(GfxState *state, GfxRadialShading *shading, double sMin, double sMax);
virtual GBool radialShadedSupportExtend(GfxState *state, GfxRadialShading *shading);
void stroke(GfxState *state) override;
void fill(GfxState *state) override;
void eoFill(GfxState *state) override;
void clipToStrokePath(GfxState *state) override;
GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep) override;
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool gouraudTriangleShadedFill(GfxState *state, GfxGouraudTriangleShading *shading);
virtual GBool patchMeshShadedFill(GfxState *state, GfxPatchMeshShading *shading);
GBool functionShadedFill(GfxState *state, GfxFunctionShading *shading) override;
#endif
GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) override;
GBool axialShadedSupportExtend(GfxState *state, GfxAxialShading *shading) override;
GBool radialShadedFill(GfxState *state, GfxRadialShading *shading, double sMin, double sMax) override;
GBool radialShadedSupportExtend(GfxState *state, GfxRadialShading *shading) override;
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
GBool gouraudTriangleShadedFill(GfxState *state, GfxGouraudTriangleShading *shading) override;
GBool patchMeshShadedFill(GfxState *state, GfxPatchMeshShading *shading) override;
#endif
//----- path clipping
virtual void clip(GfxState *state);
virtual void eoClip(GfxState *state);
void clip(GfxState *state) override;
void eoClip(GfxState *state) override;
//----- text drawing
void beginString(GfxState *state, GooString *s);
void endString(GfxState *state);
void beginString(GfxState *state, GooString *s) override;
void endString(GfxState *state) override;
void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
void beginActualText(GfxState *state, GooString *text);
void endActualText(GfxState *state);
CharCode code, int nBytes, Unicode *u, int uLen) override;
void beginActualText(GfxState *state, GooString *text) override;
void endActualText(GfxState *state) override;
virtual GBool beginType3Char(GfxState *state, double x, double y,
double dx, double dy,
CharCode code, Unicode *u, int uLen);
virtual void endType3Char(GfxState *state);
virtual void beginTextObject(GfxState *state);
virtual void endTextObject(GfxState *state);
GBool beginType3Char(GfxState *state, double x, double y,
double dx, double dy,
CharCode code, Unicode *u, int uLen) override;
void endType3Char(GfxState *state) override;
void beginTextObject(GfxState *state) override;
void endTextObject(GfxState *state) override;
//----- image drawing
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
virtual void setSoftMaskFromImageMask(GfxState *state,
Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix);
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix);
void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg) override;
void setSoftMaskFromImageMask(GfxState *state,
Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix) override;
void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) override;
void drawImageMaskPrescaled(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
@ -210,53 +223,54 @@ public:
int width, int height, GBool invert, GBool interpolate,
GBool inlineImg);
virtual void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg);
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate);
void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg) override;
void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate) override;
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate);
void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate) override;
//----- transparency groups and soft masks
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/);
virtual void endTransparencyGroup(GfxState * /*state*/);
GBool /*forSoftMask*/) override;
void endTransparencyGroup(GfxState * /*state*/) override;
void popTransparencyGroup();
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/);
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/);
virtual void clearSoftMask(GfxState * /*state*/);
void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) override;
void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/) override;
void clearSoftMask(GfxState * /*state*/) override;
//----- Type 3 font operators
virtual void type3D0(GfxState *state, double wx, double wy);
virtual void type3D1(GfxState *state, double wx, double wy,
double llx, double lly, double urx, double ury);
void type3D0(GfxState *state, double wx, double wy) override;
void type3D1(GfxState *state, double wx, double wy,
double llx, double lly, double urx, double ury) override;
//----- special access
// Called to indicate that a new PDF document has been loaded.
void startDoc(PDFDoc *docA, CairoFontEngine *fontEngine = NULL);
GBool isReverseVideo() { return gFalse; }
void setCairo (cairo_t *cr);
void setTextPage (TextPage *text);
void setPrinting (GBool printing) { this->printing = printing; needFontUpdate = gTrue; }
void setAntialias(cairo_antialias_t antialias);
void setInType3Char(GBool inType3Char) { this->inType3Char = inType3Char; }
void getType3GlyphWidth (double *wx, double *wy) { *wx = t3_glyph_wx; *wy = t3_glyph_wy; }
@ -272,11 +286,14 @@ protected:
cairo_filter_t getFilterForSurface(cairo_surface_t *image,
GBool interpolate);
GBool getStreamData (Stream *str, char **buffer, int *length);
// pdf2htmlEX: make setMimeData virtual, we need to override it
virtual
void setMimeData(Stream *str, Object *ref, cairo_surface_t *image);
void setMimeData(GfxState *state, Stream *str, Object *ref,
GfxImageColorMap *colorMap, cairo_surface_t *image);
void fillToStrokePathClip(GfxState *state);
void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y);
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 14, 0)
GBool setMimeDataForJBIG2Globals (Stream *str, cairo_surface_t *image);
#endif
static void setContextAntialias(cairo_t *cr, cairo_antialias_t antialias);
GfxRGB fill_color, stroke_color;
cairo_pattern_t *fill_pattern, *stroke_pattern;
@ -298,6 +315,7 @@ protected:
cairo_line_cap_t cap;
cairo_line_join_t join;
double miter;
int ref_count;
} *strokePathClip;
PDFDoc *doc; // the current document
@ -313,6 +331,7 @@ protected:
GBool needFontUpdate; // set when the font needs to be updated
GBool printing;
GBool use_show_text_glyphs;
GBool text_matrix_valid;
cairo_surface_t *surface;
cairo_glyph_t *glyphs;
int glyphCount;
@ -327,7 +346,7 @@ protected:
double t3_glyph_wx, t3_glyph_wy;
GBool t3_glyph_has_bbox;
double t3_glyph_bbox[4];
cairo_antialias_t antialias;
GBool prescaleImages;
TextPage *text; // text for the current page
@ -373,118 +392,118 @@ public:
// Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; }
GBool upsideDown() override { return gTrue; }
// Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; }
GBool useDrawChar() override { return gFalse; }
// Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing
// operations.
virtual GBool useTilingPatternFill() { return gTrue; }
GBool useTilingPatternFill() override { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 11, 2)
virtual GBool useShadedFills(int type) { return type <= 7; }
GBool useShadedFills(int type) override { return type <= 7; }
#else
virtual GBool useShadedFills(int type) { return type < 4; }
GBool useShadedFills(int type) override { return type < 4; }
#endif
// Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gFalse; }
GBool useFillColorStop() override { return gFalse; }
// Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; }
GBool interpretType3Chars() override { return gFalse; }
// Does this device need non-text content?
virtual GBool needNonText() { return gTrue; }
GBool needNonText() override { return gTrue; }
//----- save/restore graphics state
virtual void saveState(GfxState *state) { }
virtual void restoreState(GfxState *state) { }
void saveState(GfxState *state) override { }
void restoreState(GfxState *state) override { }
//----- update graphics state
virtual void updateAll(GfxState *state) { }
virtual void setDefaultCTM(double *ctm) { }
virtual void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32) { }
virtual void updateLineDash(GfxState *state) { }
virtual void updateFlatness(GfxState *state) { }
virtual void updateLineJoin(GfxState *state) { }
virtual void updateLineCap(GfxState *state) { }
virtual void updateMiterLimit(GfxState *state) { }
virtual void updateLineWidth(GfxState *state) { }
virtual void updateFillColor(GfxState *state) { }
virtual void updateStrokeColor(GfxState *state) { }
virtual void updateFillOpacity(GfxState *state) { }
virtual void updateStrokeOpacity(GfxState *state) { }
virtual void updateBlendMode(GfxState *state) { }
void updateAll(GfxState *state) override { }
void setDefaultCTM(double *ctm) override { }
void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32) override { }
void updateLineDash(GfxState *state) override { }
void updateFlatness(GfxState *state) override { }
void updateLineJoin(GfxState *state) override { }
void updateLineCap(GfxState *state) override { }
void updateMiterLimit(GfxState *state) override { }
void updateLineWidth(GfxState *state) override { }
void updateFillColor(GfxState *state) override { }
void updateStrokeColor(GfxState *state) override { }
void updateFillOpacity(GfxState *state) override { }
void updateStrokeOpacity(GfxState *state) override { }
void updateBlendMode(GfxState *state) override { }
//----- update text state
virtual void updateFont(GfxState *state) { }
void updateFont(GfxState *state) override { }
//----- path painting
virtual void stroke(GfxState *state) { }
virtual void fill(GfxState *state) { }
virtual void eoFill(GfxState *state) { }
virtual void clipToStrokePath(GfxState *state) { }
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep) { return gTrue; }
virtual GBool axialShadedFill(GfxState *state,
GfxAxialShading *shading,
double tMin, double tMax) { return gTrue; }
virtual GBool radialShadedFill(GfxState *state,
GfxRadialShading *shading,
double sMin, double sMax) { return gTrue; }
void stroke(GfxState *state) override { }
void fill(GfxState *state) override { }
void eoFill(GfxState *state) override { }
void clipToStrokePath(GfxState *state) override { }
GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox,
int x0, int y0, int x1, int y1,
double xStep, double yStep) override { return gTrue; }
GBool axialShadedFill(GfxState *state,
GfxAxialShading *shading,
double tMin, double tMax) override { return gTrue; }
GBool radialShadedFill(GfxState *state,
GfxRadialShading *shading,
double sMin, double sMax) override { return gTrue; }
//----- path clipping
virtual void clip(GfxState *state) { }
virtual void eoClip(GfxState *state) { }
void clip(GfxState *state) override { }
void eoClip(GfxState *state) override { }
//----- image drawing
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg);
virtual void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg);
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate);
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate);
virtual void setSoftMaskFromImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix);
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) {}
void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool interpolate, GBool inlineImg) override;
void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg) override;
void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap,
GBool maskInterpolate) override;
void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height,
GfxImageColorMap *colorMap,
GBool interpolate,
Stream *maskStr,
int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate) override;
void setSoftMaskFromImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix) override;
void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) override {}
//----- transparency groups and soft masks
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/) {}
virtual void endTransparencyGroup(GfxState * /*state*/) {}
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) {}
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/) {}
virtual void clearSoftMask(GfxState * /*state*/) {}
void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/) override {}
void endTransparencyGroup(GfxState * /*state*/) override {}
void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) override {}
void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/) override {}
void clearSoftMask(GfxState * /*state*/) override {}
//----- Image list
// By default images are not rendered
@ -498,7 +517,7 @@ private:
void saveImage(CairoImage *image);
void getBBox(GfxState *state, int width, int height,
double *x1, double *y1, double *x2, double *y2);
CairoImage **images;
int numImages;
int size;

View File

@ -51,6 +51,7 @@
#include <stdlib.h>
#include <math.h>
#include "goo/gmem.h"
//#include "goo/gtypes_p.h"
#include "CairoRescaleBox.h"
@ -374,4 +375,4 @@ cleanup:
free (scanline);
return retval;
}
}

View File

@ -58,4 +58,4 @@ public:
};
#endif /* CAIRO_RESCALE_BOX_H */
#endif /* CAIRO_RESCALE_BOX_H */

View File

@ -9,7 +9,7 @@ option(ENABLE_SVG "Enable SVG support, for generating SVG background images and
include_directories(${CMAKE_SOURCE_DIR}/src)
set(PDF2HTMLEX_VERSION "0.14.6")
set(PDF2HTMLEX_VERSION "0.15.0")
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
add_custom_target(dist
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
@ -81,7 +81,7 @@ endif()
if(CYGWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -pthread")
endif()
# check the C++11 features we need

View File

@ -2,6 +2,28 @@ pdf2htmlEX is no longer under active development. New maintainers are [wanted](h
#![](http://coolwanglu.github.io/pdf2htmlEX/images/pdf2htmlEX-64x64.png) pdf2htmlEX
# My branch differences:
This is my branch of pdf2htmlEX which I maintain for my own purposes. I have made a number of changes and improvements over the original code:
* Lots of bug fixes, mostly for edge cases
* Integration of latest Cairo code
* Out of source building
* Rewritten handling of obscured/partially obscured text - now much more accurate
* Some support for transparent text
* Improvement of DPI settings - clamping of DPI to ensure output graphic isn't too big
`--correct-text-visibility` tracks the visibility of 4 sample points for each character (currently the 4 corners of the character's bounding box, inset slightly) to determine visibility.
It now has two modes. 1 = Fully occluded text handled (i.e. doesn't get put into the HTML layer). 2 = Partially occluded text handled.
The default is now "1", so fully occluded text should no longer show through. If "2" is selected, any character that is partially occluded will be drawn in the background layer. In this case, the rendered DPI of the page will be automatically increased to `--covered-text-dpi` (default: 300) to reduce the impact of rasterized text.
For maximum accuracy I strongly recommend using the output options: `--font-size-multiplier 1 --zoom 25`. This will circumvent rounding errors inside web browsers. You will then have to scale down the resulting HTML page using an appropriate "scale" transform.
If you are concerned about file size of the resulting HTML, then I recommend patching fontforge to prevent it writing the current time into the dumped fonts, and then post-process the pdf2htmlEX data to remove duplicate files - there will usually be many duplicate background images and fonts.
# Original README.md follows...
<!--
[![Build Status](https://travis-ci.org/coolwanglu/pdf2htmlEX.png?branch=master)](https://travis-ci.org/coolwanglu/pdf2htmlEX)
-->

4
dobuild Executable file
View File

@ -0,0 +1,4 @@
mkdir build
cd build
cmake ..
make install

1
doclean Executable file
View File

@ -0,0 +1 @@
rm -rf build pdf2htmlEX.1 share/*.css share/*.js share/*.min.* src/pdf2htmlEX-config.h src/util/css_const.h

View File

@ -247,9 +247,10 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above.
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
.TP
.B --correct-text-visibility <0|1> (Default: 0)
If set to 1, pdf2htmlEX will try to detect texts covered by other graphics and properly arrange them,
i.e. covered texts are made transparent in text layer, and are drawn on background layer.
.B --correct-text-visibility <0|1|2> (Default: 1)
0 : Do not do visibility calculations for text
1 : Text fully occluded will be drawn in the background layer
2 : Text partially occluded will be drawn in the background layer (more false positives than option "1")
.SS Background Image

View File

@ -901,7 +901,7 @@ Viewer.prototype = {
var self = this;
/**
* page should have type Page
* page should have type Page
* @param{Page} page
*/
var transform_and_scroll = function(page) {

View File

@ -134,9 +134,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
if(param.embed_image)
html_renderer->tmp_files.add(fn);
surface = cairo_svg_surface_create(fn.c_str(), page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI);
surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
cairo_t * cr = cairo_create(surface);
setCairo(cr);
@ -144,15 +144,15 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
bitmaps_in_current_page.clear();
bool process_annotation = param.process_annotation;
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi,
0,
doc->displayPage(this, pageno, param.actual_dpi, param.actual_dpi,
0,
(!(param.use_cropbox)),
false,
false,
false,
nullptr, nullptr, &annot_cb, &process_annotation);
setCairo(nullptr);
{
auto status = cairo_status(cr);
cairo_destroy(cr);
@ -198,7 +198,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
void CairoBackgroundRenderer::embed_image(int pageno)
{
auto & f_page = *(html_renderer->f_curpage);
// SVGs introduced by <img> or background-image can't have external resources;
// SVGs introduced by <embed> and <object> can, but they are more expensive for browsers.
// So we use <img> if the SVG contains no external bitmaps, and use <embed> otherwise.
@ -235,11 +235,11 @@ string CairoBackgroundRenderer::build_bitmap_path(int id)
return string(html_renderer->str_fmt("%s/o%d.jpg", param.dest_dir.c_str(), id));
}
// Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files.
void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image)
void CairoBackgroundRenderer::setMimeData(GfxState *state, Stream *str, Object *ref, GfxImageColorMap *colorMap, cairo_surface_t *image)
{
if (param.svg_embed_bitmap)
{
CairoOutputDev::setMimeData(str, ref, image);
CairoOutputDev::setMimeData(state, str, ref, colorMap, image);
return;
}
@ -263,21 +263,20 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac
//
// In PDF, jpeg stream objects can also specify other color spaces like DeviceN and Separation,
// It is also not safe to dump them directly.
Object obj;
str->getDict()->lookup("ColorSpace", &obj);
Object obj = str->getDict()->lookup("ColorSpace");
if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) )
{
obj.free();
//obj.free();
return;
}
obj.free();
str->getDict()->lookup("Decode", &obj);
//obj.free();
obj = str->getDict()->lookup("Decode");
if (obj.isArray())
{
obj.free();
//obj.free();
return;
}
obj.free();
//obj.free();
int imgId = ref->getRef().num;
auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId));

View File

@ -51,7 +51,7 @@ public:
void updateRender(GfxState *state);
protected:
virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image);
virtual void setMimeData(GfxState *state, Stream *str, Object *ref, GfxImageColorMap *colorMap, cairo_surface_t *image);
protected:
HTMLRenderer * html_renderer;

View File

@ -29,7 +29,7 @@ using std::unique_ptr;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTMLRenderer * html_renderer, const Param & param)
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white))
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white), gTrue, splashThinLineSolid) // DCRH: Make thin line mode = solid
, html_renderer(html_renderer)
, param(param)
, format(imgFormat)
@ -67,30 +67,10 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// draw characters as image when
// - in fallback mode
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font while param.process_type3 is not enabled
// - OR the text is used as path
if((param.fallback || param.proof)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
)
)
{
if (param.proof || html_renderer->is_char_covered(drawn_char_count)) {
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
}
// If a char is treated as image, it is not subject to cover test
// (see HTMLRenderer::drawString), so don't increase drawn_char_count.
else if (param.correct_text_visibility) {
if (html_renderer->is_char_covered(drawn_char_count))
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
drawn_char_count++;
}
drawn_char_count++;
}
void SplashBackgroundRenderer::beginTextObject(GfxState *state)
@ -134,7 +114,8 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
{
drawn_char_count = 0;
bool process_annotation = param.process_annotation;
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi,
doc->displayPage(this, pageno, param.actual_dpi, param.actual_dpi,
0,
(!(param.use_cropbox)),
false, false,
@ -159,8 +140,8 @@ void SplashBackgroundRenderer::embed_image(int pageno)
dump_image((char*)fn, xmin, ymin, xmax, ymax);
}
double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.h_dpi;
double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.v_dpi;
double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
auto & f_page = *(html_renderer->f_curpage);
auto & all_manager = html_renderer->all_manager;
@ -227,7 +208,7 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
throw string("Image format not supported: ") + format;
}
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi))
if(!writer->init(f, width, height, param.actual_dpi, param.actual_dpi))
throw "Cannot initialize image writer";
auto * bitmap = getBitmap();

View File

@ -7,43 +7,110 @@
#include "CoveredTextDetector.h"
#include <algorithm>
#include "util/math.h"
//#define DEBUG
namespace pdf2htmlEX {
CoveredTextDetector::CoveredTextDetector(Param & param): param(param)
{
}
void CoveredTextDetector::reset()
{
char_bboxes.clear();
chars_covered.clear();
char_pts_visible.clear();
}
void CoveredTextDetector::add_char_bbox(double * bbox)
void CoveredTextDetector::add_char_bbox(cairo_t *cairo, double * bbox)
{
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
chars_covered.push_back(false);
char_pts_visible.push_back(1|2|4|8);
}
void CoveredTextDetector::add_char_bbox_clipped(double * bbox, bool patially)
void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, int pts_visible)
{
#ifdef DEBUG
printf("add_char_bbox_clipped: pts_visible:%x: [%f,%f,%f,%f]\n", pts_visible, bbox[0], bbox[1], bbox[2], bbox[3]);
#endif
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
chars_covered.push_back(true);
if (patially)
add_non_char_bbox(bbox, chars_covered.size() - 1);
char_pts_visible.push_back(pts_visible);
// DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2
if (pts_visible == 0 || param.correct_text_visibility == 2) {
chars_covered.push_back(true);
if (pts_visible > 0 && param.correct_text_visibility == 2) {
param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution
}
} else {
chars_covered.push_back(false);
}
}
void CoveredTextDetector::add_non_char_bbox(double * bbox, int index)
// We now track the visibility of each corner of the char bbox. Potentially we could track
// more sample points but this should be sufficient for most cases.
// We check to see if each point is covered by any stroke or fill operation
// and mark it as invisible if so
void CoveredTextDetector::add_non_char_bbox(cairo_t *cairo, double * bbox, int what)
{
if (index < 0)
index = chars_covered.size();
for (int i = 0; i < index; i++)
{
int index = chars_covered.size();
for (int i = 0; i < index; i++) {
if (chars_covered[i])
continue;
double * cbbox = &char_bboxes[i * 4];
if (bbox_intersect(cbbox, bbox))
{
chars_covered[i] = true;
add_non_char_bbox(cbbox, i);
#ifdef DEBUG
printf("add_non_char_bbox: what=%d, cbbox:[%f,%f,%f,%f], bbox:[%f,%f,%f,%f]\n", what, cbbox[0], cbbox[1], cbbox[2], cbbox[3], bbox[0], bbox[1], bbox[2], bbox[3]);
#endif
if (bbox_intersect(cbbox, bbox)) {
int pts_visible = char_pts_visible[i];
#ifdef DEBUG
printf("pts_visible=%x\n", pts_visible);
#endif
if ((pts_visible & 1) && cairo_in_clip(cairo, cbbox[0], cbbox[1]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[0], cbbox[1])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[0], cbbox[1])))) {
pts_visible &= ~1;
}
if ((pts_visible & 2) && cairo_in_clip(cairo, cbbox[2], cbbox[1]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[2], cbbox[1])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[2], cbbox[1])))) {
pts_visible &= ~2;
}
if ((pts_visible & 4) && cairo_in_clip(cairo, cbbox[2], cbbox[3]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[2], cbbox[3])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[2], cbbox[3])))) {
pts_visible &= ~4;
}
if ((pts_visible & 8) && cairo_in_clip(cairo, cbbox[0], cbbox[3]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[0], cbbox[3])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[0], cbbox[3])))) {
pts_visible &= ~8;
}
#ifdef DEBUG
printf("pts_visible=%x\n", pts_visible);
#endif
char_pts_visible[i] = pts_visible;
if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) {
#ifdef DEBUG
printf("Char covered\n");
#endif
chars_covered[i] = true;
if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
param.actual_dpi = std::min(param.text_dpi, param.max_dpi);
}
}
} else {
#ifdef DEBUG
printf("Not covered\n");
#endif
}
}
}

View File

@ -9,6 +9,9 @@
#define COVEREDTEXTDETECTOR_H__
#include <vector>
#include "Param.h"
#include <cairo.h>
namespace pdf2htmlEX {
@ -19,6 +22,8 @@ class CoveredTextDetector
{
public:
CoveredTextDetector(Param & param);
/**
* Reset to initial state. Should be called when start drawing a page.
*/
@ -28,9 +33,9 @@ public:
* Add a drawn character's bounding box.
* @param bbox (x0, y0, x1, y1)
*/
void add_char_bbox(double * bbox);
void add_char_bbox(cairo_t *, double * bbox);
void add_char_bbox_clipped(double * bbox, bool patially);
void add_char_bbox_clipped(cairo_t *,double * bbox, int pts_covered);
/**
* Add a drawn non-char graphics' bounding box.
@ -40,7 +45,7 @@ public:
* @param index this graphics' drawing order: assume it is drawn after (index-1)th
* char. -1 means after the last char.
*/
void add_non_char_bbox(double * bbox, int index = -1);
void add_non_char_bbox(cairo_t *cairo, double * bbox, int what);
/**
* An array of flags indicating whether a char is covered by any non-char graphics.
@ -54,6 +59,8 @@ private:
std::vector<bool> chars_covered;
// x00, y00, x01, y01; x10, y10, x11, y11;...
std::vector<double> char_bboxes;
std::vector<int> char_pts_visible;
Param & param;
};
}

View File

@ -11,18 +11,15 @@
#include "DrawingTracer.h"
#if !ENABLE_SVG
#warning "Cairo is disabled because ENABLE_SVG is off, --correct-text-visibility has limited functionality."
#error "ENABLE_SVG must be enabled"
#endif
static constexpr bool DT_DEBUG = false;
//#define DEBUG
namespace pdf2htmlEX
{
DrawingTracer::DrawingTracer(const Param & param): param(param)
#if ENABLE_SVG
, cairo(nullptr)
#endif
DrawingTracer::DrawingTracer(const Param & param): param(param), cairo(nullptr)
{
}
@ -33,11 +30,8 @@ DrawingTracer::~DrawingTracer()
void DrawingTracer::reset(GfxState *state)
{
if (!param.correct_text_visibility)
return;
finish();
#if ENABLE_SVG
// pbox is defined in device space, which is affected by zooming;
// We want to trace in page space which is stable, so invert pbox by ctm.
double pbox[] { 0, 0, state->getPageWidth(), state->getPageHeight() };
@ -48,20 +42,24 @@ void DrawingTracer::reset(GfxState *state)
cairo_rectangle_t page_box { pbox[0], pbox[1], pbox[2] - pbox[0], pbox[3] - pbox[1] };
cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
cairo = cairo_create(surface);
if (DT_DEBUG)
printf("DrawingTracer::reset:page bbox:[%f,%f,%f,%f]\n",pbox[0], pbox[1], pbox[2], pbox[3]);
ctm_stack.clear();
double *identity = new double[6];
tm_init(identity);
ctm_stack.push_back(identity);
#ifdef DEBUG
printf("DrawingTracer::reset:page bbox:[%f,%f,%f,%f]\n",pbox[0], pbox[1], pbox[2], pbox[3]);
#endif
}
void DrawingTracer::finish()
{
#if ENABLE_SVG
if (cairo)
{
cairo_destroy(cairo);
cairo = nullptr;
}
#endif
}
// Poppler won't inform us its initial CTM, and the initial CTM is affected by zoom level.
@ -72,22 +70,17 @@ void DrawingTracer::update_ctm(GfxState *state, double m11, double m12, double m
if (!param.correct_text_visibility)
return;
#if ENABLE_SVG
cairo_matrix_t matrix;
matrix.xx = m11;
matrix.yx = m12;
matrix.xy = m21;
matrix.yy = m22;
matrix.x0 = m31;
matrix.y0 = m32;
cairo_transform(cairo, &matrix);
if (DT_DEBUG)
{
cairo_matrix_t mat;
cairo_get_matrix(cairo, &mat);
printf("DrawingTracer::update_ctm:ctm:[%f,%f,%f,%f,%f,%f]\n", mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0);
}
double *tmp = new double[6];
tmp[0] = m11;
tmp[1] = m12;
tmp[2] = m21;
tmp[3] = m22;
tmp[4] = m31;
tmp[5] = m32;
double *ctm = ctm_stack.back();
tm_multiply(ctm, tmp);
#ifdef DEBUG
printf("DrawingTracer::before update_ctm:ctm:[%f,%f,%f,%f,%f,%f] => [%f,%f,%f,%f,%f,%f]\n", m11, m12, m21, m22, m31, m32, ctm[0], ctm[1], ctm[2], ctm[3], ctm[4], ctm[5]);
#endif
}
@ -95,16 +88,15 @@ void DrawingTracer::clip(GfxState * state, bool even_odd)
{
if (!param.correct_text_visibility)
return;
#if ENABLE_SVG
do_path(state, state->getPath());
cairo_set_fill_rule(cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
cairo_clip (cairo);
if (DT_DEBUG)
#ifdef DEBUG
{
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
printf("DrawingTracer::clip:extents:[%f,%f,%f,%f]\n", cbox[0],cbox[1],cbox[2],cbox[3]);
printf("DrawingTracer::clip:extents:even_odd=%d,[%f,%f,%f,%f]\n", even_odd, cbox[0],cbox[1],cbox[2],cbox[3]);
}
#endif
}
@ -113,6 +105,8 @@ void DrawingTracer::clip_to_stroke_path(GfxState * state)
{
if (!param.correct_text_visibility)
return;
printf("TODO:clip_to_stroke_path\n");
// TODO cairo_stroke_to_path() ?
}
@ -120,92 +114,112 @@ void DrawingTracer::save()
{
if (!param.correct_text_visibility)
return;
#if ENABLE_SVG
cairo_save(cairo);
if (DT_DEBUG)
printf("DrawingTracer::save\n");
double *e = new double[6];
memcpy(e, ctm_stack.back(), sizeof(double) * 6);
ctm_stack.push_back(e);
#ifdef DEBUG
printf("DrawingTracer::saved: [%f,%f,%f,%f,%f,%f]\n", e[0], e[1], e[2], e[3], e[4], e[5]);
#endif
}
void DrawingTracer::restore()
{
if (!param.correct_text_visibility)
return;
#if ENABLE_SVG
cairo_restore(cairo);
if (DT_DEBUG)
printf("DrawingTracer::restore\n");
ctm_stack.pop_back();
#ifdef DEBUG
double *ctm = ctm_stack.back();
printf("DrawingTracer::restored: [%f,%f,%f,%f,%f,%f]\n", ctm[0], ctm[1], ctm[2], ctm[3], ctm[4], ctm[5]);
#endif
}
void DrawingTracer::do_path(GfxState * state, GfxPath * path)
{
#if ENABLE_SVG
//copy from CairoOutputDev::doPath
GfxSubpath *subpath;
int i, j;
double x, y;
cairo_new_path(cairo);
if (DT_DEBUG)
printf("DrawingTracer::do_path:new_path\n");
#ifdef DEBUG
printf("DrawingTracer::do_path:new_path (%d subpaths)\n", path->getNumSubpaths());
#endif
for (i = 0; i < path->getNumSubpaths(); ++i) {
subpath = path->getSubpath(i);
if (subpath->getNumPoints() > 0) {
x = subpath->getX(0);
y = subpath->getY(0);
xform_pt(x, y);
cairo_move_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:move_to[%f,%f]\n",x,y);
j = 1;
while (j < subpath->getNumPoints()) {
if (subpath->getCurve(j)) {
x = subpath->getX(j+2);
y = subpath->getY(j+2);
double x1 = subpath->getX(j);
double y1 = subpath->getY(j);
double x2 = subpath->getX(j+1);
double y2 = subpath->getY(j+1);
xform_pt(x, y);
xform_pt(x1, y1);
xform_pt(x2, y2);
cairo_curve_to(cairo,
subpath->getX(j), subpath->getY(j),
subpath->getX(j+1), subpath->getY(j+1),
x1, y1,
x2, y2,
x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:curve_to[%f,%f]\n",x,y);
j += 3;
} else {
x = subpath->getX(j);
y = subpath->getY(j);
xform_pt(x, y);
cairo_line_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:line_to[%f,%f]\n",x,y);
++j;
}
}
if (subpath->isClosed()) {
cairo_close_path (cairo);
if (DT_DEBUG)
printf("DrawingTracer::do_path:close\n");
}
}
}
#endif
}
void DrawingTracer::stroke(GfxState * state)
{
#if ENABLE_SVG
if (!param.correct_text_visibility)
return;
if (DT_DEBUG)
printf("DrawingTracer::stroke\n");
if (state->getStrokeOpacity() < 0.5) {
// Ignore partially transparent strokes for occlusion purposes
return;
}
cairo_set_line_width(cairo, state->getLineWidth());
// Transform the line width by the ctm. This isn't 100% accurate - we should really do this path segment by path segment,
// but it is a reasonable approximation provided the CTM has uniform X/Y scaling
double lwx, lwy;
lwx = lwy = sqrt(0.5);
tm_transform(ctm_stack.back(), lwx, lwy, true);
double lineWidthScale = sqrt(lwx * lwx + lwy * lwy);
#ifdef DEBUG
printf("DrawingTracer::stroke. line width = %f*%f, line cap = %d\n", lineWidthScale, state->getLineWidth(), state->getLineCap());
#endif
cairo_set_line_width(cairo, lineWidthScale * state->getLineWidth());
// Line cap is important - some PDF line widths are very large
switch (state->getLineCap()) {
case 0:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_BUTT);
break;
case 1:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_ROUND);
break;
case 2:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_SQUARE);
break;
}
// GfxPath is broken into steps, each step makes up a cairo path and its bbox is used for covering test.
// TODO
// 1. path steps that are not vertical or horizontal lines may still falsely "cover" many chars,
// can we slice those steps further?
// 2. if the line width is small, can we just ignore the path?
// 3. line join feature can't be retained. We use line-cap-square to minimize the problem that
// some chars actually covered by a line join are missed. However chars covered by a acute angle
// with line-join-miter may be still recognized as not covered.
cairo_set_line_cap(cairo, CAIRO_LINE_CAP_SQUARE);
GfxPath * path = state->getPath();
for (int i = 0; i < path->getNumSubpaths(); ++i) {
GfxSubpath * subpath = path->getSubpath(i);
@ -213,48 +227,54 @@ void DrawingTracer::stroke(GfxState * state)
continue;
double x = subpath->getX(0);
double y = subpath->getY(0);
xform_pt(x, y);
//p: loop cursor; j: next point index
int p =1, j = 1;
int p =1;
int n = subpath->getNumPoints();
while (p <= n) {
while (p < n) {
cairo_new_path(cairo);
#ifdef DEBUG
printf("move_to: [%f,%f]\n", x, y);
#endif
cairo_move_to(cairo, x, y);
if (subpath->getCurve(j)) {
x = subpath->getX(j+2);
y = subpath->getY(j+2);
if (subpath->getCurve(p)) {
x = subpath->getX(p+2);
y = subpath->getY(p+2);
double x1 = subpath->getX(p);
double y1 = subpath->getY(p);
double x2 = subpath->getX(p+1);
double y2 = subpath->getY(p+1);
xform_pt(x, y);
xform_pt(x1, y1);
xform_pt(x2, y2);
#ifdef DEBUG
printf("curve_to: [%f,%f], [%f,%f], [%f,%f]\n", x1, y1, x2, y2, x, y);
#endif
cairo_curve_to(cairo,
subpath->getX(j), subpath->getY(j),
subpath->getX(j+1), subpath->getY(j+1),
x1, y1,
x2, y2,
x, y);
p += 3;
} else {
x = subpath->getX(j);
y = subpath->getY(j);
x = subpath->getX(p);
y = subpath->getY(p);
xform_pt(x, y);
#ifdef DEBUG
printf("line_to: [%f,%f]\n", x, y);
#endif
cairo_line_to(cairo, x, y);
++p;
}
if (DT_DEBUG)
printf("DrawingTracer::stroke:new box:\n");
double sbox[4];
cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3);
#ifdef DEBUG
printf("DrawingTracer::stroke:new box:[%f,%f,%f,%f]\n", sbox[0], sbox[1], sbox[2], sbox[3]);
#endif
if (sbox[0] != sbox[2] && sbox[1] != sbox[3])
draw_non_char_bbox(state, sbox);
else if (DT_DEBUG)
printf("DrawingTracer::stroke:zero box!\n");
if (p == n)
{
if (subpath->isClosed())
j = 0; // if sub path is closed, go back to starting point
else
break;
}
else
j = p;
draw_non_char_bbox(state, sbox, 2);
}
}
#endif
}
void DrawingTracer::fill(GfxState * state, bool even_odd)
@ -262,139 +282,166 @@ void DrawingTracer::fill(GfxState * state, bool even_odd)
if (!param.correct_text_visibility)
return;
#if ENABLE_SVG
if (state->getFillOpacity() < 0.5) {
// Ignore partially transparent fills for occlusion purposes
return;
}
do_path(state, state->getPath());
//cairo_fill_extents don't take fill rule into account.
//cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
double fbox[4];
cairo_fill_extents(cairo, fbox, fbox + 1, fbox + 2, fbox + 3);
draw_non_char_bbox(state, fbox);
#ifdef DEBUG
printf("DrawingTracer::fill:[%f,%f,%f,%f]\n", fbox[0],fbox[1],fbox[2],fbox[3]);
#endif
draw_non_char_bbox(state, fbox, 1);
}
// Reports a non-character drawing operation to the on_non_char_drawn callback,
// but only when its bounding box intersects the current cairo clip extents.
// bbox is (x0, y0, x1, y1); `what` selects the per-point test requested from the callback.
void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox)
void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox, int what)
{
#if ENABLE_SVG
// what == 0 => just do bbox test
// what == 1 => fill test (passed by fill() and draw_image())
// what == 2 => stroke test (passed by stroke())
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
if(bbox_intersect(cbox, bbox, bbox))
#endif
if(bbox_intersect(cbox, bbox))
{
transform_bbox_by_ctm(bbox, state);
if (DT_DEBUG)
printf("DrawingTracer::draw_non_char_bbox:[%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
#ifdef DEBUG
printf("DrawingTracer::draw_non_char_bbox:what=%d,[%f,%f,%f,%f]\n", what, bbox[0],bbox[1],bbox[2],bbox[3]);
#endif
// The callback is optional; skip notification when none is registered.
if (on_non_char_drawn)
on_non_char_drawn(bbox);
on_non_char_drawn(cairo, bbox, what);
}
}
void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox)
void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox, int inTransparencyGroup)
{
#if ENABLE_SVG
// Note: even if 4 corners of the char are all in or all out of the clip area,
// it could still be partially clipped.
// TODO better solution?
int pt_in = 0;
if (cairo_in_clip(cairo, bbox[0], bbox[1]))
++pt_in;
if (cairo_in_clip(cairo, bbox[2], bbox[3]))
++pt_in;
if (cairo_in_clip(cairo, bbox[2], bbox[1]))
++pt_in;
if (cairo_in_clip(cairo, bbox[0], bbox[3]))
++pt_in;
if (inTransparencyGroup || state->getFillOpacity() < 1.0 || state->getStrokeOpacity() < 1.0) {
on_char_clipped(cairo, bbox, 0);
return;
}
if (!param.correct_text_visibility) {
double bbox[4] = { 0, 0, 0, 0 }; // bbox not relevant if not correcting text visibility
on_char_drawn(cairo, bbox);
return;
}
if (pt_in == 0)
{
transform_bbox_by_ctm(bbox);
if(on_char_clipped)
on_char_clipped(bbox, false);
}
else
{
if (pt_in < 4)
{
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
bbox_intersect(cbox, bbox, bbox);
}
transform_bbox_by_ctm(bbox);
if (pt_in < 4)
{
if(on_char_clipped)
on_char_clipped(bbox, true);
}
else
{
if (on_char_drawn)
on_char_drawn(bbox);
}
}
#else
transform_bbox_by_ctm(bbox, state);
if (on_char_drawn)
on_char_drawn(bbox);
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
#ifdef DEBUG
printf("DrawingTracer::draw_char_bbox::char bbox[%f,%f,%f,%f],clip extents:[%f,%f,%f,%f]\n", bbox[0], bbox[1], bbox[2], bbox[3], cbox[0],cbox[1],cbox[2],cbox[3]);
#endif
if (DT_DEBUG)
printf("DrawingTracer::draw_char_bbox:[%f,%f,%f,%f]\n",bbox[0],bbox[1],bbox[2],bbox[3]);
if (bbox_intersect(bbox, cbox)) {
#ifdef DEBUG
printf("char intersects clip\n");
#endif
int pts_visible = 0;
// See which points are inside the current clip
if (cairo_in_clip(cairo, bbox[0], bbox[1]))
pts_visible |= 1;
if (cairo_in_clip(cairo, bbox[2], bbox[1]))
pts_visible |= 2;
if (cairo_in_clip(cairo, bbox[2], bbox[3]))
pts_visible |= 4;
if (cairo_in_clip(cairo, bbox[0], bbox[3]))
pts_visible |= 8;
if (pts_visible == (1|2|4|8)) {
#ifdef DEBUG
printf("char inside clip\n");
#endif
on_char_drawn(cairo, bbox);
} else {
#ifdef DEBUG
printf("char partial clip (%x)\n", pts_visible);
#endif
on_char_clipped(cairo, bbox, pts_visible);
}
} else {
#ifdef DEBUG
printf("char outside clip\n");
#endif
on_char_clipped(cairo, bbox, 0);
}
}
void DrawingTracer::draw_image(GfxState *state)
{
if (!param.correct_text_visibility)
return;
double x1, y1, x2, y2, x3, y3, x4, y4;
x1 = x4 = y3 = y4 = 0;
x2 = y2 = x3 = y1 = 1;
xform_pt(x1, y1);
xform_pt(x2, y2);
xform_pt(x3, y3);
xform_pt(x4, y4);
cairo_new_path(cairo);
cairo_move_to(cairo, x1, y1);
cairo_line_to(cairo, x2, y2);
cairo_line_to(cairo, x3, y3);
cairo_line_to(cairo, x4, y4);
cairo_close_path (cairo);
#ifdef DEBUG
printf("draw_image: [%f,%f], [%f,%f], [%f,%f], [%f,%f]\n", x1, y1, x2, y2, x3, y3, x4, y4);
#endif
double bbox[4] {0, 0, 1, 1};
draw_non_char_bbox(state, bbox);
tm_transform_bbox(ctm_stack.back(), bbox);
draw_non_char_bbox(state, bbox, 1);
}
void DrawingTracer::draw_char(GfxState *state, double x, double y, double ax, double ay)
void DrawingTracer::draw_char(GfxState *state, double x, double y, double width, double height, int inTransparencyGroup)
{
if (!param.correct_text_visibility)
return;
//printf("x=%f,y=%f,width=%f,height=%f\n", x, y, width, height);
Matrix tm, itm;
memcpy(tm.m, state->getTextMat(), sizeof(tm.m));
//printf("tm = %f,%f,%f,%f,%f,%f\n", tm.m[0], tm.m[1], tm.m[2], tm.m[3], tm.m[4], tm.m[5]);
double cx = state->getCurX(), cy = state->getCurY(), fs = state->getFontSize(),
ry = state->getRise(), h = state->getHorizScaling();
ry = state->getRise(), h = state->getHorizScaling();
//printf("cx=%f,cy=%f,fs=%f,ry=%f,h=%f\n", cx,cy,fs,ry,h);
//cx and cy has been transformed by text matrix, we need to reverse them.
tm.invertTo(&itm);
double char_cx, char_cy;
itm.transform(cx, cy, &char_cx, &char_cy);
//printf("char_cx = %f, char_cy = %f\n", char_cx, char_cy);
//TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
double char_m[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry};
//printf("char_m = %f,%f,%f,%f,%f,%f\n", char_m[0], char_m[1], char_m[2], char_m[3], char_m[4], char_m[5]);
double final_m[6];
tm_multiply(final_m, tm.m, char_m);
auto font = state->getFont();
double bbox[4] {0, 0, ax, ay};
double desc = font->getDescent(), asc = font->getAscent();
if (font->getWMode() == 0)
{
bbox[1] += desc;
bbox[3] += asc;
}
else
{//TODO Vertical?
}
tm_transform_bbox(final_m, bbox);
draw_char_bbox(state, bbox);
//printf("final_m = %f,%f,%f,%f,%f,%f\n", final_m[0], final_m[1], final_m[2], final_m[3], final_m[4], final_m[5]);
double final_after_ctm[6];
tm_multiply(final_after_ctm, ctm_stack.back(), final_m);
//printf("final_after_ctm= %f,%f,%f,%f,%f,%f\n", final_after_ctm[0], final_after_ctm[1], final_after_ctm[2], final_after_ctm[3], final_after_ctm[4], final_after_ctm[5]);
double inset = 0.1;
double bbox[4] {inset*width, inset*height, (1-inset)*width, (1-inset)*height};
//printf("bbox before: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
//printf("bbox after: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
tm_transform_bbox(final_after_ctm, bbox);
//printf("bbox after: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
draw_char_bbox(state, bbox, inTransparencyGroup);
}
void DrawingTracer::transform_bbox_by_ctm(double * bbox, GfxState * state)
{
#if ENABLE_SVG
cairo_matrix_t mat;
cairo_get_matrix(cairo, &mat);
double mat_a[6] {mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0};
tm_transform_bbox(mat_a, bbox);
#else
tm_transform_bbox(state->getCTM(), bbox);
#endif
// Forwards (x, y) to tm_transform together with the matrix currently on top
// of ctm_stack. Both coordinates are passed by reference, so the caller's
// variables are the ones handed to tm_transform.
// NOTE(review): assumes ctm_stack is non-empty here; back() on an empty
// vector is undefined -- confirm against the code that pushes onto the stack.
void DrawingTracer::xform_pt(double & x, double & y) {
tm_transform(ctm_stack.back(), x, y);
}
} /* namespace pdf2htmlEX */

View File

@ -12,6 +12,9 @@
#include <GfxState.h>
#include <vector>
#include <array>
#include "pdf2htmlEX-config.h"
#if ENABLE_SVG
@ -31,11 +34,11 @@ public:
* bbox in device space.
*/
// a non-char graphics is drawn
std::function<void(double * bbox)> on_non_char_drawn;
std::function<void(cairo_t *cairo, double * bbox, int what)> on_non_char_drawn;
// a char is drawn in the clip area
std::function<void(double * bbox)> on_char_drawn;
std::function<void(cairo_t *cairo, double * bbox)> on_char_drawn;
// a char is drawn out of/partially in the clip area
std::function<void(double * bbox, bool patially)> on_char_clipped;
std::function<void(cairo_t *cairo, double * bbox, int pts_visible)> on_char_clipped;
DrawingTracer(const Param & param);
virtual ~DrawingTracer();
@ -44,9 +47,9 @@ public:
/*
* A character is being drawn
* x, y: glyph-drawing position, in PDF text object space.
* ax, ay: glyph advance, in glyph space.
* width, height: glyph width/height
*/
void draw_char(GfxState * state, double x, double y, double ax, double ay);
void draw_char(GfxState * state, double x, double y, double width, double height, int inTransparencyGroup);
/*
* An image is being drawn
*/
@ -63,13 +66,15 @@ private:
void finish();
// Following methods operate in user space (just before CTM is applied)
void do_path(GfxState * state, GfxPath * path);
void draw_non_char_bbox(GfxState * state, double * bbox);
void draw_char_bbox(GfxState * state, double * bbox);
void draw_non_char_bbox(GfxState * state, double * bbox, int what);
void draw_char_bbox(GfxState * state, double * bbox, int inTransparencyGroup);
// If cairo is available, parameter state is ignored
void transform_bbox_by_ctm(double * bbox, GfxState * state = nullptr);
void xform_pt(double & x, double & y);
const Param & param;
std::vector<double*> ctm_stack;
#if ENABLE_SVG
cairo_t * cairo;
#endif

View File

@ -47,7 +47,7 @@ namespace pdf2htmlEX {
struct HTMLRenderer : OutputDev
{
HTMLRenderer(const Param & param);
HTMLRenderer(Param & param);
virtual ~HTMLRenderer();
void process(PDFDoc * doc);
@ -144,6 +144,13 @@ struct HTMLRenderer : OutputDev
virtual void eoFill(GfxState *state);
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/);
virtual void endTransparencyGroup(GfxState * /*state*/);
virtual void processLink(AnnotLink * al);
/*
@ -245,11 +252,12 @@ protected:
double print_scale (void) const { return 96.0 / DEFAULT_DPI / text_zoom_factor(); }
const Param & param;
Param & param;
////////////////////////////////////////////////////
// PDF states
////////////////////////////////////////////////////
int inTransparencyGroup;
// track the original (unscaled) values to determine scaling and merge lines
// current position
double cur_tx, cur_ty; // real text position, in text coords

View File

@ -62,4 +62,14 @@ GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, d
return true;
}
// Records entry into a transparency group by incrementing the
// inTransparencyGroup counter. None of the arguments are read.
void HTMLRenderer::beginTransparencyGroup(GfxState *state, double *bbox,
                                          GfxColorSpace *blendingColorSpace,
                                          GBool isolated, GBool knockout,
                                          GBool forSoftMask)
{
    ++inTransparencyGroup;
}
// Records exit from a transparency group by decrementing the
// inTransparencyGroup counter. The state argument is not read.
void HTMLRenderer::endTransparencyGroup(GfxState *state)
{
    --inTransparencyGroup;
}
} // namespace pdf2htmlEX

View File

@ -66,10 +66,10 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
auto * id = font->getID();
Object ref_obj;
ref_obj.initRef(id->num, id->gen);
ref_obj.fetch(xref, &font_obj);
ref_obj.free();
Object ref_obj(id->num, id->gen);
//ref_obj.initRef(id->num, id->gen);
font_obj = ref_obj.fetch(xref);
//ref_obj.free();
if(!font_obj.isDict())
{
@ -78,7 +78,8 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
}
Dict * dict = font_obj.getDict();
if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
font_obj2 = dict->lookup("DescendantFonts");
if(font_obj2.isArray())
{
if(font_obj2.arrayGetLength() == 0)
{
@ -86,27 +87,31 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
}
else
{
if(font_obj2.arrayGetLength() > 1)
if(font_obj2.arrayGetLength() > 1) {
cerr << "TODO: multiple entries in DescendantFonts array" << endl;
if(font_obj2.arrayGet(0, &obj2)->isDict())
}
obj2 = font_obj2.arrayGet(0);
if(obj2.isDict())
{
dict = obj2.getDict();
}
}
}
if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
fontdesc_obj = dict->lookup("FontDescriptor");
if(!fontdesc_obj.isDict())
{
cerr << "Cannot find FontDescriptor " << endl;
throw 0;
}
dict = fontdesc_obj.getDict();
if(dict->lookup("FontFile3", &obj)->isStream())
obj = dict->lookup("FontFile3");
if(obj.isStream())
{
if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
obj1 = obj.streamGetDict()->lookup("Subtype");
if(obj1.isName())
{
subtype = obj1.getName();
if(subtype == "Type1C")
@ -132,19 +137,19 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
cerr << "Invalid subtype in font descriptor" << endl;
throw 0;
}
}
else if (dict->lookup("FontFile2", &obj)->isStream())
{
suffix = ".ttf";
}
else if (dict->lookup("FontFile", &obj)->isStream())
{
suffix = ".pfa";
}
else
{
cerr << "Cannot find FontFile for dump" << endl;
throw 0;
} else {
obj = dict->lookup("FontFile2");
if (obj.isStream()) {
suffix = ".ttf";
} else {
obj = dict->lookup("FontFile");
if (obj.isStream()) {
suffix = ".pfa";
} else {
cerr << "Cannot find FontFile for dump" << endl;
throw 0;
}
}
}
if(suffix == "")
@ -175,13 +180,13 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl;
}
obj2.free();
obj1.free();
obj.free();
//obj2.free();
//obj1.free();
//obj.free();
fontdesc_obj.free();
font_obj2.free();
font_obj.free();
//fontdesc_obj.free();
//font_obj2.free();
//font_obj.free();
return filepath;
}
@ -237,7 +242,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
surface = cairo_svg_surface_create(glyph_filename.c_str(), transformed_bbox_width * scale, transformed_bbox_height * scale);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
cairo_t * cr = cairo_create(surface);
// track the position of the origin
@ -373,6 +378,14 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
#endif
}
namespace {

// Emits the column-title line that is written immediately after a font map
// file is opened, then ends the line and flushes the stream.
void output_map_file_header(std::ostream& out)
{
    static const char column_titles[] = "glyph_code mapped_code unicode";
    out << column_titles;
    out << std::endl;
}

} // namespace
void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only)
{
if(param.debug)
@ -528,6 +541,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
ffw_reencode_glyph_order();
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
assert(_font != nullptr);
// To locate CID2GID for the font
// as in CairoFontEngine.cc
@ -574,6 +588,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
map_filename = (char*)str_fmt("%s/f%llx.map", param.tmp_dir.c_str(), info.id);
tmp_files.add(map_filename);
map_outf.open(map_filename);
output_map_file_header(map_outf);
}
unordered_set<int> codeset;
@ -650,6 +665,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
{
map_outf.close();
map_outf.open(map_filename);
output_map_file_header(map_outf);
}
continue;
}

View File

@ -41,12 +41,13 @@ using std::abs;
using std::cerr;
using std::endl;
HTMLRenderer::HTMLRenderer(const Param & param)
HTMLRenderer::HTMLRenderer(Param & param)
:OutputDev()
,param(param)
,html_text_page(param, all_manager)
,preprocessor(param)
,tmp_files(param)
,covered_text_detector(param)
,tracer(param)
{
if(!(param.debug))
@ -81,11 +82,11 @@ HTMLRenderer::HTMLRenderer(const Param & param)
all_manager.bottom .set_eps(EPS);
tracer.on_char_drawn =
[this](double * box) { covered_text_detector.add_char_bbox(box); };
[this](cairo_t *cairo, double * box) { covered_text_detector.add_char_bbox(cairo, box); };
tracer.on_char_clipped =
[this](double * box, bool partial) { covered_text_detector.add_char_bbox_clipped(box, partial); };
[this](cairo_t *cairo, double * box, int partial) { covered_text_detector.add_char_bbox_clipped(cairo, box, partial); };
tracer.on_non_char_drawn =
[this](double * box) { covered_text_detector.add_non_char_bbox(box); };
[this](cairo_t *cairo, double * box, int what) { covered_text_detector.add_non_char_bbox(cairo, box, what); };
}
HTMLRenderer::~HTMLRenderer()
@ -93,6 +94,8 @@ HTMLRenderer::~HTMLRenderer()
ffw_finalize();
}
#define MAX_DIMEN 9000
void HTMLRenderer::process(PDFDoc *doc)
{
cur_doc = doc;
@ -119,12 +122,22 @@ void HTMLRenderer::process(PDFDoc *doc)
int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i)
{
param.actual_dpi = param.desired_dpi;
param.max_dpi = 72 * MAX_DIMEN / max(doc->getPageCropWidth(i), doc->getPageCropHeight(i));
if (param.actual_dpi > param.max_dpi) {
param.actual_dpi = param.max_dpi;
printf("Warning:Page %d clamped to %f DPI\n", i, param.actual_dpi);
}
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
cerr << "Stop processing, reach max size\n";
if(param.quiet == 0)
cerr << "Stop processing, reach max size\n";
break;
}
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if (param.quiet == 0)
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.split_pages)
{
@ -147,15 +160,21 @@ void HTMLRenderer::process(PDFDoc *doc)
false, // printing
nullptr, nullptr, nullptr, nullptr);
if (param.desired_dpi != param.actual_dpi) {
printf("Page %d DPI change %.1f => %.1f\n", i, param.desired_dpi, param.actual_dpi);
}
if(param.split_pages)
{
delete f_curpage;
f_curpage = nullptr;
}
}
if(page_count >= 0)
if(page_count >= 0 && param.quiet == 0)
cerr << "Working: " << page_count << "/" << page_count;
cerr << endl;
if(param.quiet == 0)
cerr << endl;
////////////////////////
// Process Outline
@ -167,7 +186,8 @@ void HTMLRenderer::process(PDFDoc *doc)
bg_renderer = nullptr;
fallback_bg_renderer = nullptr;
cerr << endl;
if(param.quiet == 0)
cerr << endl;
}
void HTMLRenderer::setDefaultCTM(double *ctm)

View File

@ -56,73 +56,70 @@ static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int &
// dec
sout << "[" << pageno;
if(dest)
switch(dest->getKind())
{
switch(dest->getKind())
{
case destXYZ:
{
sout << ",\"XYZ\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
sout << ",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
sout << ",";
if(dest->getChangeZoom())
sout << (dest->getZoom());
else
sout << "null";
}
break;
case destFit:
sout << ",\"Fit\"";
break;
case destFitH:
sout << ",\"FitH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitV:
sout << ",\"FitV\",";
case destXYZ:
{
sout << ",\"XYZ\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
break;
case destFitR:
sout << ",\"FitR\","
<< (dest->getLeft()) << ","
<< (dest->getBottom()) << ","
<< (dest->getRight()) << ","
<< (dest->getTop());
break;
case destFitB:
sout << ",\"FitB\"";
break;
case destFitBH:
sout << ",\"FitBH\",";
sout << ",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitBV:
sout << ",\"FitBV\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
sout << ",";
if(dest->getChangeZoom())
sout << (dest->getZoom());
else
sout << "null";
break;
default:
break;
}
}
break;
case destFit:
sout << ",\"Fit\"";
break;
case destFitH:
sout << ",\"FitH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitV:
sout << ",\"FitV\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
break;
case destFitR:
sout << ",\"FitR\","
<< (dest->getLeft()) << ","
<< (dest->getBottom()) << ","
<< (dest->getRight()) << ","
<< (dest->getTop());
break;
case destFitB:
sout << ",\"FitB\"";
break;
case destFitBH:
sout << ",\"FitBH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitBV:
sout << ",\"FitBV\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
break;
default:
break;
}
sout << "]";
@ -166,6 +163,7 @@ string HTMLRenderer::get_linkaction_str(LinkAction * action, string & detail)
case actionURI:
{
auto * real_action = dynamic_cast<LinkURI*>(action);
assert(real_action != nullptr);
dest_str = real_action->getURI()->getCString();
}
break;

View File

@ -104,6 +104,7 @@ void HTMLRenderer::clipToStrokePath(GfxState * state)
}
void HTMLRenderer::reset_state()
{
inTransparencyGroup = 0;
draw_text_scale = 1.0;
cur_font_size = 0.0;

View File

@ -33,16 +33,23 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
double cur_word_space = state->getWordSpace();
double cur_horiz_scaling = state->getHorizScaling();
bool drawChars = true;
// Writing mode fonts and Type 3 fonts are rendered as images
// I don't find a way to display writing mode fonts in HTML except for one div for each character, which is too costly
// For type 3 fonts, due to the font matrix, still it's hard to show it on HTML
if( (font == nullptr)
|| (font->getWMode())
|| ((font->getType() == fontType3) && (!param.process_type3))
if(state->getFont()
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
)
{
return;
// We still want to go through the loop to ensure characters are added to the covered_chars array
drawChars = false;
//printf("%d / %d / %d\n", state->getFont()->getWMode(), (state->getFont()->getType() == fontType3), state->getRender());
}
// see if the line has to be closed due to state change
@ -74,7 +81,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
while (len > 0)
{
auto n = font->getNextChar(p, len, &code, &u, &uLen, &ax, &ay, &ox, &oy);
HR_DEBUG(printf("HTMLRenderer::drawString:unicode=%lc(%d)\n", (wchar_t)u[0], u[0]));
HR_DEBUG(printf("HTMLRenderer::drawString:unicode=%lc(%d)\n", u ? (wchar_t)u[0] : ' ', u ? u[0] : -1));
if(!(equal(ox, 0) && equal(oy, 0)))
{
@ -82,7 +89,34 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
}
ddx = ax * cur_font_size + cur_letter_space;
ddy = ay * cur_font_size;
tracer.draw_char(state, dx, dy, ax, ay);
double width = 0, height = font->getAscent();
if (font->isCIDFont()) {
char buf[2];
buf[0] = (code >> 8) & 0xff;
buf[1] = (code & 0xff);
width = ((GfxCIDFont *)font)->getWidth(buf, 2);
} else {
width = ((Gfx8BitFont *)font)->getWidth(code);
}
if (width == 0 || height == 0) {
//cerr << "CID: " << font->isCIDFont() << ", char:" << code << ", width:" << width << ", ax:" << ax << ", height:" << height << ", ay:" << ay << endl;
}
if (width == 0) {
width = ax;
if (width == 0) {
width = 0.001;
}
}
if (height == 0) {
height = ay;
if (height == 0) {
height = 0.001;
}
}
tracer.draw_char(state, dx, dy, width, height, !drawChars || inTransparencyGroup);
bool is_space = false;
if (n == 1 && *p == ' ')
@ -99,6 +133,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
is_space = true;
}
if(is_space && (param.space_as_offset))
{
html_text_page.get_cur_line()->append_padding_char();
@ -158,7 +193,7 @@ bool HTMLRenderer::is_char_covered(int index)
{
std::cerr << "Warning: HTMLRenderer::is_char_covered: index out of bound: "
<< index << ", size: " << covered.size() <<endl;
return false;
return true; // Something's gone wrong so assume covered so at least something is output
}
return covered[index];
}

View File

@ -250,7 +250,10 @@ void HTMLTextLine::dump_text(ostream & out)
if(std::abs(target - space_off) <= param.h_eps)
{
Unicode u = ' ';
// Sometimes we guess wrong whether we have a valid space character, so ensure it is always hidden
out << "<span class=\"" << CSS::WHITESPACE_CN << "\">";
writeUnicodes(out, &u, 1);
out << "</span>";
actual_offset = space_off;
done = true;
}
@ -378,13 +381,12 @@ void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
new_offsets.reserve(offsets.size());
auto offset_iter1 = offsets.begin();
for(auto state_iter2 = states.begin(), state_iter1 = state_iter2++;
state_iter1 != states.end();
++state_iter1, ++state_iter2)
for(auto state_iter1 = states.begin(); state_iter1 != states.end(); ++state_iter1)
{
const auto state_iter2 = std::next(state_iter1);
const size_t text_idx1 = state_iter1->start_idx;
const size_t text_idx2 = (state_iter2 == states.end()) ? text.size() : state_iter2->start_idx;
size_t text_count = text_idx2 - text_idx1;
const size_t text_count = text_idx2 - text_idx1;
// there might be some offsets before the first state
while((offset_iter1 != offsets.end())

View File

@ -22,7 +22,10 @@ struct Param
double zoom;
double fit_width, fit_height;
int use_cropbox;
double h_dpi, v_dpi;
double desired_dpi;
double actual_dpi;
double max_dpi;
double text_dpi;
// output
int embed_css;
@ -79,6 +82,7 @@ struct Param
std::string tmp_dir;
int debug;
int proof;
int quiet;
std::string input_filename, output_filename;
};

View File

@ -45,7 +45,8 @@ void Preprocessor::process(PDFDoc * doc)
int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i)
{
cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.quiet == 0)
cerr << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush;
doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI,
0,
@ -54,9 +55,11 @@ void Preprocessor::process(PDFDoc * doc)
false, // printing
nullptr, nullptr, nullptr, nullptr);
}
if(page_count >= 0)
if(page_count >= 0 && param.quiet == 0)
cerr << "Preprocessing: " << page_count << "/" << page_count;
cerr << endl;
if(param.quiet == 0)
cerr << endl;
}
void Preprocessor::drawChar(GfxState *state, double x, double y,

View File

@ -43,7 +43,18 @@ public:
// install new_value into the map
// return the corresponding id
long long install(double new_value, double * actual_value_ptr = nullptr) {
auto iter = value_map.lower_bound(new_value - eps);
// DCRH: Fix for when eps check fails and yet map thinks the keys are the same
// (DEV1-RYR-LETTER example)
auto iter = value_map.find(new_value);
if (iter != value_map.end()) {
if(actual_value_ptr != nullptr)
*actual_value_ptr = iter->first;
return iter->second;
}
iter = value_map.lower_bound(new_value - eps);
if((iter != value_map.end()) && (std::abs(iter->first - new_value) <= eps))
{
if(actual_value_ptr != nullptr)
@ -84,7 +95,7 @@ protected:
// Be careful about the mixed usage of Matrix and const double *
// the input is usually double *, which might be changed, so we have to copy the content out
// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructing
// in the map we use Matrix instead of double * so that the array is automatically released on destruction
template <class Imp>
class StateManager<Matrix, Imp>
{
@ -96,7 +107,7 @@ public:
// return id
long long install(const double * new_value) {
Matrix m;
memcpy(m.m, new_value, sizeof(m.m));
memcpy(m.m, new_value, 4 * sizeof(double));
auto iter = value_map.lower_bound(m);
if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4)))
{

View File

@ -111,7 +111,7 @@ void prepare_directories()
errno = 0;
unique_ptr<char> pBuf(new char[tmp_dir.size() + 1]);
unique_ptr<char[]> pBuf(new char[tmp_dir.size() + 1]);
strcpy(pBuf.get(), tmp_dir.c_str());
auto p = mkdtemp(pBuf.get());
if(p == nullptr)
@ -139,8 +139,7 @@ void parse_options (int argc, char **argv)
.add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true)
.add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true)
.add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox")
.add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI")
.add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI")
.add("dpi", &param.desired_dpi, 144.0, "Resolution for graphics in DPI")
// output files
.add("embed", "specify which elements should be embedded into output", embed_parser, true)
@ -160,7 +159,7 @@ void parse_options (int argc, char **argv)
.add("process-form", &param.process_form, 0, "include text fields and radio buttons")
.add("printing", &param.printing, 1, "enable printing support")
.add("fallback", &param.fallback, 0, "output in fallback mode")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit.")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit")
// fonts
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
@ -181,13 +180,14 @@ void parse_options (int argc, char **argv)
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
.add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text")
.add("correct-text-visibility", &param.correct_text_visibility, 0, "try to detect texts covered by other graphics and properly arrange them")
.add("correct-text-visibility", &param.correct_text_visibility, 1, "0: Don't do text visibility checks. 1: Fully occluded text handled. 2: Partially occluded text handled")
.add("covered-text-dpi", &param.text_dpi, 300, "Rendering DPI to use if correct-text-visibility == 2 and there is partially covered text on the page")
// background image
.add("bg-format", &param.bg_format, "png", "specify background image format")
.add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit,"
" fall back this page to bitmap background; negative value means no limit.")
.add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.")
" fall back this page to bitmap background; negative value means no limit")
.add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible")
// encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
@ -196,11 +196,12 @@ void parse_options (int argc, char **argv)
// misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory.")
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
.add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory")
.add("debug", &param.debug, 0, "print debugging information")
.add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof.")
.add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof")
.add("quiet", &param.quiet, 0, "perform operations quietly")
// meta
.add("version,v", "print copyright and version info", &show_version_and_exit)

View File

@ -126,6 +126,15 @@ void ffw_load_font(const char * filename)
assert(font->fv);
cur_fv = font->fv;
// If we are a composite font, then ensure the cidmaster has the same ascent/descent values as the first subfont.
// If there are more than one subfont then what do we do???
if (cur_fv->cidmaster && (cur_fv->cidmaster->ascent != cur_fv->sf->ascent || cur_fv->cidmaster->descent != cur_fv->sf->descent)) {
printf("ffw_load_font:Warning ascent/descent mismatch for CID font: %d/%d => %d/%d\n",
cur_fv->cidmaster->ascent, cur_fv->cidmaster->descent, cur_fv->sf->ascent, cur_fv->sf->descent);
cur_fv->cidmaster->ascent = cur_fv->sf->ascent;
cur_fv->cidmaster->descent = cur_fv->sf->descent;
}
}
/*

View File

@ -20,10 +20,10 @@ using std::ostream;
Unicode map_to_private(CharCode code)
{
Unicode private_mapping = (Unicode)(code + 0xE000);
if(private_mapping > 0xF8FF)
Unicode private_mapping = (Unicode)(code + 0xE600); // DCRH: Stupid mobile safari uses code points in 0xe000 - 0xe5ff range to switch to emoji font
if(private_mapping > 0xF65F) // DCRH: More emoji-avoiding for mobile safari (see http://www.fileformat.info/info/unicode/block/private_use_area/utf8test.htm)
{
private_mapping = (Unicode)((private_mapping - 0xF8FF) + 0xF0000);
private_mapping = (Unicode)((private_mapping - 0xF65F) + 0xF0000);
if(private_mapping > 0xFFFFD)
{
private_mapping = (Unicode)((private_mapping - 0xFFFFD) + 0x100000);
@ -40,7 +40,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font)
{
if(!font->isCIDFont())
{
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
auto * font2 = dynamic_cast<Gfx8BitFont*>(font);
assert(font2 != nullptr);
char * cname = font2->getCharName(code);
// may be untranslated ligature
if(cname)
{

View File

@ -59,6 +59,13 @@ namespace pdf2htmlEX {
inline bool is_illegal_unicode(Unicode c)
{
return (c < 0x20) || (c >= 0x7F && c <= 0xA0) || (c == 0xAD)
|| (c >= 0x300 && c <= 0x36f) // DCRH Combining diacriticals
|| (c >= 0x1ab0 && c <= 0x1aff) // DCRH Combining diacriticals
|| (c >= 0x1dc0 && c <= 0x1dff) // DCRH Combining diacriticals
|| (c >= 0x20d0 && c <= 0x20ff) // DCRH Combining diacriticals
|| (c >= 0xfe20 && c <= 0xfe2f) // DCRH Combining diacriticals
|| (c >= 0x900 && c <= 0x97f) // DCRH Devanagari - Webkit struggles with spacing for these code points
|| (c >= 0xa00 && c <= 0xa7f) // DCRH Gurmukhi - Webkit struggles with spacing for these code points
|| (c == 0x061C) || (c == 0x1361)
|| (c >= 0x200B && c <= 0x200F) || (c == 0x2028) || (c == 0x2029)
|| (c >= 0x202A && c <= 0x202E) || (c >= 0x2066 && c <= 0x2069)