1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

New master (#2)

* Show header in font map files

* fix a usage of unique_ptr with array

* Added '--quiet' argument to hide progress messages (resolves #503)

* Revert cout messages to cerr (see #622)

* bump version

* fix build; fix some coverity warnings

* Many bug fixes and improvements, including:

- Incorporated latest Cairo files from cairo-0.15.2
- Moved build to out-of-source
- Added clean script
- Rewritten correct_text_visibility option to improve accuracy
- Transparent characters drawn on background layer
- Improved bad unicode detection

* Many bug fixes and improvements, including:

- Incorporated latest Cairo files from cairo-0.15.2
- Moved build to out-of-source
- Added clean script
- Rewritten correct_text_visibility option to improve accuracy
- Transparent characters drawn on background layer
- Improved bad unicode detection

* Rationalise DPI to a single number.
Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages

* DPI fixes - increase DPI to covered-text-dpi when text is partially covered
Add font-style italic for oblique fonts
Reduce char bbox for occlusion tests

* Don't shrink bbox - not required if zoom=25 used

* Ignore occlusion from stroke/fill with opacity < 0.5
Better compute char bbox for occlusion
Use 10% inset for char bbox for occlusion
Back out adding font-weight: bold to potentially bold fonts
Fix bug to ensure CID ascent/descent matches subfont values

* Removed zero char logging

* Remove forced italic - missing italic is due to fontforge bug which needs fixing

* Typos fixed, readme updated

* Typos

* Increase maximum background image width
Fix private use range to avoid stupid mobile safari switching to emoji font

* included -pthread switch to link included 3rdparty poppler files.

* Updated files from poppler 0.59.0 and adjusted includes.

* Support updated "Object" class from poppler 0.59.0
This commit is contained in:
Trent Petersen 2018-01-10 13:31:38 -06:00 committed by GitHub
parent f12fc15515
commit 9ed21007e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
34 changed files with 1397 additions and 773 deletions

View File

@ -17,16 +17,16 @@
// Copyright (C) 2005-2007 Jeff Muizelaar <jeff@infidigm.net> // Copyright (C) 2005-2007 Jeff Muizelaar <jeff@infidigm.net>
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com> // Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Martin Kretzschmar <martink@gnome.org> // Copyright (C) 2005 Martin Kretzschmar <martink@gnome.org>
// Copyright (C) 2005, 2009, 2012, 2013 Albert Astals Cid <aacid@kde.org>
// Copyright (C) 2006, 2007, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org> // Copyright (C) 2006, 2007, 2010, 2011 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2007 Koji Otani <sho@bbr.jp> // Copyright (C) 2007 Koji Otani <sho@bbr.jp>
// Copyright (C) 2008, 2009 Chris Wilson <chris@chris-wilson.co.uk> // Copyright (C) 2008, 2009 Chris Wilson <chris@chris-wilson.co.uk>
// Copyright (C) 2008, 2012 Adrian Johnson <ajohnson@redneon.com> // Copyright (C) 2008, 2012, 2014, 2016 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2009 Darren Kenny <darren.kenny@sun.com> // Copyright (C) 2009 Darren Kenny <darren.kenny@sun.com>
// Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp> // Copyright (C) 2010 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2010 Jan Kümmel <jan+freedesktop@snorc.org> // Copyright (C) 2010 Jan Kümmel <jan+freedesktop@snorc.org>
// Copyright (C) 2012 Hib Eris <hib@hiberis.nl> // Copyright (C) 2012 Hib Eris <hib@hiberis.nl>
// Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de> // Copyright (C) 2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2015, 2016 Jason Crain <jason@aquaticape.us>
// //
// To see a description of the changes please see the Changelog file that // To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git // came with your tarball or type make ChangeLog if you are building from git
@ -58,16 +58,11 @@
#pragma implementation #pragma implementation
#endif #endif
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED #if MULTITHREADED
# define fontEngineLocker() MutexLocker locker(&mutex) # define fontEngineLocker() MutexLocker locker(&mutex)
#else #else
*/
# define fontEngineLocker() # define fontEngineLocker()
/*
#endif #endif
*/
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// CairoFont // CairoFont
@ -116,7 +111,7 @@ CairoFont::getGlyph(CharCode code,
double double
CairoFont::getSubstitutionCorrection(GfxFont *gfxFont) CairoFont::getSubstitutionCorrection(GfxFont *gfxFont)
{ {
double w1, w2; double w1, w2, w3;
CharCode code; CharCode code;
char *name; char *name;
@ -146,7 +141,8 @@ CairoFont::getSubstitutionCorrection(GfxFont *gfxFont)
cairo_font_options_destroy(options); cairo_font_options_destroy(options);
w2 = extents.x_advance; w2 = extents.x_advance;
} }
if (!gfxFont->isSymbolic()) { w3 = ((Gfx8BitFont *)gfxFont)->getWidth(0);
if (!gfxFont->isSymbolic() && w2 > 0 && w1 > w3) {
// if real font is substantially narrower than substituted // if real font is substantially narrower than substituted
// font, reduce the font size accordingly // font, reduce the font size accordingly
if (w1 > 0.01 && w1 < 0.9 * w2) { if (w1 > 0.01 && w1 < 0.9 * w2) {
@ -260,12 +256,16 @@ _ft_done_face (void *closure)
else else
_ft_open_faces = data->next; _ft_open_faces = data->next;
if (data->fd != -1) {
#if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4) #if defined(__SUNPRO_CC) && defined(__sun) && defined(__SVR4)
munmap ((char*)data->bytes, data->size); munmap ((char*)data->bytes, data->size);
#else #else
munmap (data->bytes, data->size); munmap (data->bytes, data->size);
#endif #endif
close (data->fd); close (data->fd);
} else {
gfree (data->bytes);
}
FT_Done_Face (data->face); FT_Done_Face (data->face);
gfree (data); gfree (data);
@ -322,6 +322,8 @@ _ft_new_face (FT_Library lib,
munmap (tmpl.bytes, tmpl.size); munmap (tmpl.bytes, tmpl.size);
#endif #endif
close (tmpl.fd); close (tmpl.fd);
} else {
gfree (tmpl.bytes);
} }
*face_out = l->face; *face_out = l->face;
*font_face_out = cairo_font_face_reference (l->font_face); *font_face_out = cairo_font_face_reference (l->font_face);
@ -399,7 +401,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
GfxFontType fontType; GfxFontType fontType;
GfxFontLoc *fontLoc; GfxFontLoc *fontLoc;
char **enc; char **enc;
char *name; const char *name;
FoFiTrueType *ff; FoFiTrueType *ff;
FoFiType1C *ff1c; FoFiType1C *ff1c;
Ref ref; Ref ref;
@ -408,7 +410,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
int *codeToGID; int *codeToGID;
Guint codeToGIDLen; Guint codeToGIDLen;
codeToGID = NULL; codeToGID = NULL;
codeToGIDLen = 0; codeToGIDLen = 0;
font_data = NULL; font_data = NULL;
@ -417,12 +419,11 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
fileNameC = NULL; fileNameC = NULL;
GBool substitute = gFalse; GBool substitute = gFalse;
ref = *gfxFont->getID(); ref = *gfxFont->getID();
fontType = gfxFont->getType(); fontType = gfxFont->getType();
// pdf2htmlEX: changed gFlase to nullptr if (!(fontLoc = gfxFont->locateFont(xref, NULL))) {
if (!(fontLoc = gfxFont->locateFont(xref, nullptr))) {
error(errSyntaxError, -1, "Couldn't find a font for '{0:s}'", error(errSyntaxError, -1, "Couldn't find a font for '{0:s}'",
gfxFont->getName() ? gfxFont->getName()->getCString() gfxFont->getName() ? gfxFont->getName()->getCString()
: "(unnamed)"); : "(unnamed)");
@ -454,15 +455,26 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
error(errSyntaxError, -1, "could not create type1 face"); error(errSyntaxError, -1, "could not create type1 face");
goto err2; goto err2;
} }
enc = ((Gfx8BitFont *)gfxFont)->getEncoding(); enc = ((Gfx8BitFont *)gfxFont)->getEncoding();
codeToGID = (int *)gmallocn(256, sizeof(int)); codeToGID = (int *)gmallocn(256, sizeof(int));
codeToGIDLen = 256; codeToGIDLen = 256;
for (i = 0; i < 256; ++i) { for (i = 0; i < 256; ++i) {
codeToGID[i] = 0; codeToGID[i] = 0;
if ((name = enc[i])) { if ((name = enc[i])) {
codeToGID[i] = FT_Get_Name_Index(face, name); codeToGID[i] = FT_Get_Name_Index(face, (char*)name);
if (codeToGID[i] == 0) {
Unicode u;
u = globalParams->mapNameToUnicodeText (name);
codeToGID[i] = FT_Get_Char_Index (face, u);
}
if (codeToGID[i] == 0) {
name = GfxFont::getAlternateName(name);
if (name) {
codeToGID[i] = FT_Get_Name_Index(face, (char*)name);
}
}
} }
} }
break; break;
@ -491,6 +503,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
codeToGIDLen = n; codeToGIDLen = n;
/* Fall through */ /* Fall through */
case fontTrueType: case fontTrueType:
case fontTrueTypeOT:
if (font_data != NULL) { if (font_data != NULL) {
ff = FoFiTrueType::make(font_data, font_data_len); ff = FoFiTrueType::make(font_data, font_data_len);
} else { } else {
@ -501,7 +514,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
goto err2; goto err2;
} }
/* This might be set already for the CIDType2 case */ /* This might be set already for the CIDType2 case */
if (fontType == fontTrueType) { if (fontType == fontTrueType || fontType == fontTrueTypeOT) {
codeToGID = ((Gfx8BitFont *)gfxFont)->getCodeToGIDMap(ff); codeToGID = ((Gfx8BitFont *)gfxFont)->getCodeToGIDMap(ff);
codeToGIDLen = 256; codeToGIDLen = 256;
} }
@ -511,7 +524,7 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
goto err2; goto err2;
} }
break; break;
case fontCIDType0: case fontCIDType0:
case fontCIDType0C: case fontCIDType0C:
@ -532,13 +545,45 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
} }
if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) { if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
gfree(codeToGID);
codeToGID = NULL;
error(errSyntaxError, -1, "could not create cid face\n"); error(errSyntaxError, -1, "could not create cid face\n");
goto err2; goto err2;
} }
break; break;
case fontCIDType0COT:
codeToGID = NULL;
n = 0;
if (((GfxCIDFont *)gfxFont)->getCIDToGID()) {
n = ((GfxCIDFont *)gfxFont)->getCIDToGIDLen();
if (n) {
codeToGID = (int *)gmallocn(n, sizeof(int));
memcpy(codeToGID, ((GfxCIDFont *)gfxFont)->getCIDToGID(),
n * sizeof(int));
}
}
codeToGIDLen = n;
if (!codeToGID) {
if (!useCIDs) {
if (font_data != NULL) {
ff = FoFiTrueType::make(font_data, font_data_len);
} else {
ff = FoFiTrueType::load(fileNameC);
}
if (ff) {
if (ff->isOpenTypeCFF()) {
codeToGID = ff->getCIDToGIDMap((int *)&codeToGIDLen);
}
delete ff;
}
}
}
if (! _ft_new_face (lib, fileNameC, font_data, font_data_len, &face, &font_face)) {
error(errSyntaxError, -1, "could not create cid (OT) face\n");
goto err2;
}
break;
default: default:
fprintf (stderr, "font type %d not handled\n", (int)fontType); fprintf (stderr, "font type %d not handled\n", (int)fontType);
goto err2; goto err2;
@ -554,6 +599,8 @@ CairoFreeTypeFont *CairoFreeTypeFont::create(GfxFont *gfxFont, XRef *xref,
err2: err2:
/* hmm? */ /* hmm? */
delete fontLoc; delete fontLoc;
gfree (codeToGID);
gfree (font_data);
fprintf (stderr, "some font thing failed\n"); fprintf (stderr, "some font thing failed\n");
return NULL; return NULL;
} }
@ -659,7 +706,8 @@ _render_type3_glyph (cairo_scaled_font_t *scaled_font,
output_dev->startDoc(info->doc, info->fontEngine); output_dev->startDoc(info->doc, info->fontEngine);
output_dev->startPage (1, gfx->getState(), gfx->getXRef()); output_dev->startPage (1, gfx->getState(), gfx->getXRef());
output_dev->setInType3Char(gTrue); output_dev->setInType3Char(gTrue);
gfx->display(charProcs->getVal(glyph, &charProc)); charProc = charProcs->getVal(glyph);
gfx->display(&charProc);
output_dev->getType3GlyphWidth (&wx, &wy); output_dev->getType3GlyphWidth (&wx, &wy);
cairo_matrix_transform_distance (&matrix, &wx, &wy); cairo_matrix_transform_distance (&matrix, &wx, &wy);
@ -678,7 +726,6 @@ _render_type3_glyph (cairo_scaled_font_t *scaled_font,
delete gfx; delete gfx;
delete output_dev; delete output_dev;
charProc.free();
return CAIRO_STATUS_SUCCESS; return CAIRO_STATUS_SUCCESS;
} }
@ -762,33 +809,27 @@ CairoFontEngine::CairoFontEngine(FT_Library libA) {
for (i = 0; i < cairoFontCacheSize; ++i) { for (i = 0; i < cairoFontCacheSize; ++i) {
fontCache[i] = NULL; fontCache[i] = NULL;
} }
FT_Int major, minor, patch; FT_Int major, minor, patch;
// as of FT 2.1.8, CID fonts are indexed by CID instead of GID // as of FT 2.1.8, CID fonts are indexed by CID instead of GID
FT_Library_Version(lib, &major, &minor, &patch); FT_Library_Version(lib, &major, &minor, &patch);
useCIDs = major > 2 || useCIDs = major > 2 ||
(major == 2 && (minor > 1 || (minor == 1 && patch > 7))); (major == 2 && (minor > 1 || (minor == 1 && patch > 7)));
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED #if MULTITHREADED
gInitMutex(&mutex); gInitMutex(&mutex);
#endif #endif
*/
} }
CairoFontEngine::~CairoFontEngine() { CairoFontEngine::~CairoFontEngine() {
int i; int i;
for (i = 0; i < cairoFontCacheSize; ++i) { for (i = 0; i < cairoFontCacheSize; ++i) {
if (fontCache[i]) if (fontCache[i])
delete fontCache[i]; delete fontCache[i];
} }
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED #if MULTITHREADED
gDestroyMutex(&mutex); gDestroyMutex(&mutex);
#endif #endif
*/
} }
CairoFont * CairoFont *
@ -797,7 +838,7 @@ CairoFontEngine::getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing, XRef *xr
Ref ref; Ref ref;
CairoFont *font; CairoFont *font;
GfxFontType fontType; GfxFontType fontType;
fontEngineLocker(); fontEngineLocker();
ref = *gfxFont->getID(); ref = *gfxFont->getID();
@ -811,7 +852,7 @@ CairoFontEngine::getFont(GfxFont *gfxFont, PDFDoc *doc, GBool printing, XRef *xr
return font; return font;
} }
} }
fontType = gfxFont->getType(); fontType = gfxFont->getType();
if (fontType == fontType3) if (fontType == fontType3)
font = CairoType3Font::create (gfxFont, doc, this, printing, xref); font = CairoType3Font::create (gfxFont, doc, this, printing, xref);

View File

@ -74,7 +74,7 @@ protected:
class CairoFreeTypeFont : public CairoFont { class CairoFreeTypeFont : public CairoFont {
public: public:
static CairoFreeTypeFont *create(GfxFont *gfxFont, XRef *xref, FT_Library lib, GBool useCIDs); static CairoFreeTypeFont *create(GfxFont *gfxFont, XRef *xref, FT_Library lib, GBool useCIDs);
virtual ~CairoFreeTypeFont(); ~CairoFreeTypeFont();
private: private:
CairoFreeTypeFont(Ref ref, cairo_font_face_t *cairo_font_face, CairoFreeTypeFont(Ref ref, cairo_font_face_t *cairo_font_face,
@ -88,9 +88,9 @@ public:
static CairoType3Font *create(GfxFont *gfxFont, PDFDoc *doc, static CairoType3Font *create(GfxFont *gfxFont, PDFDoc *doc,
CairoFontEngine *fontEngine, CairoFontEngine *fontEngine,
GBool printing, XRef *xref); GBool printing, XRef *xref);
virtual ~CairoType3Font(); ~CairoType3Font();
virtual GBool matches(Ref &other, GBool printing); GBool matches(Ref &other, GBool printing) override;
private: private:
CairoType3Font(Ref ref, PDFDoc *doc, CairoType3Font(Ref ref, PDFDoc *doc,
@ -121,12 +121,9 @@ private:
CairoFont *fontCache[cairoFontCacheSize]; CairoFont *fontCache[cairoFontCacheSize];
FT_Library lib; FT_Library lib;
GBool useCIDs; GBool useCIDs;
/*
* pdf2htmlEX: disabled multi thread
#if MULTITHREADED #if MULTITHREADED
GooMutex mutex; GooMutex mutex;
#endif #endif
*/
}; };
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -18,9 +18,11 @@
// Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com> // Copyright (C) 2005, 2006 Kristian Høgsberg <krh@redhat.com>
// Copyright (C) 2005 Nickolay V. Shmyrev <nshmyrev@yandex.ru> // Copyright (C) 2005 Nickolay V. Shmyrev <nshmyrev@yandex.ru>
// Copyright (C) 2006-2011, 2013 Carlos Garcia Campos <carlosgc@gnome.org> // Copyright (C) 2006-2011, 2013 Carlos Garcia Campos <carlosgc@gnome.org>
// Copyright (C) 2008, 2009, 2011-2013 Adrian Johnson <ajohnson@redneon.com> // Copyright (C) 2008, 2009, 2011-2016 Adrian Johnson <ajohnson@redneon.com>
// Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu> // Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
// Copyright (C) 2010-2013 Thomas Freitag <Thomas.Freitag@alfa.de> // Copyright (C) 2010-2013 Thomas Freitag <Thomas.Freitag@alfa.de>
// Copyright (C) 2015 Suzuki Toshiya <mpsuzuki@hiroshima-u.ac.jp>
// Copyright (C) 2016 Jason Crain <jason@aquaticape.us>
// //
// To see a description of the changes please see the Changelog file that // To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git // came with your tarball or type make ChangeLog if you are building from git
@ -63,14 +65,14 @@ public:
// Set the image cairo surface // Set the image cairo surface
void setImage (cairo_surface_t *image); void setImage (cairo_surface_t *image);
// Get the image cairo surface // Get the image cairo surface
cairo_surface_t *getImage () const { return image; } cairo_surface_t *getImage () const { return image; }
// Get the image rectangle // Get the image rectangle
void getRect (double *xa1, double *ya1, double *xa2, double *ya2) void getRect (double *xa1, double *ya1, double *xa2, double *ya2)
{ *xa1 = x1; *ya1 = y1; *xa2 = x2; *ya2 = y2; } { *xa1 = x1; *ya1 = y1; *xa2 = x2; *ya2 = y2; }
private: private:
cairo_surface_t *image; // image cairo surface cairo_surface_t *image; // image cairo surface
double x1, y1; // upper left corner double x1, y1; // upper left corner
@ -95,114 +97,125 @@ public:
// Does this device use upside-down coordinates? // Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.) // (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; } GBool upsideDown() override { return gTrue; }
// Does this device use drawChar() or drawString()? // Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gTrue; } GBool useDrawChar() override { return gTrue; }
// Does this device use tilingPatternFill()? If this returns false, // Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing // tiling pattern fills will be reduced to a series of other drawing
// operations. // operations.
virtual GBool useTilingPatternFill() { return gTrue; } GBool useTilingPatternFill() override { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and // Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills // radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations. // will be reduced to a series of other drawing operations.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0) #if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool useShadedFills(int type) { return type <= 7; } GBool useShadedFills(int type) override { return type <= 7; }
#else #else
virtual GBool useShadedFills(int type) { return type < 4; } GBool useShadedFills(int type) override { return type > 1 && type < 4; }
#endif #endif
// Does this device use FillColorStop()? // Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gTrue; } GBool useFillColorStop() override { return gTrue; }
// Does this device use beginType3Char/endType3Char? Otherwise, // Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString. // text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; } GBool interpretType3Chars() override { return gFalse; }
// Does this device need to clip pages to the crop box even when the
// box is the crop box?
GBool needClipToCropBox() override { return gTrue; }
// Does this device need to clip pages to the crop box even when the
// box is the crop box?
virtual GBool needClipToCropBox() { return gTrue; }
//----- initialization and control //----- initialization and control
// Start a page. // Start a page.
virtual void startPage(int pageNum, GfxState *state, XRef *xref); void startPage(int pageNum, GfxState *state, XRef *xref) override;
// End a page. // End a page.
virtual void endPage(); void endPage() override;
//----- save/restore graphics state //----- save/restore graphics state
virtual void saveState(GfxState *state); void saveState(GfxState *state) override;
virtual void restoreState(GfxState *state); void restoreState(GfxState *state) override;
//----- update graphics state //----- update graphics state
virtual void updateAll(GfxState *state); void updateAll(GfxState *state) override;
virtual void setDefaultCTM(double *ctm); void setDefaultCTM(double *ctm) override;
virtual void updateCTM(GfxState *state, double m11, double m12, void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32); double m21, double m22, double m31, double m32) override;
virtual void updateLineDash(GfxState *state); void updateLineDash(GfxState *state) override;
virtual void updateFlatness(GfxState *state); void updateFlatness(GfxState *state) override;
virtual void updateLineJoin(GfxState *state); void updateLineJoin(GfxState *state) override;
virtual void updateLineCap(GfxState *state); void updateLineCap(GfxState *state) override;
virtual void updateMiterLimit(GfxState *state); void updateMiterLimit(GfxState *state) override;
virtual void updateLineWidth(GfxState *state); void updateLineWidth(GfxState *state) override;
virtual void updateFillColor(GfxState *state); void updateFillColor(GfxState *state) override;
virtual void updateStrokeColor(GfxState *state); void updateStrokeColor(GfxState *state) override;
virtual void updateFillOpacity(GfxState *state); void updateFillOpacity(GfxState *state) override;
virtual void updateStrokeOpacity(GfxState *state); void updateStrokeOpacity(GfxState *state) override;
virtual void updateFillColorStop(GfxState *state, double offset); void updateFillColorStop(GfxState *state, double offset) override;
virtual void updateBlendMode(GfxState *state); void updateBlendMode(GfxState *state) override;
//----- update text state //----- update text state
virtual void updateFont(GfxState *state); void updateFont(GfxState *state) override;
//----- path painting //----- path painting
virtual void stroke(GfxState *state); void stroke(GfxState *state) override;
virtual void fill(GfxState *state); void fill(GfxState *state) override;
virtual void eoFill(GfxState *state); void eoFill(GfxState *state) override;
virtual void clipToStrokePath(GfxState *state); void clipToStrokePath(GfxState *state) override;
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict, double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox, double *mat, double *bbox,
int x0, int y0, int x1, int y1, int x0, int y0, int x1, int y1,
double xStep, double yStep); double xStep, double yStep) override;
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual GBool axialShadedSupportExtend(GfxState *state, GfxAxialShading *shading);
virtual GBool radialShadedFill(GfxState *state, GfxRadialShading *shading, double sMin, double sMax);
virtual GBool radialShadedSupportExtend(GfxState *state, GfxRadialShading *shading);
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0) #if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
virtual GBool gouraudTriangleShadedFill(GfxState *state, GfxGouraudTriangleShading *shading); GBool functionShadedFill(GfxState *state, GfxFunctionShading *shading) override;
virtual GBool patchMeshShadedFill(GfxState *state, GfxPatchMeshShading *shading); #endif
GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) override;
GBool axialShadedSupportExtend(GfxState *state, GfxAxialShading *shading) override;
GBool radialShadedFill(GfxState *state, GfxRadialShading *shading, double sMin, double sMax) override;
GBool radialShadedSupportExtend(GfxState *state, GfxRadialShading *shading) override;
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 12, 0)
GBool gouraudTriangleShadedFill(GfxState *state, GfxGouraudTriangleShading *shading) override;
GBool patchMeshShadedFill(GfxState *state, GfxPatchMeshShading *shading) override;
#endif #endif
//----- path clipping //----- path clipping
virtual void clip(GfxState *state); void clip(GfxState *state) override;
virtual void eoClip(GfxState *state); void eoClip(GfxState *state) override;
//----- text drawing //----- text drawing
void beginString(GfxState *state, GooString *s); void beginString(GfxState *state, GooString *s) override;
void endString(GfxState *state); void endString(GfxState *state) override;
void drawChar(GfxState *state, double x, double y, void drawChar(GfxState *state, double x, double y,
double dx, double dy, double dx, double dy,
double originX, double originY, double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen); CharCode code, int nBytes, Unicode *u, int uLen) override;
void beginActualText(GfxState *state, GooString *text); void beginActualText(GfxState *state, GooString *text) override;
void endActualText(GfxState *state); void endActualText(GfxState *state) override;
virtual GBool beginType3Char(GfxState *state, double x, double y, GBool beginType3Char(GfxState *state, double x, double y,
double dx, double dy, double dx, double dy,
CharCode code, Unicode *u, int uLen); CharCode code, Unicode *u, int uLen) override;
virtual void endType3Char(GfxState *state); void endType3Char(GfxState *state) override;
virtual void beginTextObject(GfxState *state); void beginTextObject(GfxState *state) override;
virtual void endTextObject(GfxState *state); void endTextObject(GfxState *state) override;
//----- image drawing //----- image drawing
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate, int width, int height, GBool invert, GBool interpolate,
GBool inlineImg); GBool inlineImg) override;
virtual void setSoftMaskFromImageMask(GfxState *state, void setSoftMaskFromImageMask(GfxState *state,
Object *ref, Stream *str, Object *ref, Stream *str,
int width, int height, GBool invert, int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix); GBool inlineImg, double *baseMatrix) override;
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix); void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) override;
void drawImageMaskPrescaled(GfxState *state, Object *ref, Stream *str, void drawImageMaskPrescaled(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, GBool interpolate, int width, int height, GBool invert, GBool interpolate,
GBool inlineImg); GBool inlineImg);
@ -210,53 +223,54 @@ public:
int width, int height, GBool invert, GBool interpolate, int width, int height, GBool invert, GBool interpolate,
GBool inlineImg); GBool inlineImg);
virtual void drawImage(GfxState *state, Object *ref, Stream *str, void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap, int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg); GBool interpolate, int *maskColors, GBool inlineImg) override;
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str, void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height, int width, int height,
GfxImageColorMap *colorMap, GfxImageColorMap *colorMap,
GBool interpolate, GBool interpolate,
Stream *maskStr, Stream *maskStr,
int maskWidth, int maskHeight, int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap, GfxImageColorMap *maskColorMap,
GBool maskInterpolate); GBool maskInterpolate) override;
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str, void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height, int width, int height,
GfxImageColorMap *colorMap, GfxImageColorMap *colorMap,
GBool interpolate, GBool interpolate,
Stream *maskStr, Stream *maskStr,
int maskWidth, int maskHeight, int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate); GBool maskInvert, GBool maskInterpolate) override;
//----- transparency groups and soft masks //----- transparency groups and soft masks
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/, void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/, GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/, GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/); GBool /*forSoftMask*/) override;
virtual void endTransparencyGroup(GfxState * /*state*/); void endTransparencyGroup(GfxState * /*state*/) override;
void popTransparencyGroup(); void popTransparencyGroup();
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/); void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) override;
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/, void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/); Function * /*transferFunc*/, GfxColor * /*backdropColor*/) override;
virtual void clearSoftMask(GfxState * /*state*/); void clearSoftMask(GfxState * /*state*/) override;
//----- Type 3 font operators //----- Type 3 font operators
virtual void type3D0(GfxState *state, double wx, double wy); void type3D0(GfxState *state, double wx, double wy) override;
virtual void type3D1(GfxState *state, double wx, double wy, void type3D1(GfxState *state, double wx, double wy,
double llx, double lly, double urx, double ury); double llx, double lly, double urx, double ury) override;
//----- special access //----- special access
// Called to indicate that a new PDF document has been loaded. // Called to indicate that a new PDF document has been loaded.
void startDoc(PDFDoc *docA, CairoFontEngine *fontEngine = NULL); void startDoc(PDFDoc *docA, CairoFontEngine *fontEngine = NULL);
GBool isReverseVideo() { return gFalse; } GBool isReverseVideo() { return gFalse; }
void setCairo (cairo_t *cr); void setCairo (cairo_t *cr);
void setTextPage (TextPage *text); void setTextPage (TextPage *text);
void setPrinting (GBool printing) { this->printing = printing; needFontUpdate = gTrue; } void setPrinting (GBool printing) { this->printing = printing; needFontUpdate = gTrue; }
void setAntialias(cairo_antialias_t antialias);
void setInType3Char(GBool inType3Char) { this->inType3Char = inType3Char; } void setInType3Char(GBool inType3Char) { this->inType3Char = inType3Char; }
void getType3GlyphWidth (double *wx, double *wy) { *wx = t3_glyph_wx; *wy = t3_glyph_wy; } void getType3GlyphWidth (double *wx, double *wy) { *wx = t3_glyph_wx; *wy = t3_glyph_wy; }
@ -272,11 +286,14 @@ protected:
cairo_filter_t getFilterForSurface(cairo_surface_t *image, cairo_filter_t getFilterForSurface(cairo_surface_t *image,
GBool interpolate); GBool interpolate);
GBool getStreamData (Stream *str, char **buffer, int *length); GBool getStreamData (Stream *str, char **buffer, int *length);
// pdf2htmlEX: make setMimeData virtual, we need to override it void setMimeData(GfxState *state, Stream *str, Object *ref,
virtual GfxImageColorMap *colorMap, cairo_surface_t *image);
void setMimeData(Stream *str, Object *ref, cairo_surface_t *image);
void fillToStrokePathClip(GfxState *state); void fillToStrokePathClip(GfxState *state);
void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y); void alignStrokeCoords(GfxSubpath *subpath, int i, double *x, double *y);
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 14, 0)
GBool setMimeDataForJBIG2Globals (Stream *str, cairo_surface_t *image);
#endif
static void setContextAntialias(cairo_t *cr, cairo_antialias_t antialias);
GfxRGB fill_color, stroke_color; GfxRGB fill_color, stroke_color;
cairo_pattern_t *fill_pattern, *stroke_pattern; cairo_pattern_t *fill_pattern, *stroke_pattern;
@ -298,6 +315,7 @@ protected:
cairo_line_cap_t cap; cairo_line_cap_t cap;
cairo_line_join_t join; cairo_line_join_t join;
double miter; double miter;
int ref_count;
} *strokePathClip; } *strokePathClip;
PDFDoc *doc; // the current document PDFDoc *doc; // the current document
@ -313,6 +331,7 @@ protected:
GBool needFontUpdate; // set when the font needs to be updated GBool needFontUpdate; // set when the font needs to be updated
GBool printing; GBool printing;
GBool use_show_text_glyphs; GBool use_show_text_glyphs;
GBool text_matrix_valid;
cairo_surface_t *surface; cairo_surface_t *surface;
cairo_glyph_t *glyphs; cairo_glyph_t *glyphs;
int glyphCount; int glyphCount;
@ -327,7 +346,7 @@ protected:
double t3_glyph_wx, t3_glyph_wy; double t3_glyph_wx, t3_glyph_wy;
GBool t3_glyph_has_bbox; GBool t3_glyph_has_bbox;
double t3_glyph_bbox[4]; double t3_glyph_bbox[4];
cairo_antialias_t antialias;
GBool prescaleImages; GBool prescaleImages;
TextPage *text; // text for the current page TextPage *text; // text for the current page
@ -373,118 +392,118 @@ public:
// Does this device use upside-down coordinates? // Does this device use upside-down coordinates?
// (Upside-down means (0,0) is the top left corner of the page.) // (Upside-down means (0,0) is the top left corner of the page.)
virtual GBool upsideDown() { return gTrue; } GBool upsideDown() override { return gTrue; }
// Does this device use drawChar() or drawString()? // Does this device use drawChar() or drawString()?
virtual GBool useDrawChar() { return gFalse; } GBool useDrawChar() override { return gFalse; }
// Does this device use tilingPatternFill()? If this returns false, // Does this device use tilingPatternFill()? If this returns false,
// tiling pattern fills will be reduced to a series of other drawing // tiling pattern fills will be reduced to a series of other drawing
// operations. // operations.
virtual GBool useTilingPatternFill() { return gTrue; } GBool useTilingPatternFill() override { return gTrue; }
// Does this device use functionShadedFill(), axialShadedFill(), and // Does this device use functionShadedFill(), axialShadedFill(), and
// radialShadedFill()? If this returns false, these shaded fills // radialShadedFill()? If this returns false, these shaded fills
// will be reduced to a series of other drawing operations. // will be reduced to a series of other drawing operations.
#if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 11, 2) #if CAIRO_VERSION >= CAIRO_VERSION_ENCODE(1, 11, 2)
virtual GBool useShadedFills(int type) { return type <= 7; } GBool useShadedFills(int type) override { return type <= 7; }
#else #else
virtual GBool useShadedFills(int type) { return type < 4; } GBool useShadedFills(int type) override { return type < 4; }
#endif #endif
// Does this device use FillColorStop()? // Does this device use FillColorStop()?
virtual GBool useFillColorStop() { return gFalse; } GBool useFillColorStop() override { return gFalse; }
// Does this device use beginType3Char/endType3Char? Otherwise, // Does this device use beginType3Char/endType3Char? Otherwise,
// text in Type 3 fonts will be drawn with drawChar/drawString. // text in Type 3 fonts will be drawn with drawChar/drawString.
virtual GBool interpretType3Chars() { return gFalse; } GBool interpretType3Chars() override { return gFalse; }
// Does this device need non-text content? // Does this device need non-text content?
virtual GBool needNonText() { return gTrue; } GBool needNonText() override { return gTrue; }
//----- save/restore graphics state //----- save/restore graphics state
virtual void saveState(GfxState *state) { } void saveState(GfxState *state) override { }
virtual void restoreState(GfxState *state) { } void restoreState(GfxState *state) override { }
//----- update graphics state //----- update graphics state
virtual void updateAll(GfxState *state) { } void updateAll(GfxState *state) override { }
virtual void setDefaultCTM(double *ctm) { } void setDefaultCTM(double *ctm) override { }
virtual void updateCTM(GfxState *state, double m11, double m12, void updateCTM(GfxState *state, double m11, double m12,
double m21, double m22, double m31, double m32) { } double m21, double m22, double m31, double m32) override { }
virtual void updateLineDash(GfxState *state) { } void updateLineDash(GfxState *state) override { }
virtual void updateFlatness(GfxState *state) { } void updateFlatness(GfxState *state) override { }
virtual void updateLineJoin(GfxState *state) { } void updateLineJoin(GfxState *state) override { }
virtual void updateLineCap(GfxState *state) { } void updateLineCap(GfxState *state) override { }
virtual void updateMiterLimit(GfxState *state) { } void updateMiterLimit(GfxState *state) override { }
virtual void updateLineWidth(GfxState *state) { } void updateLineWidth(GfxState *state) override { }
virtual void updateFillColor(GfxState *state) { } void updateFillColor(GfxState *state) override { }
virtual void updateStrokeColor(GfxState *state) { } void updateStrokeColor(GfxState *state) override { }
virtual void updateFillOpacity(GfxState *state) { } void updateFillOpacity(GfxState *state) override { }
virtual void updateStrokeOpacity(GfxState *state) { } void updateStrokeOpacity(GfxState *state) override { }
virtual void updateBlendMode(GfxState *state) { } void updateBlendMode(GfxState *state) override { }
//----- update text state //----- update text state
virtual void updateFont(GfxState *state) { } void updateFont(GfxState *state) override { }
//----- path painting //----- path painting
virtual void stroke(GfxState *state) { } void stroke(GfxState *state) override { }
virtual void fill(GfxState *state) { } void fill(GfxState *state) override { }
virtual void eoFill(GfxState *state) { } void eoFill(GfxState *state) override { }
virtual void clipToStrokePath(GfxState *state) { } void clipToStrokePath(GfxState *state) override { }
virtual GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str, GBool tilingPatternFill(GfxState *state, Gfx *gfx, Catalog *cat, Object *str,
double *pmat, int paintType, int tilingType, Dict *resDict, double *pmat, int paintType, int tilingType, Dict *resDict,
double *mat, double *bbox, double *mat, double *bbox,
int x0, int y0, int x1, int y1, int x0, int y0, int x1, int y1,
double xStep, double yStep) { return gTrue; } double xStep, double yStep) override { return gTrue; }
virtual GBool axialShadedFill(GfxState *state, GBool axialShadedFill(GfxState *state,
GfxAxialShading *shading, GfxAxialShading *shading,
double tMin, double tMax) { return gTrue; } double tMin, double tMax) override { return gTrue; }
virtual GBool radialShadedFill(GfxState *state, GBool radialShadedFill(GfxState *state,
GfxRadialShading *shading, GfxRadialShading *shading,
double sMin, double sMax) { return gTrue; } double sMin, double sMax) override { return gTrue; }
//----- path clipping //----- path clipping
virtual void clip(GfxState *state) { } void clip(GfxState *state) override { }
virtual void eoClip(GfxState *state) { } void eoClip(GfxState *state) override { }
//----- image drawing //----- image drawing
virtual void drawImageMask(GfxState *state, Object *ref, Stream *str, void drawImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, int width, int height, GBool invert,
GBool interpolate, GBool inlineImg); GBool interpolate, GBool inlineImg) override;
virtual void drawImage(GfxState *state, Object *ref, Stream *str, void drawImage(GfxState *state, Object *ref, Stream *str,
int width, int height, GfxImageColorMap *colorMap, int width, int height, GfxImageColorMap *colorMap,
GBool interpolate, int *maskColors, GBool inlineImg); GBool interpolate, int *maskColors, GBool inlineImg) override;
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str, void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height, int width, int height,
GfxImageColorMap *colorMap, GfxImageColorMap *colorMap,
GBool interpolate, GBool interpolate,
Stream *maskStr, Stream *maskStr,
int maskWidth, int maskHeight, int maskWidth, int maskHeight,
GfxImageColorMap *maskColorMap, GfxImageColorMap *maskColorMap,
GBool maskInterpolate); GBool maskInterpolate) override;
virtual void drawMaskedImage(GfxState *state, Object *ref, Stream *str, void drawMaskedImage(GfxState *state, Object *ref, Stream *str,
int width, int height, int width, int height,
GfxImageColorMap *colorMap, GfxImageColorMap *colorMap,
GBool interpolate, GBool interpolate,
Stream *maskStr, Stream *maskStr,
int maskWidth, int maskHeight, int maskWidth, int maskHeight,
GBool maskInvert, GBool maskInterpolate); GBool maskInvert, GBool maskInterpolate) override;
virtual void setSoftMaskFromImageMask(GfxState *state, Object *ref, Stream *str, void setSoftMaskFromImageMask(GfxState *state, Object *ref, Stream *str,
int width, int height, GBool invert, int width, int height, GBool invert,
GBool inlineImg, double *baseMatrix); GBool inlineImg, double *baseMatrix) override;
virtual void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) {} void unsetSoftMaskFromImageMask(GfxState *state, double *baseMatrix) override {}
//----- transparency groups and soft masks //----- transparency groups and soft masks
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/, void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/, GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/, GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/) {} GBool /*forSoftMask*/) override {}
virtual void endTransparencyGroup(GfxState * /*state*/) {} void endTransparencyGroup(GfxState * /*state*/) override {}
virtual void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) {} void paintTransparencyGroup(GfxState * /*state*/, double * /*bbox*/) override {}
virtual void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/, void setSoftMask(GfxState * /*state*/, double * /*bbox*/, GBool /*alpha*/,
Function * /*transferFunc*/, GfxColor * /*backdropColor*/) {} Function * /*transferFunc*/, GfxColor * /*backdropColor*/) override {}
virtual void clearSoftMask(GfxState * /*state*/) {} void clearSoftMask(GfxState * /*state*/) override {}
//----- Image list //----- Image list
// By default images are not rendered // By default images are not rendered
@ -498,7 +517,7 @@ private:
void saveImage(CairoImage *image); void saveImage(CairoImage *image);
void getBBox(GfxState *state, int width, int height, void getBBox(GfxState *state, int width, int height,
double *x1, double *y1, double *x2, double *y2); double *x1, double *y1, double *x2, double *y2);
CairoImage **images; CairoImage **images;
int numImages; int numImages;
int size; int size;

View File

@ -51,6 +51,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <math.h> #include <math.h>
#include "goo/gmem.h" #include "goo/gmem.h"
//#include "goo/gtypes_p.h"
#include "CairoRescaleBox.h" #include "CairoRescaleBox.h"
@ -374,4 +375,4 @@ cleanup:
free (scanline); free (scanline);
return retval; return retval;
} }

View File

@ -58,4 +58,4 @@ public:
}; };
#endif /* CAIRO_RESCALE_BOX_H */ #endif /* CAIRO_RESCALE_BOX_H */

View File

@ -9,7 +9,7 @@ option(ENABLE_SVG "Enable SVG support, for generating SVG background images and
include_directories(${CMAKE_SOURCE_DIR}/src) include_directories(${CMAKE_SOURCE_DIR}/src)
set(PDF2HTMLEX_VERSION "0.14.6") set(PDF2HTMLEX_VERSION "0.15.0")
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
add_custom_target(dist add_custom_target(dist
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
@ -81,7 +81,7 @@ endif()
if(CYGWIN) if(CYGWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x")
else() else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x -pthread")
endif() endif()
# check the C++11 features we need # check the C++11 features we need

View File

@ -2,6 +2,28 @@ pdf2htmlEX is no longer under active development. New maintainers are [wanted](h
#![](http://coolwanglu.github.io/pdf2htmlEX/images/pdf2htmlEX-64x64.png) pdf2htmlEX #![](http://coolwanglu.github.io/pdf2htmlEX/images/pdf2htmlEX-64x64.png) pdf2htmlEX
# My branch differences:
This is my branch of pdf2htmlEX which I maintain for my own purposes. I have made a number of changes and improvements over the original code:
* Lots of bug fixes, mostly for edge cases
* Integration of latest Cairo code
* Out of source building
* Rewritten handling of obscured/partially obscured text - now much more accurate
* Some support for transparent text
* Improvement of DPI settings - clamping of DPI to ensure output graphic isn't too big
`--correct-text-visibility` tracks the visibility of 4 sample points for each character (currently the 4 corners of the character's bounding box, inset slightly) to determine visibility.
It now has two active modes (0 disables the check). 1 = Fully occluded text is handled (i.e. it is not put into the HTML layer). 2 = Partially occluded text is handled as well.
The default is now "1", so fully occluded text should no longer show through. If "2" is selected, a character that is only partially occluded will also be drawn in the background layer. In this case, the rendered DPI of the page will be automatically increased to `--covered-text-dpi` (default: 300) to reduce the impact of rasterized text.
For maximum accuracy I strongly recommend using the output options: `--font-size-multiplier 1 --zoom 25`. This will circumvent rounding errors inside web browsers. You will then have to scale down the resulting HTML page using an appropriate "scale" transform.
If you are concerned about file size of the resulting HTML, then I recommend patching fontforge to prevent it writing the current time into the dumped fonts, and then post-process the pdf2htmlEX data to remove duplicate files - there will usually be many duplicate background images and fonts.
# Original README.md follows...
<!-- <!--
[![Build Status](https://travis-ci.org/coolwanglu/pdf2htmlEX.png?branch=master)](https://travis-ci.org/coolwanglu/pdf2htmlEX) [![Build Status](https://travis-ci.org/coolwanglu/pdf2htmlEX.png?branch=master)](https://travis-ci.org/coolwanglu/pdf2htmlEX)
--> -->

4
dobuild Executable file
View File

@ -0,0 +1,4 @@
#!/bin/sh
# Out-of-source build: configure in ./build, then build and install.
# -p lets the script be re-run when ./build already exists.
mkdir -p build
# Bail out on failure so cmake/make never run in the source tree
# or after a failed configure step.
cd build || exit 1
cmake .. || exit 1
make install

1
doclean Executable file
View File

@ -0,0 +1 @@
# Clean script: delete the ./build directory, the pdf2htmlEX.1 man page, the
# *.css / *.js / *.min.* files under share/, and two headers under src/.
# NOTE(review): presumably these are all build outputs - confirm none are tracked sources.
rm -rf build pdf2htmlEX.1 share/*.css share/*.js share/*.min.* src/pdf2htmlEX-config.h src/util/css_const.h

View File

@ -247,9 +247,10 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above.
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong. If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
.TP .TP
.B --correct-text-visibility <0|1> (Default: 0) .B --correct-text-visibility <0|1|2> (Default: 1)
If set to 1, pdf2htmlEX will try to detect texts covered by other graphics and properly arrange them, 0 : Do not do visibility calculations for text
i.e. covered texts are made transparent in text layer, and are drawn on background layer. 1 : Text fully occluded will be drawn in the background layer
2 : Text partially occluded will be drawn in the background layer (more false positives than option "1")
.SS Background Image .SS Background Image

View File

@ -901,7 +901,7 @@ Viewer.prototype = {
var self = this; var self = this;
/** /**
* page should have type Page * page should have type Page
* @param{Page} page * @param{Page} page
*/ */
var transform_and_scroll = function(page) { var transform_and_scroll = function(page) {

View File

@ -134,9 +134,9 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
if(param.embed_image) if(param.embed_image)
html_renderer->tmp_files.add(fn); html_renderer->tmp_files.add(fn);
surface = cairo_svg_surface_create(fn.c_str(), page_width * param.h_dpi / DEFAULT_DPI, page_height * param.v_dpi / DEFAULT_DPI); surface = cairo_svg_surface_create(fn.c_str(), page_width * param.actual_dpi / DEFAULT_DPI, page_height * param.actual_dpi / DEFAULT_DPI);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
cairo_t * cr = cairo_create(surface); cairo_t * cr = cairo_create(surface);
setCairo(cr); setCairo(cr);
@ -144,15 +144,15 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
bitmaps_in_current_page.clear(); bitmaps_in_current_page.clear();
bool process_annotation = param.process_annotation; bool process_annotation = param.process_annotation;
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi, doc->displayPage(this, pageno, param.actual_dpi, param.actual_dpi,
0, 0,
(!(param.use_cropbox)), (!(param.use_cropbox)),
false, false,
false, false,
nullptr, nullptr, &annot_cb, &process_annotation); nullptr, nullptr, &annot_cb, &process_annotation);
setCairo(nullptr); setCairo(nullptr);
{ {
auto status = cairo_status(cr); auto status = cairo_status(cr);
cairo_destroy(cr); cairo_destroy(cr);
@ -198,7 +198,7 @@ bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
void CairoBackgroundRenderer::embed_image(int pageno) void CairoBackgroundRenderer::embed_image(int pageno)
{ {
auto & f_page = *(html_renderer->f_curpage); auto & f_page = *(html_renderer->f_curpage);
// SVGs introduced by <img> or background-image can't have external resources; // SVGs introduced by <img> or background-image can't have external resources;
// SVGs introduced by <embed> and <object> can, but they are more expensive for browsers. // SVGs introduced by <embed> and <object> can, but they are more expensive for browsers.
// So we use <img> if the SVG contains no external bitmaps, and use <embed> otherwise. // So we use <img> if the SVG contains no external bitmaps, and use <embed> otherwise.
@ -235,11 +235,11 @@ string CairoBackgroundRenderer::build_bitmap_path(int id)
return string(html_renderer->str_fmt("%s/o%d.jpg", param.dest_dir.c_str(), id)); return string(html_renderer->str_fmt("%s/o%d.jpg", param.dest_dir.c_str(), id));
} }
// Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files. // Override CairoOutputDev::setMimeData() and dump bitmaps in SVG to external files.
void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surface_t *image) void CairoBackgroundRenderer::setMimeData(GfxState *state, Stream *str, Object *ref, GfxImageColorMap *colorMap, cairo_surface_t *image)
{ {
if (param.svg_embed_bitmap) if (param.svg_embed_bitmap)
{ {
CairoOutputDev::setMimeData(str, ref, image); CairoOutputDev::setMimeData(state, str, ref, colorMap, image);
return; return;
} }
@ -263,21 +263,20 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac
// //
// In PDF, jpeg stream objects can also specify other color spaces like DeviceN and Separation, // In PDF, jpeg stream objects can also specify other color spaces like DeviceN and Separation,
// It is also not safe to dump them directly. // It is also not safe to dump them directly.
Object obj; Object obj = str->getDict()->lookup("ColorSpace");
str->getDict()->lookup("ColorSpace", &obj);
if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) ) if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) )
{ {
obj.free(); //obj.free();
return; return;
} }
obj.free(); //obj.free();
str->getDict()->lookup("Decode", &obj); obj = str->getDict()->lookup("Decode");
if (obj.isArray()) if (obj.isArray())
{ {
obj.free(); //obj.free();
return; return;
} }
obj.free(); //obj.free();
int imgId = ref->getRef().num; int imgId = ref->getRef().num;
auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId)); auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId));

View File

@ -51,7 +51,7 @@ public:
void updateRender(GfxState *state); void updateRender(GfxState *state);
protected: protected:
virtual void setMimeData(Stream *str, Object *ref, cairo_surface_t *image); virtual void setMimeData(GfxState *state, Stream *str, Object *ref, GfxImageColorMap *colorMap, cairo_surface_t *image);
protected: protected:
HTMLRenderer * html_renderer; HTMLRenderer * html_renderer;

View File

@ -29,7 +29,7 @@ using std::unique_ptr;
const SplashColor SplashBackgroundRenderer::white = {255,255,255}; const SplashColor SplashBackgroundRenderer::white = {255,255,255};
SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTMLRenderer * html_renderer, const Param & param) SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTMLRenderer * html_renderer, const Param & param)
: SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white)) : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)(&white), gTrue, splashThinLineSolid) // DCRH: Make thin line mode = solid
, html_renderer(html_renderer) , html_renderer(html_renderer)
, param(param) , param(param)
, format(imgFormat) , format(imgFormat)
@ -67,30 +67,10 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY, double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen) CharCode code, int nBytes, Unicode *u, int uLen)
{ {
// draw characters as image when if (param.proof || html_renderer->is_char_covered(drawn_char_count)) {
// - in fallback mode
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font while param.process_type3 is not enabled
// - OR the text is used as path
if((param.fallback || param.proof)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
)
)
{
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen); SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
} }
// If a char is treated as image, it is not subject to cover test drawn_char_count++;
// (see HTMLRenderer::drawString), so don't increase drawn_char_count.
else if (param.correct_text_visibility) {
if (html_renderer->is_char_covered(drawn_char_count))
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
drawn_char_count++;
}
} }
void SplashBackgroundRenderer::beginTextObject(GfxState *state) void SplashBackgroundRenderer::beginTextObject(GfxState *state)
@ -134,7 +114,8 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
{ {
drawn_char_count = 0; drawn_char_count = 0;
bool process_annotation = param.process_annotation; bool process_annotation = param.process_annotation;
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi,
doc->displayPage(this, pageno, param.actual_dpi, param.actual_dpi,
0, 0,
(!(param.use_cropbox)), (!(param.use_cropbox)),
false, false, false, false,
@ -159,8 +140,8 @@ void SplashBackgroundRenderer::embed_image(int pageno)
dump_image((char*)fn, xmin, ymin, xmax, ymax); dump_image((char*)fn, xmin, ymin, xmax, ymax);
} }
double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.h_dpi; double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.v_dpi; double v_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
auto & f_page = *(html_renderer->f_curpage); auto & f_page = *(html_renderer->f_curpage);
auto & all_manager = html_renderer->all_manager; auto & all_manager = html_renderer->all_manager;
@ -227,7 +208,7 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
throw string("Image format not supported: ") + format; throw string("Image format not supported: ") + format;
} }
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) if(!writer->init(f, width, height, param.actual_dpi, param.actual_dpi))
throw "Cannot initialize image writer"; throw "Cannot initialize image writer";
auto * bitmap = getBitmap(); auto * bitmap = getBitmap();

View File

@ -7,43 +7,110 @@
#include "CoveredTextDetector.h" #include "CoveredTextDetector.h"
#include <algorithm>
#include "util/math.h" #include "util/math.h"
//#define DEBUG
namespace pdf2htmlEX { namespace pdf2htmlEX {
// Binds the detector to the caller's Param object. The `param` member is a
// reference (Param &), not a copy, and the detector writes param.actual_dpi
// when it finds partially covered text, so the Param must outlive this object.
CoveredTextDetector::CoveredTextDetector(Param & param): param(param)
{
}
void CoveredTextDetector::reset() void CoveredTextDetector::reset()
{ {
char_bboxes.clear(); char_bboxes.clear();
chars_covered.clear(); chars_covered.clear();
char_pts_visible.clear();
} }
void CoveredTextDetector::add_char_bbox(double * bbox) void CoveredTextDetector::add_char_bbox(cairo_t *cairo, double * bbox)
{ {
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4); char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
chars_covered.push_back(false); chars_covered.push_back(false);
char_pts_visible.push_back(1|2|4|8);
} }
void CoveredTextDetector::add_char_bbox_clipped(double * bbox, bool patially) void CoveredTextDetector::add_char_bbox_clipped(cairo_t *cairo, double * bbox, int pts_visible)
{ {
#ifdef DEBUG
printf("add_char_bbox_clipped: pts_visible:%x: [%f,%f,%f,%f]\n", pts_visible, bbox[0], bbox[1], bbox[2], bbox[3]);
#endif
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4); char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
chars_covered.push_back(true); char_pts_visible.push_back(pts_visible);
if (patially)
add_non_char_bbox(bbox, chars_covered.size() - 1); // DCRH: Hide if no points are visible, or if some points are visible and correct_text_visibility == 2
if (pts_visible == 0 || param.correct_text_visibility == 2) {
chars_covered.push_back(true);
if (pts_visible > 0 && param.correct_text_visibility == 2) {
param.actual_dpi = std::min(param.text_dpi, param.max_dpi); // Char partially covered so increase background resolution
}
} else {
chars_covered.push_back(false);
}
} }
void CoveredTextDetector::add_non_char_bbox(double * bbox, int index) // We now track the visibility of each corner of the char bbox. Potentially we could track
// more sample points but this should be sufficient for most cases.
// We check to see if each point is covered by any stroke or fill operation
// and mark it as invisible if so
void CoveredTextDetector::add_non_char_bbox(cairo_t *cairo, double * bbox, int what)
{ {
if (index < 0) int index = chars_covered.size();
index = chars_covered.size(); for (int i = 0; i < index; i++) {
for (int i = 0; i < index; i++)
{
if (chars_covered[i]) if (chars_covered[i])
continue; continue;
double * cbbox = &char_bboxes[i * 4]; double * cbbox = &char_bboxes[i * 4];
if (bbox_intersect(cbbox, bbox)) #ifdef DEBUG
{ printf("add_non_char_bbox: what=%d, cbbox:[%f,%f,%f,%f], bbox:[%f,%f,%f,%f]\n", what, cbbox[0], cbbox[1], cbbox[2], cbbox[3], bbox[0], bbox[1], bbox[2], bbox[3]);
chars_covered[i] = true; #endif
add_non_char_bbox(cbbox, i); if (bbox_intersect(cbbox, bbox)) {
int pts_visible = char_pts_visible[i];
#ifdef DEBUG
printf("pts_visible=%x\n", pts_visible);
#endif
if ((pts_visible & 1) && cairo_in_clip(cairo, cbbox[0], cbbox[1]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[0], cbbox[1])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[0], cbbox[1])))) {
pts_visible &= ~1;
}
if ((pts_visible & 2) && cairo_in_clip(cairo, cbbox[2], cbbox[1]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[2], cbbox[1])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[2], cbbox[1])))) {
pts_visible &= ~2;
}
if ((pts_visible & 4) && cairo_in_clip(cairo, cbbox[2], cbbox[3]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[2], cbbox[3])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[2], cbbox[3])))) {
pts_visible &= ~4;
}
if ((pts_visible & 8) && cairo_in_clip(cairo, cbbox[0], cbbox[3]) &&
(what == 0 ||
(what == 1 && cairo_in_fill(cairo, cbbox[0], cbbox[3])) ||
(what == 2 && cairo_in_stroke(cairo, cbbox[0], cbbox[3])))) {
pts_visible &= ~8;
}
#ifdef DEBUG
printf("pts_visible=%x\n", pts_visible);
#endif
char_pts_visible[i] = pts_visible;
if (pts_visible == 0 || (pts_visible != (1|2|4|8) && param.correct_text_visibility == 2)) {
#ifdef DEBUG
printf("Char covered\n");
#endif
chars_covered[i] = true;
if (pts_visible > 0 && param.correct_text_visibility == 2) { // Partially visible text => increase rendering DPI
param.actual_dpi = std::min(param.text_dpi, param.max_dpi);
}
}
} else {
#ifdef DEBUG
printf("Not covered\n");
#endif
} }
} }
} }

View File

@ -9,6 +9,9 @@
#define COVEREDTEXTDETECTOR_H__ #define COVEREDTEXTDETECTOR_H__
#include <vector> #include <vector>
#include "Param.h"
#include <cairo.h>
namespace pdf2htmlEX { namespace pdf2htmlEX {
@ -19,6 +22,8 @@ class CoveredTextDetector
{ {
public: public:
CoveredTextDetector(Param & param);
/** /**
* Reset to initial state. Should be called when start drawing a page. * Reset to initial state. Should be called when start drawing a page.
*/ */
@ -28,9 +33,9 @@ public:
* Add a drawn character's bounding box. * Add a drawn character's bounding box.
* @param bbox (x0, y0, x1, y1) * @param bbox (x0, y0, x1, y1)
*/ */
void add_char_bbox(double * bbox); void add_char_bbox(cairo_t *, double * bbox);
void add_char_bbox_clipped(double * bbox, bool patially); void add_char_bbox_clipped(cairo_t *,double * bbox, int pts_covered);
/** /**
* Add a drawn non-char graphics' bounding box. * Add a drawn non-char graphics' bounding box.
@ -40,7 +45,7 @@ public:
* @param index this graphics' drawing order: assume it is drawn after (index-1)th * @param index this graphics' drawing order: assume it is drawn after (index-1)th
* char. -1 means after the last char. * char. -1 means after the last char.
*/ */
void add_non_char_bbox(double * bbox, int index = -1); void add_non_char_bbox(cairo_t *cairo, double * bbox, int what);
/** /**
* An array of flags indicating whether a char is covered by any non-char graphics. * An array of flags indicating whether a char is covered by any non-char graphics.
@ -54,6 +59,8 @@ private:
std::vector<bool> chars_covered; std::vector<bool> chars_covered;
// x00, y00, x01, y01; x10, y10, x11, y11;... // x00, y00, x01, y01; x10, y10, x11, y11;...
std::vector<double> char_bboxes; std::vector<double> char_bboxes;
std::vector<int> char_pts_visible;
Param & param;
}; };
} }

View File

@ -11,18 +11,15 @@
#include "DrawingTracer.h" #include "DrawingTracer.h"
#if !ENABLE_SVG #if !ENABLE_SVG
#warning "Cairo is disabled because ENABLE_SVG is off, --correct-text-visibility has limited functionality." #error "ENABLE_SVG must be enabled"
#endif #endif
static constexpr bool DT_DEBUG = false; //#define DEBUG
namespace pdf2htmlEX namespace pdf2htmlEX
{ {
DrawingTracer::DrawingTracer(const Param & param): param(param) DrawingTracer::DrawingTracer(const Param & param): param(param), cairo(nullptr)
#if ENABLE_SVG
, cairo(nullptr)
#endif
{ {
} }
@ -33,11 +30,8 @@ DrawingTracer::~DrawingTracer()
void DrawingTracer::reset(GfxState *state) void DrawingTracer::reset(GfxState *state)
{ {
if (!param.correct_text_visibility)
return;
finish(); finish();
#if ENABLE_SVG
// pbox is defined in device space, which is affected by zooming; // pbox is defined in device space, which is affected by zooming;
// We want to trace in page space which is stable, so invert pbox by ctm. // We want to trace in page space which is stable, so invert pbox by ctm.
double pbox[] { 0, 0, state->getPageWidth(), state->getPageHeight() }; double pbox[] { 0, 0, state->getPageWidth(), state->getPageHeight() };
@ -48,20 +42,24 @@ void DrawingTracer::reset(GfxState *state)
cairo_rectangle_t page_box { pbox[0], pbox[1], pbox[2] - pbox[0], pbox[3] - pbox[1] }; cairo_rectangle_t page_box { pbox[0], pbox[1], pbox[2] - pbox[0], pbox[3] - pbox[1] };
cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box); cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
cairo = cairo_create(surface); cairo = cairo_create(surface);
if (DT_DEBUG)
printf("DrawingTracer::reset:page bbox:[%f,%f,%f,%f]\n",pbox[0], pbox[1], pbox[2], pbox[3]); ctm_stack.clear();
double *identity = new double[6];
tm_init(identity);
ctm_stack.push_back(identity);
#ifdef DEBUG
printf("DrawingTracer::reset:page bbox:[%f,%f,%f,%f]\n",pbox[0], pbox[1], pbox[2], pbox[3]);
#endif #endif
} }
void DrawingTracer::finish() void DrawingTracer::finish()
{ {
#if ENABLE_SVG
if (cairo) if (cairo)
{ {
cairo_destroy(cairo); cairo_destroy(cairo);
cairo = nullptr; cairo = nullptr;
} }
#endif
} }
// Poppler won't inform us its initial CTM, and the initial CTM is affected by zoom level. // Poppler won't inform us its initial CTM, and the initial CTM is affected by zoom level.
@ -72,22 +70,17 @@ void DrawingTracer::update_ctm(GfxState *state, double m11, double m12, double m
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
#if ENABLE_SVG double *tmp = new double[6];
cairo_matrix_t matrix; tmp[0] = m11;
matrix.xx = m11; tmp[1] = m12;
matrix.yx = m12; tmp[2] = m21;
matrix.xy = m21; tmp[3] = m22;
matrix.yy = m22; tmp[4] = m31;
matrix.x0 = m31; tmp[5] = m32;
matrix.y0 = m32; double *ctm = ctm_stack.back();
cairo_transform(cairo, &matrix); tm_multiply(ctm, tmp);
#ifdef DEBUG
if (DT_DEBUG) printf("DrawingTracer::before update_ctm:ctm:[%f,%f,%f,%f,%f,%f] => [%f,%f,%f,%f,%f,%f]\n", m11, m12, m21, m22, m31, m32, ctm[0], ctm[1], ctm[2], ctm[3], ctm[4], ctm[5]);
{
cairo_matrix_t mat;
cairo_get_matrix(cairo, &mat);
printf("DrawingTracer::update_ctm:ctm:[%f,%f,%f,%f,%f,%f]\n", mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0);
}
#endif #endif
} }
@ -95,16 +88,15 @@ void DrawingTracer::clip(GfxState * state, bool even_odd)
{ {
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
#if ENABLE_SVG
do_path(state, state->getPath()); do_path(state, state->getPath());
cairo_set_fill_rule(cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING); cairo_set_fill_rule(cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
cairo_clip (cairo); cairo_clip (cairo);
if (DT_DEBUG) #ifdef DEBUG
{ {
double cbox[4]; double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3); cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
printf("DrawingTracer::clip:extents:[%f,%f,%f,%f]\n", cbox[0],cbox[1],cbox[2],cbox[3]); printf("DrawingTracer::clip:extents:even_odd=%d,[%f,%f,%f,%f]\n", even_odd, cbox[0],cbox[1],cbox[2],cbox[3]);
} }
#endif #endif
} }
@ -113,6 +105,8 @@ void DrawingTracer::clip_to_stroke_path(GfxState * state)
{ {
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
printf("TODO:clip_to_stroke_path\n");
// TODO cairo_stroke_to_path() ? // TODO cairo_stroke_to_path() ?
} }
@ -120,92 +114,112 @@ void DrawingTracer::save()
{ {
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
#if ENABLE_SVG
cairo_save(cairo); cairo_save(cairo);
if (DT_DEBUG) double *e = new double[6];
printf("DrawingTracer::save\n"); memcpy(e, ctm_stack.back(), sizeof(double) * 6);
ctm_stack.push_back(e);
#ifdef DEBUG
printf("DrawingTracer::saved: [%f,%f,%f,%f,%f,%f]\n", e[0], e[1], e[2], e[3], e[4], e[5]);
#endif #endif
} }
void DrawingTracer::restore() void DrawingTracer::restore()
{ {
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
#if ENABLE_SVG
cairo_restore(cairo); cairo_restore(cairo);
if (DT_DEBUG) ctm_stack.pop_back();
printf("DrawingTracer::restore\n");
#ifdef DEBUG
double *ctm = ctm_stack.back();
printf("DrawingTracer::restored: [%f,%f,%f,%f,%f,%f]\n", ctm[0], ctm[1], ctm[2], ctm[3], ctm[4], ctm[5]);
#endif #endif
} }
void DrawingTracer::do_path(GfxState * state, GfxPath * path) void DrawingTracer::do_path(GfxState * state, GfxPath * path)
{ {
#if ENABLE_SVG
//copy from CairoOutputDev::doPath //copy from CairoOutputDev::doPath
GfxSubpath *subpath; GfxSubpath *subpath;
int i, j; int i, j;
double x, y; double x, y;
cairo_new_path(cairo); cairo_new_path(cairo);
if (DT_DEBUG) #ifdef DEBUG
printf("DrawingTracer::do_path:new_path\n"); printf("DrawingTracer::do_path:new_path (%d subpaths)\n", path->getNumSubpaths());
#endif
for (i = 0; i < path->getNumSubpaths(); ++i) { for (i = 0; i < path->getNumSubpaths(); ++i) {
subpath = path->getSubpath(i); subpath = path->getSubpath(i);
if (subpath->getNumPoints() > 0) { if (subpath->getNumPoints() > 0) {
x = subpath->getX(0); x = subpath->getX(0);
y = subpath->getY(0); y = subpath->getY(0);
xform_pt(x, y);
cairo_move_to(cairo, x, y); cairo_move_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:move_to[%f,%f]\n",x,y);
j = 1; j = 1;
while (j < subpath->getNumPoints()) { while (j < subpath->getNumPoints()) {
if (subpath->getCurve(j)) { if (subpath->getCurve(j)) {
x = subpath->getX(j+2); x = subpath->getX(j+2);
y = subpath->getY(j+2); y = subpath->getY(j+2);
double x1 = subpath->getX(j);
double y1 = subpath->getY(j);
double x2 = subpath->getX(j+1);
double y2 = subpath->getY(j+1);
xform_pt(x, y);
xform_pt(x1, y1);
xform_pt(x2, y2);
cairo_curve_to(cairo, cairo_curve_to(cairo,
subpath->getX(j), subpath->getY(j), x1, y1,
subpath->getX(j+1), subpath->getY(j+1), x2, y2,
x, y); x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:curve_to[%f,%f]\n",x,y);
j += 3; j += 3;
} else { } else {
x = subpath->getX(j); x = subpath->getX(j);
y = subpath->getY(j); y = subpath->getY(j);
xform_pt(x, y);
cairo_line_to(cairo, x, y); cairo_line_to(cairo, x, y);
if (DT_DEBUG)
printf("DrawingTracer::do_path:line_to[%f,%f]\n",x,y);
++j; ++j;
} }
} }
if (subpath->isClosed()) { if (subpath->isClosed()) {
cairo_close_path (cairo); cairo_close_path (cairo);
if (DT_DEBUG)
printf("DrawingTracer::do_path:close\n");
} }
} }
} }
#endif
} }
void DrawingTracer::stroke(GfxState * state) void DrawingTracer::stroke(GfxState * state)
{ {
#if ENABLE_SVG
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
if (DT_DEBUG) if (state->getStrokeOpacity() < 0.5) {
printf("DrawingTracer::stroke\n"); // Ignore partially transparent strokes for occlusion purposes
return;
}
cairo_set_line_width(cairo, state->getLineWidth()); // Transform the line width by the CTM. This isn't 100% accurate - we should really do this path segment by path segment,
// but this is a reasonable approximation provided the CTM scales X and Y uniformly.
double lwx, lwy;
lwx = lwy = sqrt(0.5);
tm_transform(ctm_stack.back(), lwx, lwy, true);
double lineWidthScale = sqrt(lwx * lwx + lwy * lwy);
#ifdef DEBUG
printf("DrawingTracer::stroke. line width = %f*%f, line cap = %d\n", lineWidthScale, state->getLineWidth(), state->getLineCap());
#endif
cairo_set_line_width(cairo, lineWidthScale * state->getLineWidth());
// Line cap is important - some PDF line widths are very large
switch (state->getLineCap()) {
case 0:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_BUTT);
break;
case 1:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_ROUND);
break;
case 2:
cairo_set_line_cap (cairo, CAIRO_LINE_CAP_SQUARE);
break;
}
// GfxPath is broken into steps, each step makes up a cairo path and its bbox is used for covering test.
// TODO
// 1. path steps that are not vertical or horizontal lines may still falsely "cover" many chars,
// can we slice those steps further?
// 2. if the line width is small, can we just ignore the path?
// 3. line join feature can't be retained. We use line-cap-square to minimize the problem that
// some chars actually covered by a line join are missed. However chars covered by an acute angle
// with line-join-miter may be still recognized as not covered.
cairo_set_line_cap(cairo, CAIRO_LINE_CAP_SQUARE);
GfxPath * path = state->getPath(); GfxPath * path = state->getPath();
for (int i = 0; i < path->getNumSubpaths(); ++i) { for (int i = 0; i < path->getNumSubpaths(); ++i) {
GfxSubpath * subpath = path->getSubpath(i); GfxSubpath * subpath = path->getSubpath(i);
@ -213,48 +227,54 @@ void DrawingTracer::stroke(GfxState * state)
continue; continue;
double x = subpath->getX(0); double x = subpath->getX(0);
double y = subpath->getY(0); double y = subpath->getY(0);
xform_pt(x, y);
//p: loop cursor; j: next point index //p: loop cursor; j: next point index
int p =1, j = 1; int p =1;
int n = subpath->getNumPoints(); int n = subpath->getNumPoints();
while (p <= n) { while (p < n) {
cairo_new_path(cairo); cairo_new_path(cairo);
#ifdef DEBUG
printf("move_to: [%f,%f]\n", x, y);
#endif
cairo_move_to(cairo, x, y); cairo_move_to(cairo, x, y);
if (subpath->getCurve(j)) { if (subpath->getCurve(p)) {
x = subpath->getX(j+2); x = subpath->getX(p+2);
y = subpath->getY(j+2); y = subpath->getY(p+2);
double x1 = subpath->getX(p);
double y1 = subpath->getY(p);
double x2 = subpath->getX(p+1);
double y2 = subpath->getY(p+1);
xform_pt(x, y);
xform_pt(x1, y1);
xform_pt(x2, y2);
#ifdef DEBUG
printf("curve_to: [%f,%f], [%f,%f], [%f,%f]\n", x1, y1, x2, y2, x, y);
#endif
cairo_curve_to(cairo, cairo_curve_to(cairo,
subpath->getX(j), subpath->getY(j), x1, y1,
subpath->getX(j+1), subpath->getY(j+1), x2, y2,
x, y); x, y);
p += 3; p += 3;
} else { } else {
x = subpath->getX(j); x = subpath->getX(p);
y = subpath->getY(j); y = subpath->getY(p);
xform_pt(x, y);
#ifdef DEBUG
printf("line_to: [%f,%f]\n", x, y);
#endif
cairo_line_to(cairo, x, y); cairo_line_to(cairo, x, y);
++p; ++p;
} }
if (DT_DEBUG)
printf("DrawingTracer::stroke:new box:\n");
double sbox[4]; double sbox[4];
cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3); cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3);
#ifdef DEBUG
printf("DrawingTracer::stroke:new box:[%f,%f,%f,%f]\n", sbox[0], sbox[1], sbox[2], sbox[3]);
#endif
if (sbox[0] != sbox[2] && sbox[1] != sbox[3]) if (sbox[0] != sbox[2] && sbox[1] != sbox[3])
draw_non_char_bbox(state, sbox); draw_non_char_bbox(state, sbox, 2);
else if (DT_DEBUG)
printf("DrawingTracer::stroke:zero box!\n");
if (p == n)
{
if (subpath->isClosed())
j = 0; // if sub path is closed, go back to starting point
else
break;
}
else
j = p;
} }
} }
#endif
} }
void DrawingTracer::fill(GfxState * state, bool even_odd) void DrawingTracer::fill(GfxState * state, bool even_odd)
@ -262,139 +282,166 @@ void DrawingTracer::fill(GfxState * state, bool even_odd)
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
#if ENABLE_SVG if (state->getFillOpacity() < 0.5) {
// Ignore partially transparent fills for occlusion purposes
return;
}
do_path(state, state->getPath()); do_path(state, state->getPath());
//cairo_fill_extents doesn't take fill rule into account. //cairo_fill_extents doesn't take fill rule into account.
//cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING); //cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
double fbox[4]; double fbox[4];
cairo_fill_extents(cairo, fbox, fbox + 1, fbox + 2, fbox + 3); cairo_fill_extents(cairo, fbox, fbox + 1, fbox + 2, fbox + 3);
draw_non_char_bbox(state, fbox);
#ifdef DEBUG
printf("DrawingTracer::fill:[%f,%f,%f,%f]\n", fbox[0],fbox[1],fbox[2],fbox[3]);
#endif #endif
draw_non_char_bbox(state, fbox, 1);
} }
void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox) void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox, int what)
{ {
#if ENABLE_SVG // what == 0 => just do bbox test
// what == 1 => fill test
// what == 2 => stroke test
double cbox[4]; double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3); cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
if(bbox_intersect(cbox, bbox, bbox)) if(bbox_intersect(cbox, bbox))
#endif
{ {
transform_bbox_by_ctm(bbox, state); #ifdef DEBUG
if (DT_DEBUG) printf("DrawingTracer::draw_non_char_bbox:what=%d,[%f,%f,%f,%f]\n", what, bbox[0],bbox[1],bbox[2],bbox[3]);
printf("DrawingTracer::draw_non_char_bbox:[%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]); #endif
if (on_non_char_drawn) if (on_non_char_drawn)
on_non_char_drawn(bbox); on_non_char_drawn(cairo, bbox, what);
} }
} }
void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox) void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox, int inTransparencyGroup)
{ {
#if ENABLE_SVG if (inTransparencyGroup || state->getFillOpacity() < 1.0 || state->getStrokeOpacity() < 1.0) {
// Note: even if 4 corners of the char are all in or all out of the clip area, on_char_clipped(cairo, bbox, 0);
// it could still be partially clipped. return;
// TODO better solution? }
int pt_in = 0; if (!param.correct_text_visibility) {
if (cairo_in_clip(cairo, bbox[0], bbox[1])) double bbox[4] = { 0, 0, 0, 0 }; // bbox not relevant if not correcting text visibility
++pt_in; on_char_drawn(cairo, bbox);
if (cairo_in_clip(cairo, bbox[2], bbox[3])) return;
++pt_in; }
if (cairo_in_clip(cairo, bbox[2], bbox[1]))
++pt_in;
if (cairo_in_clip(cairo, bbox[0], bbox[3]))
++pt_in;
if (pt_in == 0) double cbox[4];
{ cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
transform_bbox_by_ctm(bbox); #ifdef DEBUG
if(on_char_clipped) printf("DrawingTracer::draw_char_bbox::char bbox[%f,%f,%f,%f],clip extents:[%f,%f,%f,%f]\n", bbox[0], bbox[1], bbox[2], bbox[3], cbox[0],cbox[1],cbox[2],cbox[3]);
on_char_clipped(bbox, false);
}
else
{
if (pt_in < 4)
{
double cbox[4];
cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
bbox_intersect(cbox, bbox, bbox);
}
transform_bbox_by_ctm(bbox);
if (pt_in < 4)
{
if(on_char_clipped)
on_char_clipped(bbox, true);
}
else
{
if (on_char_drawn)
on_char_drawn(bbox);
}
}
#else
transform_bbox_by_ctm(bbox, state);
if (on_char_drawn)
on_char_drawn(bbox);
#endif #endif
if (DT_DEBUG)
printf("DrawingTracer::draw_char_bbox:[%f,%f,%f,%f]\n",bbox[0],bbox[1],bbox[2],bbox[3]); if (bbox_intersect(bbox, cbox)) {
#ifdef DEBUG
printf("char intersects clip\n");
#endif
int pts_visible = 0;
// See which points are inside the current clip
if (cairo_in_clip(cairo, bbox[0], bbox[1]))
pts_visible |= 1;
if (cairo_in_clip(cairo, bbox[2], bbox[1]))
pts_visible |= 2;
if (cairo_in_clip(cairo, bbox[2], bbox[3]))
pts_visible |= 4;
if (cairo_in_clip(cairo, bbox[0], bbox[3]))
pts_visible |= 8;
if (pts_visible == (1|2|4|8)) {
#ifdef DEBUG
printf("char inside clip\n");
#endif
on_char_drawn(cairo, bbox);
} else {
#ifdef DEBUG
printf("char partial clip (%x)\n", pts_visible);
#endif
on_char_clipped(cairo, bbox, pts_visible);
}
} else {
#ifdef DEBUG
printf("char outside clip\n");
#endif
on_char_clipped(cairo, bbox, 0);
}
} }
void DrawingTracer::draw_image(GfxState *state) void DrawingTracer::draw_image(GfxState *state)
{ {
if (!param.correct_text_visibility) if (!param.correct_text_visibility)
return; return;
double x1, y1, x2, y2, x3, y3, x4, y4;
x1 = x4 = y3 = y4 = 0;
x2 = y2 = x3 = y1 = 1;
xform_pt(x1, y1);
xform_pt(x2, y2);
xform_pt(x3, y3);
xform_pt(x4, y4);
cairo_new_path(cairo);
cairo_move_to(cairo, x1, y1);
cairo_line_to(cairo, x2, y2);
cairo_line_to(cairo, x3, y3);
cairo_line_to(cairo, x4, y4);
cairo_close_path (cairo);
#ifdef DEBUG
printf("draw_image: [%f,%f], [%f,%f], [%f,%f], [%f,%f]\n", x1, y1, x2, y2, x3, y3, x4, y4);
#endif
double bbox[4] {0, 0, 1, 1}; double bbox[4] {0, 0, 1, 1};
draw_non_char_bbox(state, bbox); tm_transform_bbox(ctm_stack.back(), bbox);
draw_non_char_bbox(state, bbox, 1);
} }
void DrawingTracer::draw_char(GfxState *state, double x, double y, double ax, double ay) void DrawingTracer::draw_char(GfxState *state, double x, double y, double width, double height, int inTransparencyGroup)
{ {
if (!param.correct_text_visibility) //printf("x=%f,y=%f,width=%f,height=%f\n", x, y, width, height);
return;
Matrix tm, itm; Matrix tm, itm;
memcpy(tm.m, state->getTextMat(), sizeof(tm.m)); memcpy(tm.m, state->getTextMat(), sizeof(tm.m));
//printf("tm = %f,%f,%f,%f,%f,%f\n", tm.m[0], tm.m[1], tm.m[2], tm.m[3], tm.m[4], tm.m[5]);
double cx = state->getCurX(), cy = state->getCurY(), fs = state->getFontSize(), double cx = state->getCurX(), cy = state->getCurY(), fs = state->getFontSize(),
ry = state->getRise(), h = state->getHorizScaling(); ry = state->getRise(), h = state->getHorizScaling();
//printf("cx=%f,cy=%f,fs=%f,ry=%f,h=%f\n", cx,cy,fs,ry,h);
//cx and cy have been transformed by the text matrix; we need to reverse them. //cx and cy have been transformed by the text matrix; we need to reverse them.
tm.invertTo(&itm); tm.invertTo(&itm);
double char_cx, char_cy; double char_cx, char_cy;
itm.transform(cx, cy, &char_cx, &char_cy); itm.transform(cx, cy, &char_cx, &char_cy);
//printf("char_cx = %f, char_cy = %f\n", char_cx, char_cy);
//TODO Vertical? Currently vertical/type3 chars are treated as non-chars. //TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
double char_m[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry}; double char_m[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry};
//printf("char_m = %f,%f,%f,%f,%f,%f\n", char_m[0], char_m[1], char_m[2], char_m[3], char_m[4], char_m[5]);
double final_m[6]; double final_m[6];
tm_multiply(final_m, tm.m, char_m); tm_multiply(final_m, tm.m, char_m);
auto font = state->getFont(); //printf("final_m = %f,%f,%f,%f,%f,%f\n", final_m[0], final_m[1], final_m[2], final_m[3], final_m[4], final_m[5]);
double bbox[4] {0, 0, ax, ay}; double final_after_ctm[6];
double desc = font->getDescent(), asc = font->getAscent(); tm_multiply(final_after_ctm, ctm_stack.back(), final_m);
if (font->getWMode() == 0) //printf("final_after_ctm= %f,%f,%f,%f,%f,%f\n", final_after_ctm[0], final_after_ctm[1], final_after_ctm[2], final_after_ctm[3], final_after_ctm[4], final_after_ctm[5]);
{ double inset = 0.1;
bbox[1] += desc; double bbox[4] {inset*width, inset*height, (1-inset)*width, (1-inset)*height};
bbox[3] += asc;
} //printf("bbox before: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
else //printf("bbox after: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
{//TODO Vertical? tm_transform_bbox(final_after_ctm, bbox);
} //printf("bbox after: [%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]);
tm_transform_bbox(final_m, bbox); draw_char_bbox(state, bbox, inTransparencyGroup);
draw_char_bbox(state, bbox);
} }
void DrawingTracer::xform_pt(double & x, double & y) {
void DrawingTracer::transform_bbox_by_ctm(double * bbox, GfxState * state) tm_transform(ctm_stack.back(), x, y);
{
#if ENABLE_SVG
cairo_matrix_t mat;
cairo_get_matrix(cairo, &mat);
double mat_a[6] {mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0};
tm_transform_bbox(mat_a, bbox);
#else
tm_transform_bbox(state->getCTM(), bbox);
#endif
} }
} /* namespace pdf2htmlEX */ } /* namespace pdf2htmlEX */

View File

@ -12,6 +12,9 @@
#include <GfxState.h> #include <GfxState.h>
#include <vector>
#include <array>
#include "pdf2htmlEX-config.h" #include "pdf2htmlEX-config.h"
#if ENABLE_SVG #if ENABLE_SVG
@ -31,11 +34,11 @@ public:
* bbox in device space. * bbox in device space.
*/ */
// a non-char graphics is drawn // a non-char graphics is drawn
std::function<void(double * bbox)> on_non_char_drawn; std::function<void(cairo_t *cairo, double * bbox, int what)> on_non_char_drawn;
// a char is drawn in the clip area // a char is drawn in the clip area
std::function<void(double * bbox)> on_char_drawn; std::function<void(cairo_t *cairo, double * bbox)> on_char_drawn;
// a char is drawn out of/partially in the clip area // a char is drawn out of/partially in the clip area
std::function<void(double * bbox, bool patially)> on_char_clipped; std::function<void(cairo_t *cairo, double * bbox, int pts_visible)> on_char_clipped;
DrawingTracer(const Param & param); DrawingTracer(const Param & param);
virtual ~DrawingTracer(); virtual ~DrawingTracer();
@ -44,9 +47,9 @@ public:
/* /*
* A character is drawing * A character is drawing
* x, y: glyph-drawing position, in PDF text object space. * x, y: glyph-drawing position, in PDF text object space.
* ax, ay: glyph advance, in glyph space. * width, height: glyph width/height
*/ */
void draw_char(GfxState * state, double x, double y, double ax, double ay); void draw_char(GfxState * state, double x, double y, double width, double height, int inTransparencyGroup);
/* /*
* An image is drawing * An image is drawing
*/ */
@ -63,13 +66,15 @@ private:
void finish(); void finish();
// Following methods operate in user space (just before CTM is applied) // Following methods operate in user space (just before CTM is applied)
void do_path(GfxState * state, GfxPath * path); void do_path(GfxState * state, GfxPath * path);
void draw_non_char_bbox(GfxState * state, double * bbox); void draw_non_char_bbox(GfxState * state, double * bbox, int what);
void draw_char_bbox(GfxState * state, double * bbox); void draw_char_bbox(GfxState * state, double * bbox, int inTransparencyGroup);
// If cairo is available, parameter state is ignored // If cairo is available, parameter state is ignored
void transform_bbox_by_ctm(double * bbox, GfxState * state = nullptr); void xform_pt(double & x, double & y);
const Param & param; const Param & param;
std::vector<double*> ctm_stack;
#if ENABLE_SVG #if ENABLE_SVG
cairo_t * cairo; cairo_t * cairo;
#endif #endif

View File

@ -47,7 +47,7 @@ namespace pdf2htmlEX {
struct HTMLRenderer : OutputDev struct HTMLRenderer : OutputDev
{ {
HTMLRenderer(const Param & param); HTMLRenderer(Param & param);
virtual ~HTMLRenderer(); virtual ~HTMLRenderer();
void process(PDFDoc * doc); void process(PDFDoc * doc);
@ -144,6 +144,13 @@ struct HTMLRenderer : OutputDev
virtual void eoFill(GfxState *state); virtual void eoFill(GfxState *state);
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void beginTransparencyGroup(GfxState * /*state*/, double * /*bbox*/,
GfxColorSpace * /*blendingColorSpace*/,
GBool /*isolated*/, GBool /*knockout*/,
GBool /*forSoftMask*/);
virtual void endTransparencyGroup(GfxState * /*state*/);
virtual void processLink(AnnotLink * al); virtual void processLink(AnnotLink * al);
/* /*
@ -245,11 +252,12 @@ protected:
double print_scale (void) const { return 96.0 / DEFAULT_DPI / text_zoom_factor(); } double print_scale (void) const { return 96.0 / DEFAULT_DPI / text_zoom_factor(); }
const Param & param; Param & param;
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// PDF states // PDF states
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
int inTransparencyGroup;
// track the original (unscaled) values to determine scaling and merge lines // track the original (unscaled) values to determine scaling and merge lines
// current position // current position
double cur_tx, cur_ty; // real text position, in text coords double cur_tx, cur_ty; // real text position, in text coords

View File

@ -62,4 +62,14 @@ GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, d
return true; return true;
} }
void HTMLRenderer::beginTransparencyGroup(GfxState *state, double *bbox,
GfxColorSpace *blendingColorSpace,
GBool isolated, GBool knockout,
GBool forSoftMask) {
inTransparencyGroup++;
}
void HTMLRenderer::endTransparencyGroup(GfxState *state) {
inTransparencyGroup--;
}
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -66,10 +66,10 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
auto * id = font->getID(); auto * id = font->getID();
Object ref_obj; Object ref_obj(id->num, id->gen);
ref_obj.initRef(id->num, id->gen); //ref_obj.initRef(id->num, id->gen);
ref_obj.fetch(xref, &font_obj); font_obj = ref_obj.fetch(xref);
ref_obj.free(); //ref_obj.free();
if(!font_obj.isDict()) if(!font_obj.isDict())
{ {
@ -78,7 +78,8 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
} }
Dict * dict = font_obj.getDict(); Dict * dict = font_obj.getDict();
if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) font_obj2 = dict->lookup("DescendantFonts");
if(font_obj2.isArray())
{ {
if(font_obj2.arrayGetLength() == 0) if(font_obj2.arrayGetLength() == 0)
{ {
@ -86,27 +87,31 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
} }
else else
{ {
if(font_obj2.arrayGetLength() > 1) if(font_obj2.arrayGetLength() > 1) {
cerr << "TODO: multiple entries in DescendantFonts array" << endl; cerr << "TODO: multiple entries in DescendantFonts array" << endl;
}
if(font_obj2.arrayGet(0, &obj2)->isDict())
obj2 = font_obj2.arrayGet(0);
if(obj2.isDict())
{ {
dict = obj2.getDict(); dict = obj2.getDict();
} }
} }
} }
if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) fontdesc_obj = dict->lookup("FontDescriptor");
if(!fontdesc_obj.isDict())
{ {
cerr << "Cannot find FontDescriptor " << endl; cerr << "Cannot find FontDescriptor " << endl;
throw 0; throw 0;
} }
dict = fontdesc_obj.getDict(); dict = fontdesc_obj.getDict();
obj = dict->lookup("FontFile3");
if(dict->lookup("FontFile3", &obj)->isStream()) if(obj.isStream())
{ {
if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) obj1 = obj.streamGetDict()->lookup("Subtype");
if(obj1.isName())
{ {
subtype = obj1.getName(); subtype = obj1.getName();
if(subtype == "Type1C") if(subtype == "Type1C")
@ -132,19 +137,19 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
cerr << "Invalid subtype in font descriptor" << endl; cerr << "Invalid subtype in font descriptor" << endl;
throw 0; throw 0;
} }
} } else {
else if (dict->lookup("FontFile2", &obj)->isStream()) obj = dict->lookup("FontFile2");
{ if (obj.isStream()) {
suffix = ".ttf"; suffix = ".ttf";
} } else {
else if (dict->lookup("FontFile", &obj)->isStream()) obj = dict->lookup("FontFile");
{ if (obj.isStream()) {
suffix = ".pfa"; suffix = ".pfa";
} } else {
else cerr << "Cannot find FontFile for dump" << endl;
{ throw 0;
cerr << "Cannot find FontFile for dump" << endl; }
throw 0; }
} }
if(suffix == "") if(suffix == "")
@ -175,13 +180,13 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl; cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl;
} }
obj2.free(); //obj2.free();
obj1.free(); //obj1.free();
obj.free(); //obj.free();
fontdesc_obj.free(); //fontdesc_obj.free();
font_obj2.free(); //font_obj2.free();
font_obj.free(); //font_obj.free();
return filepath; return filepath;
} }
@ -237,7 +242,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
surface = cairo_svg_surface_create(glyph_filename.c_str(), transformed_bbox_width * scale, transformed_bbox_height * scale); surface = cairo_svg_surface_create(glyph_filename.c_str(), transformed_bbox_width * scale, transformed_bbox_height * scale);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); cairo_surface_set_fallback_resolution(surface, param.actual_dpi, param.actual_dpi);
cairo_t * cr = cairo_create(surface); cairo_t * cr = cairo_create(surface);
// track the position of the origin // track the position of the origin
@ -373,6 +378,14 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
#endif #endif
} }
namespace {
void output_map_file_header(std::ostream& out) {
out << "glyph_code mapped_code unicode" << std::endl;
}
} // namespace
void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only)
{ {
if(param.debug) if(param.debug)
@ -528,6 +541,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
ffw_reencode_glyph_order(); ffw_reencode_glyph_order();
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font); GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
assert(_font != nullptr);
// To locate CID2GID for the font // To locate CID2GID for the font
// as in CairoFontEngine.cc // as in CairoFontEngine.cc
@ -574,6 +588,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
map_filename = (char*)str_fmt("%s/f%llx.map", param.tmp_dir.c_str(), info.id); map_filename = (char*)str_fmt("%s/f%llx.map", param.tmp_dir.c_str(), info.id);
tmp_files.add(map_filename); tmp_files.add(map_filename);
map_outf.open(map_filename); map_outf.open(map_filename);
output_map_file_header(map_outf);
} }
unordered_set<int> codeset; unordered_set<int> codeset;
@ -650,6 +665,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
{ {
map_outf.close(); map_outf.close();
map_outf.open(map_filename); map_outf.open(map_filename);
output_map_file_header(map_outf);
} }
continue; continue;
} }

View File

@ -41,12 +41,13 @@ using std::abs;
using std::cerr; using std::cerr;
using std::endl; using std::endl;
HTMLRenderer::HTMLRenderer(const Param & param) HTMLRenderer::HTMLRenderer(Param & param)
:OutputDev() :OutputDev()
,param(param) ,param(param)
,html_text_page(param, all_manager) ,html_text_page(param, all_manager)
,preprocessor(param) ,preprocessor(param)
,tmp_files(param) ,tmp_files(param)
,covered_text_detector(param)
,tracer(param) ,tracer(param)
{ {
if(!(param.debug)) if(!(param.debug))
@ -81,11 +82,11 @@ HTMLRenderer::HTMLRenderer(const Param & param)
all_manager.bottom .set_eps(EPS); all_manager.bottom .set_eps(EPS);
tracer.on_char_drawn = tracer.on_char_drawn =
[this](double * box) { covered_text_detector.add_char_bbox(box); }; [this](cairo_t *cairo, double * box) { covered_text_detector.add_char_bbox(cairo, box); };
tracer.on_char_clipped = tracer.on_char_clipped =
[this](double * box, bool partial) { covered_text_detector.add_char_bbox_clipped(box, partial); }; [this](cairo_t *cairo, double * box, int partial) { covered_text_detector.add_char_bbox_clipped(cairo, box, partial); };
tracer.on_non_char_drawn = tracer.on_non_char_drawn =
[this](double * box) { covered_text_detector.add_non_char_bbox(box); }; [this](cairo_t *cairo, double * box, int what) { covered_text_detector.add_non_char_bbox(cairo, box, what); };
} }
HTMLRenderer::~HTMLRenderer() HTMLRenderer::~HTMLRenderer()
@ -93,6 +94,8 @@ HTMLRenderer::~HTMLRenderer()
ffw_finalize(); ffw_finalize();
} }
#define MAX_DIMEN 9000
void HTMLRenderer::process(PDFDoc *doc) void HTMLRenderer::process(PDFDoc *doc)
{ {
cur_doc = doc; cur_doc = doc;
@ -119,12 +122,22 @@ void HTMLRenderer::process(PDFDoc *doc)
int page_count = (param.last_page - param.first_page + 1); int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i) for(int i = param.first_page; i <= param.last_page ; ++i)
{ {
param.actual_dpi = param.desired_dpi;
param.max_dpi = 72 * MAX_DIMEN / max(doc->getPageCropWidth(i), doc->getPageCropHeight(i));
if (param.actual_dpi > param.max_dpi) {
param.actual_dpi = param.max_dpi;
printf("Warning:Page %d clamped to %f DPI\n", i, param.actual_dpi);
}
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) { if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
cerr << "Stop processing, reach max size\n"; if(param.quiet == 0)
cerr << "Stop processing, reach max size\n";
break; break;
} }
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if (param.quiet == 0)
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.split_pages) if(param.split_pages)
{ {
@ -147,15 +160,21 @@ void HTMLRenderer::process(PDFDoc *doc)
false, // printing false, // printing
nullptr, nullptr, nullptr, nullptr); nullptr, nullptr, nullptr, nullptr);
if (param.desired_dpi != param.actual_dpi) {
printf("Page %d DPI change %.1f => %.1f\n", i, param.desired_dpi, param.actual_dpi);
}
if(param.split_pages) if(param.split_pages)
{ {
delete f_curpage; delete f_curpage;
f_curpage = nullptr; f_curpage = nullptr;
} }
} }
if(page_count >= 0) if(page_count >= 0 && param.quiet == 0)
cerr << "Working: " << page_count << "/" << page_count; cerr << "Working: " << page_count << "/" << page_count;
cerr << endl;
if(param.quiet == 0)
cerr << endl;
//////////////////////// ////////////////////////
// Process Outline // Process Outline
@ -167,7 +186,8 @@ void HTMLRenderer::process(PDFDoc *doc)
bg_renderer = nullptr; bg_renderer = nullptr;
fallback_bg_renderer = nullptr; fallback_bg_renderer = nullptr;
cerr << endl; if(param.quiet == 0)
cerr << endl;
} }
void HTMLRenderer::setDefaultCTM(double *ctm) void HTMLRenderer::setDefaultCTM(double *ctm)

View File

@ -56,73 +56,70 @@ static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int &
// dec // dec
sout << "[" << pageno; sout << "[" << pageno;
if(dest) switch(dest->getKind())
{ {
switch(dest->getKind()) case destXYZ:
{ {
case destXYZ: sout << ",\"XYZ\",";
{
sout << ",\"XYZ\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
sout << ",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
sout << ",";
if(dest->getChangeZoom())
sout << (dest->getZoom());
else
sout << "null";
}
break;
case destFit:
sout << ",\"Fit\"";
break;
case destFitH:
sout << ",\"FitH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitV:
sout << ",\"FitV\",";
if(dest->getChangeLeft()) if(dest->getChangeLeft())
sout << (dest->getLeft()); sout << (dest->getLeft());
else else
sout << "null"; sout << "null";
break; sout << ",";
case destFitR:
sout << ",\"FitR\","
<< (dest->getLeft()) << ","
<< (dest->getBottom()) << ","
<< (dest->getRight()) << ","
<< (dest->getTop());
break;
case destFitB:
sout << ",\"FitB\"";
break;
case destFitBH:
sout << ",\"FitBH\",";
if(dest->getChangeTop()) if(dest->getChangeTop())
sout << (dest->getTop()); sout << (dest->getTop());
else else
sout << "null"; sout << "null";
break; sout << ",";
case destFitBV: if(dest->getChangeZoom())
sout << ",\"FitBV\","; sout << (dest->getZoom());
if(dest->getChangeLeft())
sout << (dest->getLeft());
else else
sout << "null"; sout << "null";
break; }
default: break;
break; case destFit:
} sout << ",\"Fit\"";
break;
case destFitH:
sout << ",\"FitH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitV:
sout << ",\"FitV\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
break;
case destFitR:
sout << ",\"FitR\","
<< (dest->getLeft()) << ","
<< (dest->getBottom()) << ","
<< (dest->getRight()) << ","
<< (dest->getTop());
break;
case destFitB:
sout << ",\"FitB\"";
break;
case destFitBH:
sout << ",\"FitBH\",";
if(dest->getChangeTop())
sout << (dest->getTop());
else
sout << "null";
break;
case destFitBV:
sout << ",\"FitBV\",";
if(dest->getChangeLeft())
sout << (dest->getLeft());
else
sout << "null";
break;
default:
break;
} }
sout << "]"; sout << "]";
@ -166,6 +163,7 @@ string HTMLRenderer::get_linkaction_str(LinkAction * action, string & detail)
case actionURI: case actionURI:
{ {
auto * real_action = dynamic_cast<LinkURI*>(action); auto * real_action = dynamic_cast<LinkURI*>(action);
assert(real_action != nullptr);
dest_str = real_action->getURI()->getCString(); dest_str = real_action->getURI()->getCString();
} }
break; break;

View File

@ -104,6 +104,7 @@ void HTMLRenderer::clipToStrokePath(GfxState * state)
} }
void HTMLRenderer::reset_state() void HTMLRenderer::reset_state()
{ {
inTransparencyGroup = 0;
draw_text_scale = 1.0; draw_text_scale = 1.0;
cur_font_size = 0.0; cur_font_size = 0.0;

View File

@ -33,16 +33,23 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
double cur_word_space = state->getWordSpace(); double cur_word_space = state->getWordSpace();
double cur_horiz_scaling = state->getHorizScaling(); double cur_horiz_scaling = state->getHorizScaling();
bool drawChars = true;
// Writing mode fonts and Type 3 fonts are rendered as images // Writing mode fonts and Type 3 fonts are rendered as images
// I don't find a way to display writing mode fonts in HTML except for one div for each character, which is too costly // I don't find a way to display writing mode fonts in HTML except for one div for each character, which is too costly
// For type 3 fonts, due to the font matrix, still it's hard to show it on HTML // For type 3 fonts, due to the font matrix, still it's hard to show it on HTML
if( (font == nullptr)
|| (font->getWMode())
|| ((font->getType() == fontType3) && (!param.process_type3)) if(state->getFont()
&& ( (state->getFont()->getWMode())
|| ((state->getFont()->getType() == fontType3) && (!param.process_type3))
|| (state->getRender() >= 4)
)
) )
{ {
return; // We still want to go through the loop to ensure characters are added to the covered_chars array
drawChars = false;
//printf("%d / %d / %d\n", state->getFont()->getWMode(), (state->getFont()->getType() == fontType3), state->getRender());
} }
// see if the line has to be closed due to state change // see if the line has to be closed due to state change
@ -74,7 +81,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
while (len > 0) while (len > 0)
{ {
auto n = font->getNextChar(p, len, &code, &u, &uLen, &ax, &ay, &ox, &oy); auto n = font->getNextChar(p, len, &code, &u, &uLen, &ax, &ay, &ox, &oy);
HR_DEBUG(printf("HTMLRenderer::drawString:unicode=%lc(%d)\n", (wchar_t)u[0], u[0])); HR_DEBUG(printf("HTMLRenderer::drawString:unicode=%lc(%d)\n", u ? (wchar_t)u[0] : ' ', u ? u[0] : -1));
if(!(equal(ox, 0) && equal(oy, 0))) if(!(equal(ox, 0) && equal(oy, 0)))
{ {
@ -82,7 +89,34 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
} }
ddx = ax * cur_font_size + cur_letter_space; ddx = ax * cur_font_size + cur_letter_space;
ddy = ay * cur_font_size; ddy = ay * cur_font_size;
tracer.draw_char(state, dx, dy, ax, ay);
double width = 0, height = font->getAscent();
if (font->isCIDFont()) {
char buf[2];
buf[0] = (code >> 8) & 0xff;
buf[1] = (code & 0xff);
width = ((GfxCIDFont *)font)->getWidth(buf, 2);
} else {
width = ((Gfx8BitFont *)font)->getWidth(code);
}
if (width == 0 || height == 0) {
//cerr << "CID: " << font->isCIDFont() << ", char:" << code << ", width:" << width << ", ax:" << ax << ", height:" << height << ", ay:" << ay << endl;
}
if (width == 0) {
width = ax;
if (width == 0) {
width = 0.001;
}
}
if (height == 0) {
height = ay;
if (height == 0) {
height = 0.001;
}
}
tracer.draw_char(state, dx, dy, width, height, !drawChars || inTransparencyGroup);
bool is_space = false; bool is_space = false;
if (n == 1 && *p == ' ') if (n == 1 && *p == ' ')
@ -99,6 +133,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
is_space = true; is_space = true;
} }
if(is_space && (param.space_as_offset)) if(is_space && (param.space_as_offset))
{ {
html_text_page.get_cur_line()->append_padding_char(); html_text_page.get_cur_line()->append_padding_char();
@ -158,7 +193,7 @@ bool HTMLRenderer::is_char_covered(int index)
{ {
std::cerr << "Warning: HTMLRenderer::is_char_covered: index out of bound: " std::cerr << "Warning: HTMLRenderer::is_char_covered: index out of bound: "
<< index << ", size: " << covered.size() <<endl; << index << ", size: " << covered.size() <<endl;
return false; return true; // Something's gone wrong so assume covered so at least something is output
} }
return covered[index]; return covered[index];
} }

View File

@ -250,7 +250,10 @@ void HTMLTextLine::dump_text(ostream & out)
if(std::abs(target - space_off) <= param.h_eps) if(std::abs(target - space_off) <= param.h_eps)
{ {
Unicode u = ' '; Unicode u = ' ';
// Sometimes we guess wrong whether we have a valid space character, so ensure it is always hidden
out << "<span class=\"" << CSS::WHITESPACE_CN << "\">";
writeUnicodes(out, &u, 1); writeUnicodes(out, &u, 1);
out << "</span>";
actual_offset = space_off; actual_offset = space_off;
done = true; done = true;
} }
@ -378,13 +381,12 @@ void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
new_offsets.reserve(offsets.size()); new_offsets.reserve(offsets.size());
auto offset_iter1 = offsets.begin(); auto offset_iter1 = offsets.begin();
for(auto state_iter2 = states.begin(), state_iter1 = state_iter2++; for(auto state_iter1 = states.begin(); state_iter1 != states.end(); ++state_iter1)
state_iter1 != states.end();
++state_iter1, ++state_iter2)
{ {
const auto state_iter2 = std::next(state_iter1);
const size_t text_idx1 = state_iter1->start_idx; const size_t text_idx1 = state_iter1->start_idx;
const size_t text_idx2 = (state_iter2 == states.end()) ? text.size() : state_iter2->start_idx; const size_t text_idx2 = (state_iter2 == states.end()) ? text.size() : state_iter2->start_idx;
size_t text_count = text_idx2 - text_idx1; const size_t text_count = text_idx2 - text_idx1;
// there might be some offsets before the first state // there might be some offsets before the first state
while((offset_iter1 != offsets.end()) while((offset_iter1 != offsets.end())

View File

@ -22,7 +22,10 @@ struct Param
double zoom; double zoom;
double fit_width, fit_height; double fit_width, fit_height;
int use_cropbox; int use_cropbox;
double h_dpi, v_dpi; double desired_dpi;
double actual_dpi;
double max_dpi;
double text_dpi;
// output // output
int embed_css; int embed_css;
@ -79,6 +82,7 @@ struct Param
std::string tmp_dir; std::string tmp_dir;
int debug; int debug;
int proof; int proof;
int quiet;
std::string input_filename, output_filename; std::string input_filename, output_filename;
}; };

View File

@ -45,7 +45,8 @@ void Preprocessor::process(PDFDoc * doc)
int page_count = (param.last_page - param.first_page + 1); int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i) for(int i = param.first_page; i <= param.last_page ; ++i)
{ {
cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if(param.quiet == 0)
cerr << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush;
doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI,
0, 0,
@ -54,9 +55,11 @@ void Preprocessor::process(PDFDoc * doc)
false, // printing false, // printing
nullptr, nullptr, nullptr, nullptr); nullptr, nullptr, nullptr, nullptr);
} }
if(page_count >= 0) if(page_count >= 0 && param.quiet == 0)
cerr << "Preprocessing: " << page_count << "/" << page_count; cerr << "Preprocessing: " << page_count << "/" << page_count;
cerr << endl;
if(param.quiet == 0)
cerr << endl;
} }
void Preprocessor::drawChar(GfxState *state, double x, double y, void Preprocessor::drawChar(GfxState *state, double x, double y,

View File

@ -43,7 +43,18 @@ public:
// install new_value into the map // install new_value into the map
// return the corresponding id // return the corresponding id
long long install(double new_value, double * actual_value_ptr = nullptr) { long long install(double new_value, double * actual_value_ptr = nullptr) {
auto iter = value_map.lower_bound(new_value - eps); // DCRH: Fix for when eps check fails and yet map thinks the keys are the same
// (DEV1-RYR-LETTER example)
auto iter = value_map.find(new_value);
if (iter != value_map.end()) {
if(actual_value_ptr != nullptr)
*actual_value_ptr = iter->first;
return iter->second;
}
iter = value_map.lower_bound(new_value - eps);
if((iter != value_map.end()) && (std::abs(iter->first - new_value) <= eps)) if((iter != value_map.end()) && (std::abs(iter->first - new_value) <= eps))
{ {
if(actual_value_ptr != nullptr) if(actual_value_ptr != nullptr)
@ -84,7 +95,7 @@ protected:
// Be careful about the mixed usage of Matrix and const double * // Be careful about the mixed usage of Matrix and const double *
// the input is usually double *, which might be changed, so we have to copy the content out // the input is usually double *, which might be changed, so we have to copy the content out
// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructing // in the map we use Matrix instead of double * such that the array may be automatically release when destructing
template <class Imp> template <class Imp>
class StateManager<Matrix, Imp> class StateManager<Matrix, Imp>
{ {
@ -96,7 +107,7 @@ public:
// return id // return id
long long install(const double * new_value) { long long install(const double * new_value) {
Matrix m; Matrix m;
memcpy(m.m, new_value, sizeof(m.m)); memcpy(m.m, new_value, 4 * sizeof(double));
auto iter = value_map.lower_bound(m); auto iter = value_map.lower_bound(m);
if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4))) if((iter != value_map.end()) && (tm_equal(m.m, iter->first.m, 4)))
{ {

View File

@ -111,7 +111,7 @@ void prepare_directories()
errno = 0; errno = 0;
unique_ptr<char> pBuf(new char[tmp_dir.size() + 1]); unique_ptr<char[]> pBuf(new char[tmp_dir.size() + 1]);
strcpy(pBuf.get(), tmp_dir.c_str()); strcpy(pBuf.get(), tmp_dir.c_str());
auto p = mkdtemp(pBuf.get()); auto p = mkdtemp(pBuf.get());
if(p == nullptr) if(p == nullptr)
@ -139,8 +139,7 @@ void parse_options (int argc, char **argv)
.add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true) .add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true)
.add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true) .add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true)
.add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox") .add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox")
.add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("dpi", &param.desired_dpi, 144.0, "Resolution for graphics in DPI")
.add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI")
// output files // output files
.add("embed", "specify which elements should be embedded into output", embed_parser, true) .add("embed", "specify which elements should be embedded into output", embed_parser, true)
@ -160,7 +159,7 @@ void parse_options (int argc, char **argv)
.add("process-form", &param.process_form, 0, "include text fields and radio buttons") .add("process-form", &param.process_form, 0, "include text fields and radio buttons")
.add("printing", &param.printing, 1, "enable printing support") .add("printing", &param.printing, 1, "enable printing support")
.add("fallback", &param.fallback, 0, "output in fallback mode") .add("fallback", &param.fallback, 0, "output in fallback mode")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit.") .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit")
// fonts // fonts
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
@ -181,13 +180,14 @@ void parse_options (int argc, char **argv)
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
.add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text") .add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text")
.add("correct-text-visibility", &param.correct_text_visibility, 0, "try to detect texts covered by other graphics and properly arrange them") .add("correct-text-visibility", &param.correct_text_visibility, 1, "0: Don't do text visibility checks. 1: Fully occluded text handled. 2: Partially occluded text handled")
.add("covered-text-dpi", &param.text_dpi, 300, "Rendering DPI to use if correct-text-visibility == 2 and there is partially covered text on the page")
// background image // background image
.add("bg-format", &param.bg_format, "png", "specify background image format") .add("bg-format", &param.bg_format, "png", "specify background image format")
.add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit," .add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit,"
" fall back this page to bitmap background; negative value means no limit.") " fall back this page to bitmap background; negative value means no limit")
.add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.") .add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible")
// encryption // encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
@ -196,11 +196,12 @@ void parse_options (int argc, char **argv)
// misc. // misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory.") .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
.add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory") .add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory")
.add("debug", &param.debug, 0, "print debugging information") .add("debug", &param.debug, 0, "print debugging information")
.add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof.") .add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof")
.add("quiet", &param.quiet, 0, "perform operations quietly")
// meta // meta
.add("version,v", "print copyright and version info", &show_version_and_exit) .add("version,v", "print copyright and version info", &show_version_and_exit)

View File

@ -126,6 +126,15 @@ void ffw_load_font(const char * filename)
assert(font->fv); assert(font->fv);
cur_fv = font->fv; cur_fv = font->fv;
// If we are a composite font, then ensure the cidmaster has the same ascent/descent values as the first subfont.
// If there are more than one subfont then what do we do???
if (cur_fv->cidmaster && (cur_fv->cidmaster->ascent != cur_fv->sf->ascent || cur_fv->cidmaster->descent != cur_fv->sf->descent)) {
printf("ffw_load_font:Warning ascent/descent mismatch for CID font: %d/%d => %d/%d\n",
cur_fv->cidmaster->ascent, cur_fv->cidmaster->descent, cur_fv->sf->ascent, cur_fv->sf->descent);
cur_fv->cidmaster->ascent = cur_fv->sf->ascent;
cur_fv->cidmaster->descent = cur_fv->sf->descent;
}
} }
/* /*

View File

@ -20,10 +20,10 @@ using std::ostream;
Unicode map_to_private(CharCode code) Unicode map_to_private(CharCode code)
{ {
Unicode private_mapping = (Unicode)(code + 0xE000); Unicode private_mapping = (Unicode)(code + 0xE600); // DCRH: Stupid mobile safari uses code points in 0xe000 - 0xe5ff range to switch to emoji font
if(private_mapping > 0xF8FF) if(private_mapping > 0xF65F) // DCRH: More emoji-avoiding for mobile safari (see http://www.fileformat.info/info/unicode/block/private_use_area/utf8test.htm)
{ {
private_mapping = (Unicode)((private_mapping - 0xF8FF) + 0xF0000); private_mapping = (Unicode)((private_mapping - 0xF65F) + 0xF0000);
if(private_mapping > 0xFFFFD) if(private_mapping > 0xFFFFD)
{ {
private_mapping = (Unicode)((private_mapping - 0xFFFFD) + 0x100000); private_mapping = (Unicode)((private_mapping - 0xFFFFD) + 0x100000);
@ -40,7 +40,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font)
{ {
if(!font->isCIDFont()) if(!font->isCIDFont())
{ {
char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code); auto * font2 = dynamic_cast<Gfx8BitFont*>(font);
assert(font2 != nullptr);
char * cname = font2->getCharName(code);
// may be untranslated ligature // may be untranslated ligature
if(cname) if(cname)
{ {

View File

@ -59,6 +59,13 @@ namespace pdf2htmlEX {
inline bool is_illegal_unicode(Unicode c) inline bool is_illegal_unicode(Unicode c)
{ {
return (c < 0x20) || (c >= 0x7F && c <= 0xA0) || (c == 0xAD) return (c < 0x20) || (c >= 0x7F && c <= 0xA0) || (c == 0xAD)
|| (c >= 0x300 && c <= 0x36f) // DCRH Combining diacriticals
|| (c >= 0x1ab0 && c <= 0x1aff) // DCRH Combining diacriticals
|| (c >= 0x1dc0 && c <= 0x1dff) // DCRH Combining diacriticals
|| (c >= 0x20d0 && c <= 0x20ff) // DCRH Combining diacriticals
|| (c >= 0xfe20 && c <= 0xfe2f) // DCRH Combining diacriticals
|| (c >= 0x900 && c <= 0x97f) // DCRH Devanagari - Webkit struggles with spacing for these code points
|| (c >= 0xa00 && c <= 0xa7f) // DCRH Gurmukhi - Webkit struggles with spacing for these code points
|| (c == 0x061C) || (c == 0x1361) || (c == 0x061C) || (c == 0x1361)
|| (c >= 0x200B && c <= 0x200F) || (c == 0x2028) || (c == 0x2029) || (c >= 0x200B && c <= 0x200F) || (c == 0x2028) || (c == 0x2029)
|| (c >= 0x202A && c <= 0x202E) || (c >= 0x2066 && c <= 0x2069) || (c >= 0x202A && c <= 0x202E) || (c >= 0x2066 && c <= 0x2069)