mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
Merge branch 'master' into devv
Conflicts: TODO
This commit is contained in:
commit
81ec1a5618
@ -5,7 +5,7 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "Build configuration (Debug, Release,
|
|||||||
project(pdf2htmlEX)
|
project(pdf2htmlEX)
|
||||||
cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR)
|
cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR)
|
||||||
|
|
||||||
include_directories(${CMAKE_SOURCE_DIR}/src/include)
|
include_directories(${CMAKE_SOURCE_DIR}/src)
|
||||||
|
|
||||||
set(PDF2HTMLEX_VERSION "0.6")
|
set(PDF2HTMLEX_VERSION "0.6")
|
||||||
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
|
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
|
||||||
@ -138,36 +138,52 @@ if(NOT CXX0X_SUPPORT)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
configure_file (${CMAKE_SOURCE_DIR}/src/include/pdf2htmlEX-config.h.in ${CMAKE_SOURCE_DIR}/src/include/pdf2htmlEX-config.h)
|
configure_file (${CMAKE_SOURCE_DIR}/src/pdf2htmlEX-config.h.in ${CMAKE_SOURCE_DIR}/src/pdf2htmlEX-config.h)
|
||||||
configure_file (${CMAKE_SOURCE_DIR}/pdf2htmlEX.1.in ${CMAKE_SOURCE_DIR}/pdf2htmlEX.1)
|
configure_file (${CMAKE_SOURCE_DIR}/pdf2htmlEX.1.in ${CMAKE_SOURCE_DIR}/pdf2htmlEX.1)
|
||||||
|
|
||||||
add_executable(pdf2htmlEX
|
add_executable(pdf2htmlEX
|
||||||
|
src/Param.h
|
||||||
|
src/pdf2htmlEX-config.h
|
||||||
src/pdf2htmlEX.cc
|
src/pdf2htmlEX.cc
|
||||||
src/include/HTMLRenderer.h
|
src/HTMLRenderer/HTMLRenderer.h
|
||||||
src/HTMLRenderer/general.cc
|
|
||||||
src/HTMLRenderer/state.cc
|
|
||||||
src/HTMLRenderer/install.cc
|
|
||||||
src/HTMLRenderer/export.cc
|
|
||||||
src/HTMLRenderer/text.cc
|
|
||||||
src/HTMLRenderer/image.cc
|
|
||||||
src/HTMLRenderer/draw.cc
|
src/HTMLRenderer/draw.cc
|
||||||
|
src/HTMLRenderer/export.cc
|
||||||
|
src/HTMLRenderer/general.cc
|
||||||
|
src/HTMLRenderer/image.cc
|
||||||
|
src/HTMLRenderer/install.cc
|
||||||
|
src/HTMLRenderer/TextLineBuffer.h
|
||||||
|
src/HTMLRenderer/TextLineBuffer.cc
|
||||||
src/HTMLRenderer/link.cc
|
src/HTMLRenderer/link.cc
|
||||||
src/include/namespace.h
|
src/HTMLRenderer/state.cc
|
||||||
src/HTMLRenderer/LineBuffer.cc
|
src/HTMLRenderer/text.cc
|
||||||
src/include/ffw.h
|
src/BackgroundRenderer/BackgroundRenderer.h
|
||||||
src/ffw.c
|
src/BackgroundRenderer/SplashBackgroundRenderer.h
|
||||||
src/include/BackgroundRenderer.h
|
src/BackgroundRenderer/SplashBackgroundRenderer.cc
|
||||||
src/include/SplashBackgroundRenderer.h
|
src/BackgroundRenderer/CairoBackgroundRenderer.h
|
||||||
src/SplashBackgroundRenderer.cc
|
src/BackgroundRenderer/CairoBackgroundRenderer.cc
|
||||||
src/include/CairoBackgroundRenderer.h
|
src/util/ArgParser.h
|
||||||
src/CairoBackgroundRenderer.cc
|
src/util/ArgParser.cc
|
||||||
src/include/Preprocessor.h
|
src/util/base64stream.h
|
||||||
src/Preprocessor.cc
|
src/util/base64stream.cc
|
||||||
src/include/util.h
|
src/util/const.h
|
||||||
src/util.cc
|
src/util/const.cc
|
||||||
src/include/ArgParser.h
|
src/util/ffw.h
|
||||||
src/ArgParser.cc
|
src/util/ffw.c
|
||||||
src/include/pdf2htmlEX-config.h
|
src/util/math.h
|
||||||
|
src/util/math.cc
|
||||||
|
src/util/misc.h
|
||||||
|
src/util/misc.cc
|
||||||
|
src/util/namespace.h
|
||||||
|
src/util/path.h
|
||||||
|
src/util/path.cc
|
||||||
|
src/util/Preprocessor.h
|
||||||
|
src/util/Preprocessor.cc
|
||||||
|
src/util/StringFormatter.h
|
||||||
|
src/util/StringFormatter.cc
|
||||||
|
src/util/TmpFiles.h
|
||||||
|
src/util/TmpFiles.cc
|
||||||
|
src/util/unicode.h
|
||||||
|
src/util/unicode.cc
|
||||||
)
|
)
|
||||||
target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS})
|
target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS})
|
||||||
|
|
||||||
|
@ -1,5 +1,10 @@
|
|||||||
Latest v0.6
|
Latest v0.6
|
||||||
|
|
||||||
|
* New parameter: --use-cropbox
|
||||||
|
* Progress indicator
|
||||||
|
* Create a glyph for ' ' when missing
|
||||||
|
* Code refining
|
||||||
|
|
||||||
v0.5
|
v0.5
|
||||||
2012.10.06
|
2012.10.06
|
||||||
|
|
||||||
|
48
README.md
48
README.md
@ -20,24 +20,34 @@ It is optimized for modern web browsers.On Linux/Mac, the generated HTML pages c
|
|||||||
|
|
||||||
This program is designed for scientific papers with complicated formulas and figures, therefore precise rendering is the #1 concern. But of course general PDF files are also supported.
|
This program is designed for scientific papers with complicated formulas and figures, therefore precise rendering is the #1 concern. But of course general PDF files are also supported.
|
||||||
|
|
||||||
|
### Why HTML ?
|
||||||
|
|
||||||
|
HTML, together with CSS and Javascript, is much more open and flexible than PDF. Almost everything can be customized.
|
||||||
|
- Embedding documents to web pages with consistent theme and behavior
|
||||||
|
- Cross references to other documents are much easier and more intuitive
|
||||||
|
- More functions to the document with Javascript, e.g. access control, animation, statistics
|
||||||
|
|
||||||
|
Readers can also benefit
|
||||||
|
- Read while downloading
|
||||||
|
- Plugin-free
|
||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
* Single HTML file output
|
* Optional single HTML file output
|
||||||
* Precise rendering
|
* Precise rendering
|
||||||
* Text Selection
|
* Text preserved - you can select & copy & paste
|
||||||
* Font embedding & reencoding for Web
|
* Proper styling
|
||||||
* Proper styling (Color, Transformation...)
|
- Font - extracted and reencoded
|
||||||
|
- Color
|
||||||
|
- Transformation
|
||||||
* Links
|
* Links
|
||||||
* Optimization for Web
|
|
||||||
* [EXPERIMENTAL] Path drawing with CSS
|
* [EXPERIMENTAL] Path drawing with CSS
|
||||||
* Orthogonal lines
|
- Orthogonal lines
|
||||||
* Rectangles
|
- Rectangles
|
||||||
* Linear gradients
|
- Linear gradients
|
||||||
|
* Not fully supported, and rendered as images
|
||||||
### Objects rendered as images
|
- Type 3 fonts
|
||||||
|
- Non-text object
|
||||||
* Type 3 fonts
|
|
||||||
* Non-text object
|
|
||||||
|
|
||||||
## Get started
|
## Get started
|
||||||
|
|
||||||
@ -99,12 +109,6 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
|||||||
|
|
||||||
man pdf2htmlEX
|
man pdf2htmlEX
|
||||||
|
|
||||||
### For Geeks
|
|
||||||
|
|
||||||
* Experimental and unsupported
|
|
||||||
|
|
||||||
pdf2htmlEX --process-nontext 0 --css-draw 1 /path/to/foobar.pdf
|
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
|
|
||||||
* [Troubleshooting compilation errors](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-compile)
|
* [Troubleshooting compilation errors](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-compile)
|
||||||
@ -113,11 +117,6 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
|||||||
* [I want more features](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission)
|
* [I want more features](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission)
|
||||||
* [More](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)
|
* [More](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)
|
||||||
|
|
||||||
|
|
||||||
**WINDOWS XP USERS: Please make sure ClearType is turned on**
|
|
||||||
|
|
||||||
(Control Panel -> Display -> Appearance -> Effects -> "Use the following method to smooth edges of screen fonts" -> ClearType)
|
|
||||||
|
|
||||||
## LICENSE
|
## LICENSE
|
||||||
|
|
||||||
GPLv2 & GPLv3 Dual licensed
|
GPLv2 & GPLv3 Dual licensed
|
||||||
@ -161,4 +160,5 @@ pdf2htmlEX is inspired by the following projects:
|
|||||||
### Special Thanks
|
### Special Thanks
|
||||||
|
|
||||||
* Hongliang Tian <tatetian@gmail.com>
|
* Hongliang Tian <tatetian@gmail.com>
|
||||||
|
* Wanmin Liu <wanminliu@gmail.com>
|
||||||
|
|
||||||
|
21
TODO
21
TODO
@ -1,30 +1,37 @@
|
|||||||
|
<<<<<<< HEAD
|
||||||
manually locate font if fixed name
|
manually locate font if fixed name
|
||||||
|
|
||||||
word space/offset before the first letter (calendar pdf)
|
word space/offset before the first letter (calendar pdf)
|
||||||
|
|
||||||
don't dump image when there is nothing
|
don't dump image when there is nothing
|
||||||
|
=======
|
||||||
|
>>>>>>> master
|
||||||
|
|
||||||
Integrate splash/cairo
|
word space/offset before the first letter (calendar pdf)
|
||||||
native support for image
|
|
||||||
native support for draw
|
|
||||||
|
|
||||||
draw non-orthogonal lines with CSS
|
minimum line width of css drawing
|
||||||
|
|
||||||
position history stack (popstate)
|
|
||||||
|
|
||||||
==Wait until someone asks==
|
==Wait until someone asks==
|
||||||
|
position history stack (popstate)
|
||||||
|
draw non-orthogonal lines with CSS
|
||||||
try harder finding glyph names (using fontforge) for CID Type 0
|
try harder finding glyph names (using fontforge) for CID Type 0
|
||||||
rename single-html -> embed-font/image/css ...
|
rename single-html -> embed-font/image/css ...
|
||||||
merge sub/sup into one line
|
merge sub/sup into one line
|
||||||
precise link dest: zoom
|
precise link dest: zoom
|
||||||
multiple charcode mapped to a same glyph
|
multiple charcode mapped to a same glyph
|
||||||
|
don't dump image when there is nothing
|
||||||
|
|
||||||
==Future==
|
==Future==
|
||||||
|
Integrate splash/cairo
|
||||||
|
native support for image
|
||||||
|
native support for draw
|
||||||
|
type 3 fonts
|
||||||
|
combine lines (unwarp)
|
||||||
argument auto-completion
|
argument auto-completion
|
||||||
use absolute positioning for long whitespace
|
use absolute positioning for long whitespace
|
||||||
color invert
|
color invert
|
||||||
detect duplicate base fonts when embedding
|
detect duplicate base fonts when embedding
|
||||||
disable selection if we know unicode is wrong
|
disable selection if we know unicode is wrong
|
||||||
combine lines (unwarp)
|
|
||||||
Printing
|
|
||||||
check if we can add information to the font, and let browsers show ligatures automatically
|
check if we can add information to the font, and let browsers show ligatures automatically
|
||||||
|
Printing
|
||||||
|
13
debian/changelog
vendored
13
debian/changelog
vendored
@ -1,3 +1,16 @@
|
|||||||
|
pdf2htmlex (0.6-1~git201212182148rd76af-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
|
* fix dependency of poppler for quantal
|
||||||
|
*
|
||||||
|
|
||||||
|
-- WANG Lu <coolwanglu@gmail.com> Tue, 18 Dec 2012 21:48:35 +0800
|
||||||
|
|
||||||
|
pdf2htmlex (0.6-1~git201212111844rd76af-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
|
* Package for quantal
|
||||||
|
|
||||||
|
-- WANG Lu <coolwanglu@gmail.com> Tue, 11 Dec 2012 18:44:44 +0800
|
||||||
|
|
||||||
pdf2htmlex (0.6-1~git201210070052rcb9a8-0ubuntu1) precise; urgency=low
|
pdf2htmlex (0.6-1~git201210070052rcb9a8-0ubuntu1) precise; urgency=low
|
||||||
|
|
||||||
* New version
|
* New version
|
||||||
|
2
debian/control
vendored
2
debian/control
vendored
@ -8,6 +8,6 @@ Homepage: http://github.com/coolwanglu/pdf2htmlEX
|
|||||||
|
|
||||||
Package: pdf2htmlex
|
Package: pdf2htmlex
|
||||||
Architecture: any
|
Architecture: any
|
||||||
Depends: ${shlibs:Depends}, ${misc:Depends}, libpoppler27 (>= 0.20.3), libboost-filesystem-dev, libboost-program-options-dev, libpng12-0, libfontforge1
|
Depends: ${shlibs:Depends}, ${misc:Depends}, libpoppler27 (>= 0.20.3) | libpoppler28, libboost-filesystem-dev, libboost-program-options-dev, libpng12-0, libfontforge1
|
||||||
Description: Converts PDF to HTML without losing format
|
Description: Converts PDF to HTML without losing format
|
||||||
pdf2htmlEX converts PDF to HTML while retaining text, format & style as much as possible
|
pdf2htmlEX converts PDF to HTML while retaining text, format & style as much as possible
|
||||||
|
@ -54,9 +54,12 @@ If multiple values are specified, the minimum one will be used.
|
|||||||
|
|
||||||
If none is specified, pages will be rendered as 72DPI.
|
If none is specified, pages will be rendered as 72DPI.
|
||||||
.TP
|
.TP
|
||||||
.B --hpdi <dpi>, --vpdi <dpi> (Default: 144)
|
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144)
|
||||||
Specify the horizontal and vertical DPI for images
|
Specify the horizontal and vertical DPI for images
|
||||||
.TP
|
.TP
|
||||||
|
.B --use-cropbox <0|1> (Default: 0)
|
||||||
|
Use CropBox instead of MediaBox for output.
|
||||||
|
.TP
|
||||||
.B --process-nontext <0|1> (Default: 1)
|
.B --process-nontext <0|1> (Default: 1)
|
||||||
Whether to process non-text objects (as images)
|
Whether to process non-text objects (as images)
|
||||||
.TP
|
.TP
|
||||||
@ -110,7 +113,7 @@ If this value is set to 1, the ToUnicode Map is always applied, if provided in P
|
|||||||
|
|
||||||
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
||||||
|
|
||||||
If set to 0, pdf2htmlEX would try it best to balance the two methods above.
|
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
||||||
.TP
|
.TP
|
||||||
.B --space-as-offset <0|1> (Default: 0)
|
.B --space-as-offset <0|1> (Default: 0)
|
||||||
Treat space characters as offsets, which may increase the size of the output.
|
Treat space characters as offsets, which may increase the size of the output.
|
||||||
@ -118,13 +121,13 @@ Treat space characters as offsets, which may increase the size of the output.
|
|||||||
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
|
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
|
||||||
.TP
|
.TP
|
||||||
.B --stretch-narrow-glyph <0|1> (Default: 0)
|
.B --stretch-narrow-glyph <0|1> (Default: 0)
|
||||||
If set to 1, glyphs narrower than described in PDF will be strecth; otherwise space will be padded to the right of the glyphs
|
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs
|
||||||
.TP
|
.TP
|
||||||
.B --squeeze_wide_glyph <0|1> (Default: 1)
|
.B --squeeze-wide-glyph <0|1> (Default: 1)
|
||||||
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
||||||
.TP
|
.TP
|
||||||
.B --remove-unused-glyph <0|1> (Default: 1)
|
.B --remove-unused-glyph <0|1> (Default: 1)
|
||||||
[Experimental] If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size.
|
If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size.
|
||||||
.TP
|
.TP
|
||||||
.B --font-suffix <suffix> (Default: .ttf), --font-format <format> (Default: truetype)
|
.B --font-suffix <suffix> (Default: .ttf), --font-format <format> (Default: truetype)
|
||||||
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
||||||
|
@ -63,6 +63,8 @@
|
|||||||
span {
|
span {
|
||||||
position:relative;
|
position:relative;
|
||||||
vertical-align: baseline;
|
vertical-align: baseline;
|
||||||
|
/* _<id> for spaces may need display:inline, which will override this */
|
||||||
|
display:inline-block;
|
||||||
}
|
}
|
||||||
._ {
|
._ {
|
||||||
color:transparent;
|
color:transparent;
|
||||||
@ -74,9 +76,6 @@ span {
|
|||||||
::-moz-selection{
|
::-moz-selection{
|
||||||
background: rgba(127,255,255,1);
|
background: rgba(127,255,255,1);
|
||||||
}
|
}
|
||||||
.i {
|
|
||||||
position:absolute;
|
|
||||||
}
|
|
||||||
.j {
|
.j {
|
||||||
display:none;
|
display:none;
|
||||||
}
|
}
|
||||||
|
@ -19,7 +19,8 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
double originX, double originY,
|
double originX, double originY,
|
||||||
CharCode code, int nBytes, Unicode *u, int uLen)
|
CharCode code, int nBytes, Unicode *u, int uLen)
|
||||||
{
|
{
|
||||||
if((state->getRender() & 3) == 3)
|
if(((state->getRender() & 3) == 3)
|
||||||
|
|| ((state->getFont()) && (state->getFont()->getWMode())))
|
||||||
{
|
{
|
||||||
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
||||||
}
|
}
|
||||||
@ -32,7 +33,9 @@ static GBool annot_cb(Annot *, void *) {
|
|||||||
void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename)
|
void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename)
|
||||||
{
|
{
|
||||||
doc->displayPage(this, pageno, param->h_dpi, param->v_dpi,
|
doc->displayPage(this, pageno, param->h_dpi, param->v_dpi,
|
||||||
0, true, false, false,
|
0,
|
||||||
|
(param->use_cropbox == 0),
|
||||||
|
false, false,
|
||||||
nullptr, nullptr, &annot_cb, nullptr);
|
nullptr, nullptr, &annot_cb, nullptr);
|
||||||
|
|
||||||
getBitmap()->writeImgFile(splashFormatPng,
|
getBitmap()->writeImgFile(splashFormatPng,
|
@ -15,8 +15,8 @@
|
|||||||
#include <splash/SplashBitmap.h>
|
#include <splash/SplashBitmap.h>
|
||||||
#include <SplashOutputDev.h>
|
#include <SplashOutputDev.h>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
|
||||||
#include "Param.h"
|
#include "Param.h"
|
||||||
|
#include "HTMLRenderer/HTMLRenderer.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
@ -25,8 +25,10 @@
|
|||||||
#include <Annot.h>
|
#include <Annot.h>
|
||||||
|
|
||||||
#include "Param.h"
|
#include "Param.h"
|
||||||
#include "util.h"
|
#include "util/Preprocessor.h"
|
||||||
#include "Preprocessor.h"
|
#include "util/const.h"
|
||||||
|
#include "util/StringFormatter.h"
|
||||||
|
#include "util/TmpFiles.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Naming Convention
|
* Naming Convention
|
||||||
@ -38,7 +40,6 @@
|
|||||||
* b - page Box
|
* b - page Box
|
||||||
* d - page Decoration
|
* d - page Decoration
|
||||||
* l - Line
|
* l - Line
|
||||||
* i - Image
|
|
||||||
* j - Js data
|
* j - Js data
|
||||||
* p - Page
|
* p - Page
|
||||||
*
|
*
|
||||||
@ -60,6 +61,51 @@
|
|||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
// we may need more info of a font in the future
|
||||||
|
// Per-font record kept by the renderer. HTMLRenderer::embed_font() takes one
// by reference (FontInfo & info) alongside the font it describes.
class FontInfo
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Font identifier; copied into a text line state's FONT_ID slot by
// TextLineBuffer::set_state().
long long id;
|
||||||
|
// NOTE(review): presumably records whether the PDF's ToUnicode map is used
// for this font — confirm against the code that fills it in.
bool use_tounicode;
|
||||||
|
// NOTE(review): font metrics; units are not visible from here — confirm.
int em_size;
|
||||||
|
double ascent, descent;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Hash functor for GfxRGB values: folds the three color channels into a
// single integer so a GfxRGB can be used as a hashed-container key.
class GfxRGB_hash
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
size_t operator () (const GfxRGB & rgb) const
|
||||||
|
{
|
||||||
|
// Pack the channels as r << 16 | g << 8 | b after converting each with
// colToByte(). NOTE(review): assumes colToByte() yields values that fit in
// 8 bits, otherwise the fields overlap — confirm.
return (colToByte(rgb.r) << 16) | (colToByte(rgb.g) << 8) | (colToByte(rgb.b));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Equality functor for GfxRGB values: two colors are equal only when their
// r, g and b channels all match. Intended as the key-equality counterpart of
// GfxRGB_hash.
class GfxRGB_equal
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Returns true when rgb1 and rgb2 hold the same r, g and b values.
bool operator ()(const GfxRGB & rgb1, const GfxRGB & rgb2) const
|
||||||
|
{
|
||||||
|
// Every channel of rgb1 must be checked against the same channel of rgb2;
// colors that differ only in blue are distinct.
return ((rgb1.r == rgb2.r) && (rgb1.g == rgb2.g) && (rgb1.b == rgb2.b));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Ordering functor for Matrix values: compares the first four elements of
// m1.m and m2.m in index order, treating elements within EPS of each other as
// equal.
// NOTE(review): a tolerance-based "equal" is not transitive in general, so
// this is only an approximate ordering for values closer together than EPS —
// confirm that is acceptable for the containers using it.
class Matrix_less
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
bool operator () (const Matrix & m1, const Matrix & m2) const
|
||||||
|
{
|
||||||
|
// Note that we only care about the first 4 elements
|
||||||
|
for(int i = 0; i < 4; ++i)
|
||||||
|
{
|
||||||
|
// First element that is smaller by more than EPS decides "less".
if(m1.m[i] < m2.m[i] - EPS)
|
||||||
|
return true;
|
||||||
|
// First element that is larger by more than EPS decides "not less".
if(m1.m[i] > m2.m[i] + EPS)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// All four elements are within EPS of each other: neither is less.
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class HTMLRenderer : public OutputDev
|
class HTMLRenderer : public OutputDev
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -154,10 +200,8 @@ class HTMLRenderer : public OutputDev
|
|||||||
void post_process();
|
void post_process();
|
||||||
|
|
||||||
// set flags
|
// set flags
|
||||||
void fix_stream (std::ostream & out);
|
void set_stream_flags (std::ostream & out);
|
||||||
|
|
||||||
void add_tmp_file (const std::string & fn);
|
|
||||||
void clean_tmp_files ();
|
|
||||||
std::string dump_embedded_font (GfxFont * font, long long fn_id);
|
std::string dump_embedded_font (GfxFont * font, long long fn_id);
|
||||||
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
|
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
|
||||||
|
|
||||||
@ -335,82 +379,20 @@ class HTMLRenderer : public OutputDev
|
|||||||
double draw_tx, draw_ty;
|
double draw_tx, draw_ty;
|
||||||
|
|
||||||
// some metrics have to be determined after all elements in the lines have been seen
|
// some metrics have to be determined after all elements in the lines have been seen
|
||||||
class LineBuffer {
|
class TextLineBuffer;
|
||||||
public:
|
friend class TextLineBuffer;
|
||||||
LineBuffer (HTMLRenderer * renderer) : renderer(renderer) { }
|
TextLineBuffer * text_line_buf;
|
||||||
|
|
||||||
class State {
|
|
||||||
public:
|
|
||||||
void begin(std::ostream & out, const State * prev_state);
|
|
||||||
void end(std::ostream & out) const;
|
|
||||||
void hash(void);
|
|
||||||
int diff(const State & s) const;
|
|
||||||
|
|
||||||
enum {
|
|
||||||
FONT_ID,
|
|
||||||
FONT_SIZE_ID,
|
|
||||||
COLOR_ID,
|
|
||||||
LETTER_SPACE_ID,
|
|
||||||
WORD_SPACE_ID,
|
|
||||||
RISE_ID,
|
|
||||||
|
|
||||||
ID_COUNT
|
|
||||||
};
|
|
||||||
|
|
||||||
long long ids[ID_COUNT];
|
|
||||||
|
|
||||||
double ascent;
|
|
||||||
double descent;
|
|
||||||
double draw_font_size;
|
|
||||||
|
|
||||||
size_t start_idx; // index of the first Text using this state
|
|
||||||
// for optimzation
|
|
||||||
long long hash_value;
|
|
||||||
bool need_close;
|
|
||||||
|
|
||||||
static const char * format_str; // class names for each id
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
class Offset {
|
|
||||||
public:
|
|
||||||
size_t start_idx; // should put this idx before text[start_idx];
|
|
||||||
double width;
|
|
||||||
};
|
|
||||||
|
|
||||||
void reset(GfxState * state);
|
|
||||||
void append_unicodes(const Unicode * u, int l);
|
|
||||||
void append_offset(double width);
|
|
||||||
void append_state(void);
|
|
||||||
void flush(void);
|
|
||||||
|
|
||||||
private:
|
|
||||||
// retrieve state from renderer
|
|
||||||
void set_state(State & state);
|
|
||||||
|
|
||||||
HTMLRenderer * renderer;
|
|
||||||
|
|
||||||
double x, y;
|
|
||||||
long long tm_id;
|
|
||||||
|
|
||||||
std::vector<State> states;
|
|
||||||
std::vector<Offset> offsets;
|
|
||||||
std::vector<Unicode> text;
|
|
||||||
|
|
||||||
// for flush
|
|
||||||
std::vector<State*> stack;
|
|
||||||
|
|
||||||
} line_buf;
|
|
||||||
friend class LineBuffer;
|
|
||||||
|
|
||||||
// for font reencoding
|
// for font reencoding
|
||||||
int32_t * cur_mapping;
|
int32_t * cur_mapping;
|
||||||
char ** cur_mapping2;
|
char ** cur_mapping2;
|
||||||
int * width_list;
|
int * width_list;
|
||||||
|
|
||||||
Preprocessor preprocessor;
|
Preprocessor preprocessor;
|
||||||
|
TmpFiles tmp_files;
|
||||||
|
|
||||||
// for string formatting
|
// for string formatting
|
||||||
string_formatter str_fmt;
|
StringFormatter str_fmt;
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// styles & resources
|
// styles & resources
|
||||||
@ -426,12 +408,9 @@ class HTMLRenderer : public OutputDev
|
|||||||
std::map<double, long long> rise_map;
|
std::map<double, long long> rise_map;
|
||||||
std::map<double, long long> height_map;
|
std::map<double, long long> height_map;
|
||||||
|
|
||||||
int image_count;
|
|
||||||
|
|
||||||
const Param * param;
|
const Param * param;
|
||||||
std::ofstream html_fout, css_fout;
|
std::ofstream html_fout, css_fout;
|
||||||
std::string html_path, css_path;
|
std::string html_path, css_path;
|
||||||
std::set<std::string> tmp_files;
|
|
||||||
|
|
||||||
static const std::string MANIFEST_FILENAME;
|
static const std::string MANIFEST_FILENAME;
|
||||||
};
|
};
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* LineBuffer.cc
|
* TextLineBuffer.cc
|
||||||
*
|
*
|
||||||
* Generate optimized HTML for one line
|
* Generate optimized HTML for one line
|
||||||
*
|
*
|
||||||
@ -10,7 +10,10 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "TextLineBuffer.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
#include "util/unicode.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -18,19 +21,21 @@ using std::min;
|
|||||||
using std::max;
|
using std::max;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::ostream;
|
using std::ostream;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::reset(GfxState * state)
|
void HTMLRenderer::TextLineBuffer::reset(GfxState * state)
|
||||||
{
|
{
|
||||||
state->transform(state->getCurX(), state->getCurY(), &x, &y);
|
state->transform(state->getCurX(), state->getCurY(), &x, &y);
|
||||||
tm_id = renderer->cur_ttm_id;
|
tm_id = renderer->cur_ttm_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l)
|
void HTMLRenderer::TextLineBuffer::append_unicodes(const Unicode * u, int l)
|
||||||
{
|
{
|
||||||
text.insert(text.end(), u, u+l);
|
text.insert(text.end(), u, u+l);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::append_offset(double width)
|
void HTMLRenderer::TextLineBuffer::append_offset(double width)
|
||||||
{
|
{
|
||||||
if((!offsets.empty()) && (offsets.back().start_idx == text.size()))
|
if((!offsets.empty()) && (offsets.back().start_idx == text.size()))
|
||||||
offsets.back().width += width;
|
offsets.back().width += width;
|
||||||
@ -38,7 +43,7 @@ void HTMLRenderer::LineBuffer::append_offset(double width)
|
|||||||
offsets.push_back(Offset({text.size(), width}));
|
offsets.push_back(Offset({text.size(), width}));
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::append_state(void)
|
void HTMLRenderer::TextLineBuffer::append_state(void)
|
||||||
{
|
{
|
||||||
if(states.empty() || (states.back().start_idx != text.size()))
|
if(states.empty() || (states.back().start_idx != text.size()))
|
||||||
{
|
{
|
||||||
@ -49,10 +54,10 @@ void HTMLRenderer::LineBuffer::append_state(void)
|
|||||||
set_state(states.back());
|
set_state(states.back());
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::flush(void)
|
void HTMLRenderer::TextLineBuffer::flush(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Each Line is an independent absolute positioined block
|
* Each Line is an independent absolute positioned block
|
||||||
* so even if we have a few states or offsets, we may omit them
|
* so even if we have a few states or offsets, we may omit them
|
||||||
*/
|
*/
|
||||||
if(text.empty()) return;
|
if(text.empty()) return;
|
||||||
@ -80,8 +85,8 @@ void HTMLRenderer::LineBuffer::flush(void)
|
|||||||
|
|
||||||
ostream & out = renderer->html_fout;
|
ostream & out = renderer->html_fout;
|
||||||
out << "<div style=\"left:"
|
out << "<div style=\"left:"
|
||||||
<< _round(x) << "px;bottom:"
|
<< round(x) << "px;bottom:"
|
||||||
<< _round(y) << "px;"
|
<< round(y) << "px;"
|
||||||
<< "\""
|
<< "\""
|
||||||
<< " class=\"l t" << tm_id
|
<< " class=\"l t" << tm_id
|
||||||
<< " h" << renderer->install_height(max_ascent)
|
<< " h" << renderer->install_height(max_ascent)
|
||||||
@ -177,7 +182,7 @@ void HTMLRenderer::LineBuffer::flush(void)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::set_state (State & state)
|
void HTMLRenderer::TextLineBuffer::set_state (State & state)
|
||||||
{
|
{
|
||||||
state.ids[State::FONT_ID] = renderer->cur_font_info->id;
|
state.ids[State::FONT_ID] = renderer->cur_font_info->id;
|
||||||
state.ids[State::FONT_SIZE_ID] = renderer->cur_fs_id;
|
state.ids[State::FONT_SIZE_ID] = renderer->cur_fs_id;
|
||||||
@ -192,7 +197,7 @@ void HTMLRenderer::LineBuffer::set_state (State & state)
|
|||||||
state.draw_font_size = renderer->draw_font_size;
|
state.draw_font_size = renderer->draw_font_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::State::begin (ostream & out, const State * prev_state)
|
void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * prev_state)
|
||||||
{
|
{
|
||||||
bool first = true;
|
bool first = true;
|
||||||
for(int i = 0; i < ID_COUNT; ++i)
|
for(int i = 0; i < ID_COUNT; ++i)
|
||||||
@ -225,13 +230,13 @@ void HTMLRenderer::LineBuffer::State::begin (ostream & out, const State * prev_s
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::State::end(ostream & out) const
|
void HTMLRenderer::TextLineBuffer::State::end(ostream & out) const
|
||||||
{
|
{
|
||||||
if(need_close)
|
if(need_close)
|
||||||
out << "</span>";
|
out << "</span>";
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::LineBuffer::State::hash(void)
|
void HTMLRenderer::TextLineBuffer::State::hash(void)
|
||||||
{
|
{
|
||||||
hash_value = 0;
|
hash_value = 0;
|
||||||
for(int i = 0; i < ID_COUNT; ++i)
|
for(int i = 0; i < ID_COUNT; ++i)
|
||||||
@ -240,7 +245,7 @@ void HTMLRenderer::LineBuffer::State::hash(void)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int HTMLRenderer::LineBuffer::State::diff(const State & s) const
|
int HTMLRenderer::TextLineBuffer::State::diff(const State & s) const
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* A quick check based on hash_value
|
* A quick check based on hash_value
|
||||||
@ -256,5 +261,5 @@ int HTMLRenderer::LineBuffer::State::diff(const State & s) const
|
|||||||
return d;
|
return d;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * HTMLRenderer::LineBuffer::State::format_str = "fsclwr";
|
const char * HTMLRenderer::TextLineBuffer::State::format_str = "fsclwr";
|
||||||
} //namespace pdf2htmlEX
|
} //namespace pdf2htmlEX
|
78
src/HTMLRenderer/TextLineBuffer.h
Normal file
78
src/HTMLRenderer/TextLineBuffer.h
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
#ifndef TEXTLINEBUFFER_H__
|
||||||
|
#define TEXTLINEBUFFER_H__
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
class HTMLRenderer;
|
||||||
|
// Buffers the text, style states and offsets of one text line so the whole
// line can be written out at once by flush(). Nested class of HTMLRenderer.
// NOTE(review): this header includes only <iostream> and <vector>; defining a
// nested class needs the full HTMLRenderer definition (plus GfxState and
// Unicode), so it presumably must be included after HTMLRenderer.h — confirm.
class HTMLRenderer::TextLineBuffer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Keeps a pointer to the owning renderer; state is read from it later.
TextLineBuffer (HTMLRenderer * renderer) : renderer(renderer) { }
|
||||||
|
|
||||||
|
// Style state that applies to the text starting at start_idx.
class State {
|
||||||
|
public:
|
||||||
|
// Writes the opening markup for this state to out (see TextLineBuffer.cc).
void begin(std::ostream & out, const State * prev_state);
|
||||||
|
// Writes "</span>" to out when need_close is set.
void end(std::ostream & out) const;
|
||||||
|
// Resets and recomputes hash_value.
void hash(void);
|
||||||
|
// Compares this state with s, using hash_value for a quick check.
int diff(const State & s) const;
|
||||||
|
|
||||||
|
// Indices into ids[]; ID_COUNT is the number of ids.
enum {
|
||||||
|
FONT_ID,
|
||||||
|
FONT_SIZE_ID,
|
||||||
|
COLOR_ID,
|
||||||
|
LETTER_SPACE_ID,
|
||||||
|
WORD_SPACE_ID,
|
||||||
|
RISE_ID,
|
||||||
|
|
||||||
|
ID_COUNT
|
||||||
|
};
|
||||||
|
|
||||||
|
long long ids[ID_COUNT];
|
||||||
|
|
||||||
|
double ascent;
|
||||||
|
double descent;
|
||||||
|
double draw_font_size;
|
||||||
|
|
||||||
|
size_t start_idx; // index of the first Text using this state
|
||||||
|
// for optimization
|
||||||
|
long long hash_value;
|
||||||
|
bool need_close;
|
||||||
|
|
||||||
|
static const char * format_str; // class names for each id
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// A horizontal offset to be placed before text[start_idx].
class Offset {
|
||||||
|
public:
|
||||||
|
size_t start_idx; // should put this idx before text[start_idx];
|
||||||
|
double width;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Records the transformed current position of state in x, y and the
// renderer's cur_ttm_id in tm_id.
void reset(GfxState * state);
|
||||||
|
// Appends the l code points starting at u to text.
void append_unicodes(const Unicode * u, int l);
|
||||||
|
// Adds width to the last offset when it already starts at the current end
// of text; otherwise a new Offset is pushed.
void append_offset(double width);
|
||||||
|
// Refreshes the state at the current end of text from the renderer,
// starting a new State first when the last one begins elsewhere.
void append_state(void);
|
||||||
|
// Writes the buffered line to the renderer's HTML output; returns
// immediately when text is empty.
void flush(void);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// retrieve state from renderer
|
||||||
|
void set_state(State & state);
|
||||||
|
|
||||||
|
HTMLRenderer * renderer;
|
||||||
|
|
||||||
|
// Line position and transform-matrix id captured by reset().
double x, y;
|
||||||
|
long long tm_id;
|
||||||
|
|
||||||
|
std::vector<State> states;
|
||||||
|
std::vector<Offset> offsets;
|
||||||
|
std::vector<Unicode> text;
|
||||||
|
|
||||||
|
// for flush
|
||||||
|
std::vector<State*> stack;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
||||||
|
#endif //TEXTLINEBUFFER_H__
|
@ -14,8 +14,9 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "util.h"
|
#include "util/misc.h"
|
||||||
#include "namespace.h"
|
#include "util/math.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -33,36 +34,36 @@ static bool is_horizontal_line(GfxSubpath * path)
|
|||||||
{
|
{
|
||||||
return ((path->getNumPoints() == 2)
|
return ((path->getNumPoints() == 2)
|
||||||
&& (!path->getCurve(1))
|
&& (!path->getCurve(1))
|
||||||
&& (_equal(path->getY(0), path->getY(1))));
|
&& (equal(path->getY(0), path->getY(1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_vertical_line(GfxSubpath * path)
|
static bool is_vertical_line(GfxSubpath * path)
|
||||||
{
|
{
|
||||||
return ((path->getNumPoints() == 2)
|
return ((path->getNumPoints() == 2)
|
||||||
&& (!path->getCurve(1))
|
&& (!path->getCurve(1))
|
||||||
&& (_equal(path->getX(0), path->getX(1))));
|
&& (equal(path->getX(0), path->getX(1))));
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool is_rectangle(GfxSubpath * path)
|
static bool is_rectangle(GfxSubpath * path)
|
||||||
{
|
{
|
||||||
if (!(((path->getNumPoints() != 4) && (path->isClosed()))
|
if (!(((path->getNumPoints() != 4) && (path->isClosed()))
|
||||||
|| ((path->getNumPoints() == 5)
|
|| ((path->getNumPoints() == 5)
|
||||||
&& _equal(path->getX(0), path->getX(4))
|
&& equal(path->getX(0), path->getX(4))
|
||||||
&& _equal(path->getY(0), path->getY(4)))))
|
&& equal(path->getY(0), path->getY(4)))))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for(int i = 1; i < path->getNumPoints(); ++i)
|
for(int i = 1; i < path->getNumPoints(); ++i)
|
||||||
if(path->getCurve(i))
|
if(path->getCurve(i))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
return (_equal(path->getY(0), path->getY(1))
|
return (equal(path->getY(0), path->getY(1))
|
||||||
&& _equal(path->getX(1), path->getX(2))
|
&& equal(path->getX(1), path->getX(2))
|
||||||
&& _equal(path->getY(2), path->getY(3))
|
&& equal(path->getY(2), path->getY(3))
|
||||||
&& _equal(path->getX(3), path->getX(0)))
|
&& equal(path->getX(3), path->getX(0)))
|
||||||
|| (_equal(path->getX(0), path->getX(1))
|
|| (equal(path->getX(0), path->getX(1))
|
||||||
&& _equal(path->getY(1), path->getY(2))
|
&& equal(path->getY(1), path->getY(2))
|
||||||
&& _equal(path->getX(2), path->getX(3))
|
&& equal(path->getX(2), path->getX(3))
|
||||||
&& _equal(path->getY(3), path->getY(0)));
|
&& equal(path->getY(3), path->getY(0)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void get_shading_bbox(GfxState * state, GfxShading * shading,
|
static void get_shading_bbox(GfxState * state, GfxShading * shading,
|
||||||
@ -105,7 +106,7 @@ static void get_shading_bbox(GfxState * state, GfxShading * shading,
|
|||||||
*/
|
*/
|
||||||
static double get_angle(double dx, double dy)
|
static double get_angle(double dx, double dy)
|
||||||
{
|
{
|
||||||
double r = _hypot(dx, dy);
|
double r = hypot(dx, dy);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* acos always returns [0, pi]
|
* acos always returns [0, pi]
|
||||||
@ -208,10 +209,10 @@ void LinearGradient::dumpto (ostream & out)
|
|||||||
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
||||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||||
{
|
{
|
||||||
out << "background-image:" << (*iter) << "linear-gradient(" << _round(angle) << "rad";
|
out << "background-image:" << (*iter) << "linear-gradient(" << round(angle) << "rad";
|
||||||
for(auto iter2 = stops.begin(); iter2 != stops.end(); ++iter2)
|
for(auto iter2 = stops.begin(); iter2 != stops.end(); ++iter2)
|
||||||
{
|
{
|
||||||
out << "," << (iter2->rgb) << " " << _round((iter2->pos) * 100) << "%";
|
out << "," << (iter2->rgb) << " " << round((iter2->pos) * 100) << "%";
|
||||||
}
|
}
|
||||||
out << ");";
|
out << ");";
|
||||||
}
|
}
|
||||||
@ -318,7 +319,7 @@ bool HTMLRenderer::css_do_path(GfxState *state, bool fill, bool test_only)
|
|||||||
GfxRGB * ps = fill ? nullptr : (&stroke_color);
|
GfxRGB * ps = fill ? nullptr : (&stroke_color);
|
||||||
GfxRGB * pf = fill ? (&fill_color) : nullptr;
|
GfxRGB * pf = fill ? (&fill_color) : nullptr;
|
||||||
|
|
||||||
if(_equal(h, 0) || _equal(w, 0))
|
if(equal(h, 0) || equal(w, 0))
|
||||||
{
|
{
|
||||||
// orthogonal line
|
// orthogonal line
|
||||||
|
|
||||||
@ -351,7 +352,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
double new_tm[6];
|
double new_tm[6];
|
||||||
memcpy(new_tm, tm, sizeof(new_tm));
|
memcpy(new_tm, tm, sizeof(new_tm));
|
||||||
|
|
||||||
_tm_transform(new_tm, x, y);
|
tm_transform(new_tm, x, y);
|
||||||
|
|
||||||
double scale = 1.0;
|
double scale = 1.0;
|
||||||
{
|
{
|
||||||
@ -359,8 +360,8 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
|
|
||||||
double i1 = (new_tm[0] + new_tm[2]) / sqrt2;
|
double i1 = (new_tm[0] + new_tm[2]) / sqrt2;
|
||||||
double i2 = (new_tm[1] + new_tm[3]) / sqrt2;
|
double i2 = (new_tm[1] + new_tm[3]) / sqrt2;
|
||||||
scale = _hypot(i1, i2);
|
scale = hypot(i1, i2);
|
||||||
if(_is_positive(scale))
|
if(is_positive(scale))
|
||||||
{
|
{
|
||||||
for(int i = 0; i < 4; ++i)
|
for(int i = 0; i < 4; ++i)
|
||||||
new_tm[i] /= scale;
|
new_tm[i] /= scale;
|
||||||
@ -383,8 +384,8 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
if(i > 0) html_fout << ' ';
|
if(i > 0) html_fout << ' ';
|
||||||
|
|
||||||
double lw = line_width_array[i] * scale;
|
double lw = line_width_array[i] * scale;
|
||||||
html_fout << _round(lw);
|
html_fout << round(lw);
|
||||||
if(_is_positive(lw)) html_fout << "px";
|
if(is_positive(lw)) html_fout << "px";
|
||||||
}
|
}
|
||||||
html_fout << ";";
|
html_fout << ";";
|
||||||
}
|
}
|
||||||
@ -407,10 +408,10 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
style_function(style_function_data, html_fout);
|
style_function(style_function_data, html_fout);
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "bottom:" << _round(y) << "px;"
|
html_fout << "bottom:" << round(y) << "px;"
|
||||||
<< "left:" << _round(x) << "px;"
|
<< "left:" << round(x) << "px;"
|
||||||
<< "width:" << _round(w * scale) << "px;"
|
<< "width:" << round(w * scale) << "px;"
|
||||||
<< "height:" << _round(h * scale) << "px;";
|
<< "height:" << round(h * scale) << "px;";
|
||||||
|
|
||||||
html_fout << "\"></div>";
|
html_fout << "\"></div>";
|
||||||
}
|
}
|
||||||
|
@ -11,7 +11,10 @@
|
|||||||
#include <cctype>
|
#include <cctype>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "util/namespace.h"
|
||||||
|
#include "util/base64stream.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "util/misc.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -38,7 +41,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
|
|||||||
css_fout << ")format(\"" << fontfileformat
|
css_fout << ")format(\"" << fontfileformat
|
||||||
<< "\");}.f" << info.id
|
<< "\");}.f" << info.id
|
||||||
<< "{font-family:f" << info.id
|
<< "{font-family:f" << info.id
|
||||||
<< ";line-height:" << _round(info.ascent - info.descent)
|
<< ";line-height:" << round(info.ascent - info.descent)
|
||||||
<< ";font-style:normal;font-weight:normal;}";
|
<< ";font-style:normal;font-weight:normal;}";
|
||||||
|
|
||||||
css_fout << endl;
|
css_fout << endl;
|
||||||
@ -81,14 +84,14 @@ void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, cons
|
|||||||
else
|
else
|
||||||
css_fout << "font-style:normal;";
|
css_fout << "font-style:normal;";
|
||||||
|
|
||||||
css_fout << "line-height:" << _round(info.ascent - info.descent) << ";";
|
css_fout << "line-height:" << round(info.ascent - info.descent) << ";";
|
||||||
|
|
||||||
css_fout << "}" << endl;
|
css_fout << "}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
|
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
|
||||||
{
|
{
|
||||||
css_fout << ".s" << fs_id << "{font-size:" << _round(font_size) << "px;}" << endl;
|
css_fout << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
||||||
@ -99,7 +102,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
|||||||
// we have already shifted the origin
|
// we have already shifted the origin
|
||||||
|
|
||||||
// TODO: recognize common matrices
|
// TODO: recognize common matrices
|
||||||
if(_tm_equal(tm, id_matrix, 4))
|
if(tm_equal(tm, ID_MATRIX, 4))
|
||||||
{
|
{
|
||||||
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
||||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||||
@ -112,10 +115,10 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
|||||||
{
|
{
|
||||||
// PDF uses a different coordinate system from the Web
|
// PDF uses a different coordinate system from the Web
|
||||||
css_fout << *iter << "transform:matrix("
|
css_fout << *iter << "transform:matrix("
|
||||||
<< _round(tm[0]) << ','
|
<< round(tm[0]) << ','
|
||||||
<< _round(-tm[1]) << ','
|
<< round(-tm[1]) << ','
|
||||||
<< _round(-tm[2]) << ','
|
<< round(-tm[2]) << ','
|
||||||
<< _round(tm[3]) << ',';
|
<< round(tm[3]) << ',';
|
||||||
|
|
||||||
css_fout << "0,0);";
|
css_fout << "0,0);";
|
||||||
}
|
}
|
||||||
@ -125,12 +128,12 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
|||||||
|
|
||||||
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
|
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
|
||||||
{
|
{
|
||||||
css_fout << ".l" << ls_id << "{letter-spacing:" << _round(letter_space) << "px;}" << endl;
|
css_fout << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_word_space (long long ws_id, double word_space)
|
void HTMLRenderer::export_word_space (long long ws_id, double word_space)
|
||||||
{
|
{
|
||||||
css_fout << ".w" << ws_id << "{word-spacing:" << _round(word_space) << "px;}" << endl;
|
css_fout << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
||||||
@ -141,19 +144,19 @@ void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
|||||||
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
|
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
|
||||||
{
|
{
|
||||||
if(ws_width > 0)
|
if(ws_width > 0)
|
||||||
css_fout << "._" << ws_id << "{display:inline-block;width:" << _round(ws_width) << "px;}" << endl;
|
css_fout << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
|
||||||
else
|
else
|
||||||
css_fout << "._" << ws_id << "{display:inline;margin-left:" << _round(ws_width) << "px;}" << endl;
|
css_fout << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_rise (long long rise_id, double rise)
|
void HTMLRenderer::export_rise (long long rise_id, double rise)
|
||||||
{
|
{
|
||||||
css_fout << ".r" << rise_id << "{top:" << _round(-rise) << "px;}" << endl;
|
css_fout << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_height (long long height_id, double height)
|
void HTMLRenderer::export_height (long long height_id, double height)
|
||||||
{
|
{
|
||||||
css_fout << ".h" << height_id << "{height:" << _round(height) << "px;}" << endl;
|
css_fout << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -14,10 +14,14 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "BackgroundRenderer.h"
|
#include "TextLineBuffer.h"
|
||||||
#include "namespace.h"
|
|
||||||
#include "ffw.h"
|
|
||||||
#include "pdf2htmlEX-config.h"
|
#include "pdf2htmlEX-config.h"
|
||||||
|
#include "BackgroundRenderer/BackgroundRenderer.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
#include "util/ffw.h"
|
||||||
|
#include "util/base64stream.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "util/path.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -28,6 +32,8 @@ using std::max;
|
|||||||
using std::min_element;
|
using std::min_element;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using std::abs;
|
using std::abs;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
static void dummy(void *, enum ErrorCategory, int pos, char *)
|
static void dummy(void *, enum ErrorCategory, int pos, char *)
|
||||||
{
|
{
|
||||||
@ -36,9 +42,9 @@ static void dummy(void *, enum ErrorCategory, int pos, char *)
|
|||||||
HTMLRenderer::HTMLRenderer(const Param * param)
|
HTMLRenderer::HTMLRenderer(const Param * param)
|
||||||
:OutputDev()
|
:OutputDev()
|
||||||
,line_opened(false)
|
,line_opened(false)
|
||||||
,line_buf(this)
|
,text_line_buf(new TextLineBuffer(this))
|
||||||
,preprocessor(param)
|
,preprocessor(param)
|
||||||
,image_count(0)
|
,tmp_files(*param)
|
||||||
,param(param)
|
,param(param)
|
||||||
{
|
{
|
||||||
if(!(param->debug))
|
if(!(param->debug))
|
||||||
@ -55,8 +61,8 @@ HTMLRenderer::HTMLRenderer(const Param * param)
|
|||||||
|
|
||||||
HTMLRenderer::~HTMLRenderer()
|
HTMLRenderer::~HTMLRenderer()
|
||||||
{
|
{
|
||||||
|
delete text_line_buf;
|
||||||
ffw_finalize();
|
ffw_finalize();
|
||||||
clean_tmp_files();
|
|
||||||
delete [] cur_mapping;
|
delete [] cur_mapping;
|
||||||
delete [] cur_mapping2;
|
delete [] cur_mapping2;
|
||||||
delete [] width_list;
|
delete [] width_list;
|
||||||
@ -76,7 +82,7 @@ void HTMLRenderer::process(PDFDoc *doc)
|
|||||||
bg_renderer->startDoc(doc);
|
bg_renderer->startDoc(doc);
|
||||||
}
|
}
|
||||||
|
|
||||||
int page_count = (param->last_page - param->first_page);
|
int page_count = (param->last_page - param->first_page + 1);
|
||||||
for(int i = param->first_page; i <= param->last_page ; ++i)
|
for(int i = param->first_page; i <= param->last_page ; ++i)
|
||||||
{
|
{
|
||||||
cerr << "Working: " << (i-param->first_page) << "/" << page_count << '\r' << flush;
|
cerr << "Working: " << (i-param->first_page) << "/" << page_count << '\r' << flush;
|
||||||
@ -87,21 +93,23 @@ void HTMLRenderer::process(PDFDoc *doc)
|
|||||||
html_fout.open((char*)page_fn, ofstream::binary);
|
html_fout.open((char*)page_fn, ofstream::binary);
|
||||||
if(!html_fout)
|
if(!html_fout)
|
||||||
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
||||||
fix_stream(html_fout);
|
set_stream_flags(html_fout);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param->process_nontext)
|
if(param->process_nontext)
|
||||||
{
|
{
|
||||||
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
|
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
|
||||||
if(param->single_html)
|
if(param->single_html)
|
||||||
add_tmp_file((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
bg_renderer->render_page(doc, i, (char*)fn);
|
bg_renderer->render_page(doc, i, (char*)fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
doc->displayPage(this, i,
|
doc->displayPage(this, i,
|
||||||
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
|
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
|
||||||
0, true, false, false,
|
0,
|
||||||
|
(param->use_cropbox == 0),
|
||||||
|
false, false,
|
||||||
nullptr, nullptr, nullptr, nullptr);
|
nullptr, nullptr, nullptr, nullptr);
|
||||||
|
|
||||||
if(param->split_pages)
|
if(param->split_pages)
|
||||||
@ -170,8 +178,8 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
cur_font_size = draw_font_size = 0;
|
cur_font_size = draw_font_size = 0;
|
||||||
cur_fs_id = install_font_size(cur_font_size);
|
cur_fs_id = install_font_size(cur_font_size);
|
||||||
|
|
||||||
memcpy(cur_text_tm, id_matrix, sizeof(cur_text_tm));
|
memcpy(cur_text_tm, ID_MATRIX, sizeof(cur_text_tm));
|
||||||
memcpy(draw_text_tm, id_matrix, sizeof(draw_text_tm));
|
memcpy(draw_text_tm, ID_MATRIX, sizeof(draw_text_tm));
|
||||||
cur_ttm_id = install_transform_matrix(draw_text_tm);
|
cur_ttm_id = install_transform_matrix(draw_text_tm);
|
||||||
|
|
||||||
cur_letter_space = cur_word_space = 0;
|
cur_letter_space = cur_word_space = 0;
|
||||||
@ -210,7 +218,7 @@ void HTMLRenderer::endPage() {
|
|||||||
for(int i = 0; i < 6; ++i)
|
for(int i = 0; i < 6; ++i)
|
||||||
{
|
{
|
||||||
if(i > 0) html_fout << ",";
|
if(i > 0) html_fout << ",";
|
||||||
html_fout << _round(default_ctm[i]);
|
html_fout << round(default_ctm[i]);
|
||||||
}
|
}
|
||||||
html_fout << "]";
|
html_fout << "]";
|
||||||
|
|
||||||
@ -232,17 +240,17 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
|
|
||||||
vector<double> zoom_factors;
|
vector<double> zoom_factors;
|
||||||
|
|
||||||
if(_is_positive(param->zoom))
|
if(is_positive(param->zoom))
|
||||||
{
|
{
|
||||||
zoom_factors.push_back(param->zoom);
|
zoom_factors.push_back(param->zoom);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(_is_positive(param->fit_width))
|
if(is_positive(param->fit_width))
|
||||||
{
|
{
|
||||||
zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width());
|
zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width());
|
||||||
}
|
}
|
||||||
|
|
||||||
if(_is_positive(param->fit_height))
|
if(is_positive(param->fit_height))
|
||||||
{
|
{
|
||||||
zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height());
|
zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height());
|
||||||
}
|
}
|
||||||
@ -280,13 +288,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
: str_fmt("%s/%s", param->dest_dir.c_str(), param->css_filename.c_str());
|
: str_fmt("%s/%s", param->dest_dir.c_str(), param->css_filename.c_str());
|
||||||
|
|
||||||
if(param->single_html && (!param->split_pages))
|
if(param->single_html && (!param->split_pages))
|
||||||
add_tmp_file((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
css_path = (char*)fn,
|
css_path = (char*)fn,
|
||||||
css_fout.open(css_path, ofstream::binary);
|
css_fout.open(css_path, ofstream::binary);
|
||||||
if(!css_fout)
|
if(!css_fout)
|
||||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||||
fix_stream(css_fout);
|
set_stream_flags(css_fout);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if split-pages is specified, open & close the file in the process loop
|
// if split-pages is specified, open & close the file in the process loop
|
||||||
@ -301,13 +309,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
* Otherwise just generate it
|
* Otherwise just generate it
|
||||||
*/
|
*/
|
||||||
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
|
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
|
||||||
add_tmp_file((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
html_path = (char*)fn;
|
html_path = (char*)fn;
|
||||||
html_fout.open(html_path, ofstream::binary);
|
html_fout.open(html_path, ofstream::binary);
|
||||||
if(!html_fout)
|
if(!html_fout)
|
||||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||||
fix_stream(html_fout);
|
set_stream_flags(html_fout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -327,7 +335,7 @@ void HTMLRenderer::post_process()
|
|||||||
output.open((char*)fn, ofstream::binary);
|
output.open((char*)fn, ofstream::binary);
|
||||||
if(!output)
|
if(!output)
|
||||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||||
fix_stream(output);
|
set_stream_flags(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
// apply manifest
|
// apply manifest
|
||||||
@ -385,40 +393,13 @@ void HTMLRenderer::post_process()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::fix_stream (std::ostream & out)
|
void HTMLRenderer::set_stream_flags(std::ostream & out)
|
||||||
{
|
{
|
||||||
// we output all ID's in hex
|
// we output all ID's in hex
|
||||||
// browsers are not happy with scientific notations
|
// browsers are not happy with scientific notations
|
||||||
out << hex << fixed;
|
out << hex << fixed;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::add_tmp_file(const string & fn)
|
|
||||||
{
|
|
||||||
if(!param->clean_tmp)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if(tmp_files.insert(fn).second && param->debug)
|
|
||||||
cerr << "Add new temporary file: " << fn << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void HTMLRenderer::clean_tmp_files()
|
|
||||||
{
|
|
||||||
if(!param->clean_tmp)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter)
|
|
||||||
{
|
|
||||||
const string & fn = *iter;
|
|
||||||
remove(fn.c_str());
|
|
||||||
if(param->debug)
|
|
||||||
cerr << "Remove temporary file: " << fn << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
remove(param->tmp_dir.c_str());
|
|
||||||
if(param->debug)
|
|
||||||
cerr << "Remove temporary directory: " << param->tmp_dir << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
|
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
|
||||||
{
|
{
|
||||||
string fn = get_filename(path);
|
string fn = get_filename(path);
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "util/namespace.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
@ -15,12 +15,15 @@
|
|||||||
|
|
||||||
#include "Param.h"
|
#include "Param.h"
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "util/namespace.h"
|
||||||
#include "util.h"
|
#include "util/math.h"
|
||||||
|
#include "util/misc.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
using std::abs;
|
using std::abs;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
const FontInfo * HTMLRenderer::install_font(GfxFont * font)
|
const FontInfo * HTMLRenderer::install_font(GfxFont * font)
|
||||||
{
|
{
|
||||||
@ -203,7 +206,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
|
|||||||
long long HTMLRenderer::install_font_size(double font_size)
|
long long HTMLRenderer::install_font_size(double font_size)
|
||||||
{
|
{
|
||||||
auto iter = font_size_map.lower_bound(font_size - EPS);
|
auto iter = font_size_map.lower_bound(font_size - EPS);
|
||||||
if((iter != font_size_map.end()) && (_equal(iter->first, font_size)))
|
if((iter != font_size_map.end()) && (equal(iter->first, font_size)))
|
||||||
return iter->second;
|
return iter->second;
|
||||||
|
|
||||||
long long new_fs_id = font_size_map.size();
|
long long new_fs_id = font_size_map.size();
|
||||||
@ -218,7 +221,7 @@ long long HTMLRenderer::install_transform_matrix(const double * tm)
|
|||||||
memcpy(m.m, tm, sizeof(m.m));
|
memcpy(m.m, tm, sizeof(m.m));
|
||||||
|
|
||||||
auto iter = transform_matrix_map.lower_bound(m);
|
auto iter = transform_matrix_map.lower_bound(m);
|
||||||
if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4)))
|
if((iter != transform_matrix_map.end()) && (tm_equal(m.m, iter->first.m, 4)))
|
||||||
return iter->second;
|
return iter->second;
|
||||||
|
|
||||||
long long new_tm_id = transform_matrix_map.size();
|
long long new_tm_id = transform_matrix_map.size();
|
||||||
@ -230,7 +233,7 @@ long long HTMLRenderer::install_transform_matrix(const double * tm)
|
|||||||
long long HTMLRenderer::install_letter_space(double letter_space)
|
long long HTMLRenderer::install_letter_space(double letter_space)
|
||||||
{
|
{
|
||||||
auto iter = letter_space_map.lower_bound(letter_space - EPS);
|
auto iter = letter_space_map.lower_bound(letter_space - EPS);
|
||||||
if((iter != letter_space_map.end()) && (_equal(iter->first, letter_space)))
|
if((iter != letter_space_map.end()) && (equal(iter->first, letter_space)))
|
||||||
return iter->second;
|
return iter->second;
|
||||||
|
|
||||||
long long new_ls_id = letter_space_map.size();
|
long long new_ls_id = letter_space_map.size();
|
||||||
@ -242,7 +245,7 @@ long long HTMLRenderer::install_letter_space(double letter_space)
|
|||||||
long long HTMLRenderer::install_word_space(double word_space)
|
long long HTMLRenderer::install_word_space(double word_space)
|
||||||
{
|
{
|
||||||
auto iter = word_space_map.lower_bound(word_space - EPS);
|
auto iter = word_space_map.lower_bound(word_space - EPS);
|
||||||
if((iter != word_space_map.end()) && (_equal(iter->first, word_space)))
|
if((iter != word_space_map.end()) && (equal(iter->first, word_space)))
|
||||||
return iter->second;
|
return iter->second;
|
||||||
|
|
||||||
long long new_ws_id = word_space_map.size();
|
long long new_ws_id = word_space_map.size();
|
||||||
|
@ -11,16 +11,20 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include <HTMLRenderer.h>
|
|
||||||
#include <Link.h>
|
#include <Link.h>
|
||||||
|
|
||||||
#include "namespace.h"
|
#include "HTMLRenderer.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "util/misc.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
using std::ostringstream;
|
using std::ostringstream;
|
||||||
using std::min;
|
using std::min;
|
||||||
using std::max;
|
using std::max;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The detailed rectangle area of the link destination
|
* The detailed rectangle area of the link destination
|
||||||
@ -211,9 +215,9 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
border_top_bottom_width, border_left_right_width);
|
border_top_bottom_width, border_left_right_width);
|
||||||
|
|
||||||
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
|
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
|
||||||
html_fout << "border-width:" << _round(border_top_bottom_width) << "px;";
|
html_fout << "border-width:" << round(border_top_bottom_width) << "px;";
|
||||||
else
|
else
|
||||||
html_fout << "border-width:" << _round(border_top_bottom_width) << "px " << _round(border_left_right_width) << "px;";
|
html_fout << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
|
||||||
}
|
}
|
||||||
auto style = border->getStyle();
|
auto style = border->getStyle();
|
||||||
switch(style)
|
switch(style)
|
||||||
@ -267,13 +271,13 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
html_fout << "border-style:none;";
|
html_fout << "border-style:none;";
|
||||||
}
|
}
|
||||||
|
|
||||||
_tm_transform(default_ctm, x, y);
|
tm_transform(default_ctm, x, y);
|
||||||
|
|
||||||
html_fout << "position:absolute;"
|
html_fout << "position:absolute;"
|
||||||
<< "left:" << _round(x) << "px;"
|
<< "left:" << round(x) << "px;"
|
||||||
<< "bottom:" << _round(y) << "px;"
|
<< "bottom:" << round(y) << "px;"
|
||||||
<< "width:" << _round(w) << "px;"
|
<< "width:" << round(w) << "px;"
|
||||||
<< "height:" << _round(h) << "px;";
|
<< "height:" << round(h) << "px;";
|
||||||
|
|
||||||
// fix for IE
|
// fix for IE
|
||||||
html_fout << "background-color:rgba(255,255,255,0.000001);";
|
html_fout << "background-color:rgba(255,255,255,0.000001);";
|
||||||
|
@ -16,8 +16,9 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "TextLineBuffer.h"
|
||||||
#include "util.h"
|
#include "util/namespace.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -104,7 +105,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
double new_font_size = state->getFontSize();
|
double new_font_size = state->getFontSize();
|
||||||
if(!_equal(cur_font_size, new_font_size))
|
if(!equal(cur_font_size, new_font_size))
|
||||||
{
|
{
|
||||||
need_rescale_font = true;
|
need_rescale_font = true;
|
||||||
cur_font_size = new_font_size;
|
cur_font_size = new_font_size;
|
||||||
@ -132,7 +133,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5];
|
new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5];
|
||||||
//new_ctm[4] = new_ctm[5] = 0;
|
//new_ctm[4] = new_ctm[5] = 0;
|
||||||
|
|
||||||
if(!_tm_equal(new_ctm, cur_text_tm))
|
if(!tm_equal(new_ctm, cur_text_tm))
|
||||||
{
|
{
|
||||||
need_recheck_position = true;
|
need_recheck_position = true;
|
||||||
need_rescale_font = true;
|
need_rescale_font = true;
|
||||||
@ -147,10 +148,10 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
double new_draw_text_tm[6];
|
double new_draw_text_tm[6];
|
||||||
memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
|
memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
|
||||||
|
|
||||||
double new_draw_text_scale = 1.0/text_scale_factor2 * _hypot(new_draw_text_tm[2], new_draw_text_tm[3]);
|
double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]);
|
||||||
|
|
||||||
double new_draw_font_size = cur_font_size;
|
double new_draw_font_size = cur_font_size;
|
||||||
if(_is_positive(new_draw_text_scale))
|
if(is_positive(new_draw_text_scale))
|
||||||
{
|
{
|
||||||
new_draw_font_size *= new_draw_text_scale;
|
new_draw_font_size *= new_draw_text_scale;
|
||||||
for(int i = 0; i < 4; ++i)
|
for(int i = 0; i < 4; ++i)
|
||||||
@ -161,19 +162,28 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
new_draw_text_scale = 1.0;
|
new_draw_text_scale = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!(_equal(new_draw_text_scale, draw_text_scale)))
|
if(!is_positive(new_draw_font_size))
|
||||||
|
{
|
||||||
|
// Page is flipped and css can't handle it.
|
||||||
|
new_draw_font_size = -new_draw_font_size;
|
||||||
|
|
||||||
|
for(int i = 0; i < 4; ++i)
|
||||||
|
new_draw_text_tm[i] *= -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!(equal(new_draw_text_scale, draw_text_scale)))
|
||||||
{
|
{
|
||||||
draw_text_scale_changed = true;
|
draw_text_scale_changed = true;
|
||||||
draw_text_scale = new_draw_text_scale;
|
draw_text_scale = new_draw_text_scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!(_equal(new_draw_font_size, draw_font_size)))
|
if(!(equal(new_draw_font_size, draw_font_size)))
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
||||||
draw_font_size = new_draw_font_size;
|
draw_font_size = new_draw_font_size;
|
||||||
cur_fs_id = install_font_size(draw_font_size);
|
cur_fs_id = install_font_size(draw_font_size);
|
||||||
}
|
}
|
||||||
if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4)))
|
if(!(tm_equal(new_draw_text_tm, draw_text_tm, 4)))
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
|
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
|
||||||
memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm));
|
memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm));
|
||||||
@ -199,21 +209,21 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
bool merged = false;
|
bool merged = false;
|
||||||
if(_tm_equal(old_ctm, cur_text_tm, 4))
|
if(tm_equal(old_ctm, cur_text_tm, 4))
|
||||||
{
|
{
|
||||||
double dy = cur_ty - draw_ty;
|
double dy = cur_ty - draw_ty;
|
||||||
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
|
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
|
||||||
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
|
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
|
||||||
|
|
||||||
if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
|
if(equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
|
||||||
{
|
{
|
||||||
if(_is_positive(cur_text_tm[0]))
|
if(is_positive(cur_text_tm[0]))
|
||||||
{
|
{
|
||||||
draw_tx += tdx / cur_text_tm[0];
|
draw_tx += tdx / cur_text_tm[0];
|
||||||
draw_ty += dy;
|
draw_ty += dy;
|
||||||
merged = true;
|
merged = true;
|
||||||
}
|
}
|
||||||
else if (_is_positive(cur_text_tm[1]))
|
else if (is_positive(cur_text_tm[1]))
|
||||||
{
|
{
|
||||||
draw_tx += tdy / cur_text_tm[1];
|
draw_tx += tdy / cur_text_tm[1];
|
||||||
draw_ty += dy;
|
draw_ty += dy;
|
||||||
@ -221,7 +231,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if((_equal(tdx,0)) && (_equal(tdy,0)))
|
if((equal(tdx,0)) && (equal(tdy,0)))
|
||||||
{
|
{
|
||||||
// free
|
// free
|
||||||
draw_tx = cur_tx;
|
draw_tx = cur_tx;
|
||||||
@ -246,7 +256,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
if(all_changed || letter_space_changed || draw_text_scale_changed)
|
if(all_changed || letter_space_changed || draw_text_scale_changed)
|
||||||
{
|
{
|
||||||
double new_letter_space = state->getCharSpace();
|
double new_letter_space = state->getCharSpace();
|
||||||
if(!_equal(cur_letter_space, new_letter_space))
|
if(!equal(cur_letter_space, new_letter_space))
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
||||||
cur_letter_space = new_letter_space;
|
cur_letter_space = new_letter_space;
|
||||||
@ -259,7 +269,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
if(all_changed || word_space_changed || draw_text_scale_changed)
|
if(all_changed || word_space_changed || draw_text_scale_changed)
|
||||||
{
|
{
|
||||||
double new_word_space = state->getWordSpace();
|
double new_word_space = state->getWordSpace();
|
||||||
if(!_equal(cur_word_space, new_word_space))
|
if(!equal(cur_word_space, new_word_space))
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
||||||
cur_word_space = new_word_space;
|
cur_word_space = new_word_space;
|
||||||
@ -294,7 +304,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
if(all_changed || rise_changed || draw_text_scale_changed)
|
if(all_changed || rise_changed || draw_text_scale_changed)
|
||||||
{
|
{
|
||||||
double new_rise = state->getRise();
|
double new_rise = state->getRise();
|
||||||
if(!_equal(cur_rise, new_rise))
|
if(!equal(cur_rise, new_rise))
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
new_line_state = max<NewLineState>(new_line_state, NLS_SPAN);
|
||||||
cur_rise = new_rise;
|
cur_rise = new_rise;
|
||||||
@ -333,7 +343,7 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
|
|||||||
{
|
{
|
||||||
close_text_line();
|
close_text_line();
|
||||||
|
|
||||||
line_buf.reset(state);
|
text_line_buf->reset(state);
|
||||||
|
|
||||||
//resync position
|
//resync position
|
||||||
draw_ty = cur_ty;
|
draw_ty = cur_ty;
|
||||||
@ -350,14 +360,14 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
line_buf.append_offset(target);
|
text_line_buf->append_offset(target);
|
||||||
draw_tx += target / draw_text_scale;
|
draw_tx += target / draw_text_scale;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(new_line_state != NLS_NONE)
|
if(new_line_state != NLS_NONE)
|
||||||
{
|
{
|
||||||
line_buf.append_state();
|
text_line_buf->append_state();
|
||||||
}
|
}
|
||||||
|
|
||||||
line_opened = true;
|
line_opened = true;
|
||||||
@ -368,7 +378,7 @@ void HTMLRenderer::close_text_line()
|
|||||||
if(line_opened)
|
if(line_opened)
|
||||||
{
|
{
|
||||||
line_opened = false;
|
line_opened = false;
|
||||||
line_buf.flush();
|
text_line_buf->flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -15,9 +15,14 @@
|
|||||||
#include <CharCodeToUnicode.h>
|
#include <CharCodeToUnicode.h>
|
||||||
#include <fofi/FoFiTrueType.h>
|
#include <fofi/FoFiTrueType.h>
|
||||||
|
|
||||||
#include "ffw.h"
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "namespace.h"
|
#include "TextLineBuffer.h"
|
||||||
|
#include "util/ffw.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
#include "util/unicode.h"
|
||||||
|
#include "util/path.h"
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "util/misc.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -26,6 +31,8 @@ using std::min;
|
|||||||
using std::all_of;
|
using std::all_of;
|
||||||
using std::floor;
|
using std::floor;
|
||||||
using std::swap;
|
using std::swap;
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
||||||
{
|
{
|
||||||
@ -127,7 +134,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
|||||||
obj.streamReset();
|
obj.streamReset();
|
||||||
|
|
||||||
filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str());
|
filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str());
|
||||||
add_tmp_file(filepath);
|
tmp_files.add(filepath);
|
||||||
|
|
||||||
ofstream outf(filepath, ofstream::binary);
|
ofstream outf(filepath, ofstream::binary);
|
||||||
if(!outf)
|
if(!outf)
|
||||||
@ -171,7 +178,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
if(param->debug)
|
if(param->debug)
|
||||||
{
|
{
|
||||||
auto fn = str_fmt("%s/__raw_font_%lld", param->tmp_dir.c_str(), info.id, param->font_suffix.c_str());
|
auto fn = str_fmt("%s/__raw_font_%lld", param->tmp_dir.c_str(), info.id, param->font_suffix.c_str());
|
||||||
add_tmp_file((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
|
ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -374,7 +381,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
// in auto mode, just drop the tounicode map
|
// in auto mode, just drop the tounicode map
|
||||||
if(!retried)
|
if(!retried)
|
||||||
{
|
{
|
||||||
cerr << "ToUnicode CMap is not valid and got dropped" << endl;
|
cerr << "ToUnicode CMap is not valid and got dropped for font: " << hex << info.id << dec << endl;
|
||||||
retried = true;
|
retried = true;
|
||||||
codeset.clear();
|
codeset.clear();
|
||||||
info.use_tounicode = false;
|
info.use_tounicode = false;
|
||||||
@ -410,7 +417,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
|
|
||||||
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
||||||
|
|
||||||
// we need the space chracter for offsets
|
// we need the space character for offsets
|
||||||
if(!has_space)
|
if(!has_space)
|
||||||
{
|
{
|
||||||
int space_width;
|
int space_width;
|
||||||
@ -437,9 +444,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
||||||
add_tmp_file(cur_tmp_fn);
|
tmp_files.add(cur_tmp_fn);
|
||||||
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
||||||
add_tmp_file(other_tmp_fn);
|
tmp_files.add(other_tmp_fn);
|
||||||
|
|
||||||
ffw_save(cur_tmp_fn.c_str());
|
ffw_save(cur_tmp_fn.c_str());
|
||||||
|
|
||||||
@ -482,7 +489,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
info.id, param->font_suffix.c_str());
|
info.id, param->font_suffix.c_str());
|
||||||
|
|
||||||
if(param->single_html)
|
if(param->single_html)
|
||||||
add_tmp_file(fn);
|
tmp_files.add(fn);
|
||||||
|
|
||||||
ffw_load_font(cur_tmp_fn.c_str());
|
ffw_load_font(cur_tmp_fn.c_str());
|
||||||
ffw_metric(&info.ascent, &info.descent);
|
ffw_metric(&info.ascent, &info.descent);
|
||||||
@ -517,14 +524,6 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
char *p = s->getCString();
|
char *p = s->getCString();
|
||||||
int len = s->getLength();
|
int len = s->getLength();
|
||||||
|
|
||||||
//debug
|
|
||||||
{
|
|
||||||
if(strcmp(p, "ORTUG") == 0)
|
|
||||||
{
|
|
||||||
cerr << "DEBUG: " << (int)(state->getRender()) << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
double dx = 0;
|
double dx = 0;
|
||||||
double dy = 0;
|
double dy = 0;
|
||||||
double dxerr = 0;
|
double dxerr = 0;
|
||||||
@ -538,10 +537,11 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
CharCode code;
|
CharCode code;
|
||||||
Unicode *u = nullptr;
|
Unicode *u = nullptr;
|
||||||
|
|
||||||
while (len > 0) {
|
while (len > 0)
|
||||||
|
{
|
||||||
auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy);
|
auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy);
|
||||||
|
|
||||||
if(!(_equal(ox, 0) && _equal(oy, 0)))
|
if(!(equal(ox, 0) && equal(oy, 0)))
|
||||||
{
|
{
|
||||||
cerr << "TODO: non-zero origins" << endl;
|
cerr << "TODO: non-zero origins" << endl;
|
||||||
}
|
}
|
||||||
@ -556,25 +556,25 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
if(is_space && (param->space_as_offset))
|
if(is_space && (param->space_as_offset))
|
||||||
{
|
{
|
||||||
// ignore horiz_scaling, as it's merged in CTM
|
// ignore horiz_scaling, as it's merged in CTM
|
||||||
line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
text_line_buf->append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
|
if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
|
||||||
{
|
{
|
||||||
line_buf.append_unicodes(u, uLen);
|
text_line_buf->append_unicodes(u, uLen);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(cur_font_info->use_tounicode)
|
if(cur_font_info->use_tounicode)
|
||||||
{
|
{
|
||||||
Unicode uu = check_unicode(u, uLen, code, font);
|
Unicode uu = check_unicode(u, uLen, code, font);
|
||||||
line_buf.append_unicodes(&uu, 1);
|
text_line_buf->append_unicodes(&uu, 1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Unicode uu = unicode_from_font(code, font);
|
Unicode uu = unicode_from_font(code, font);
|
||||||
line_buf.append_unicodes(&uu, 1);
|
text_line_buf->append_unicodes(&uu, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -28,6 +28,7 @@ struct Param
|
|||||||
double zoom;
|
double zoom;
|
||||||
double fit_width, fit_height;
|
double fit_width, fit_height;
|
||||||
double h_dpi, v_dpi;
|
double h_dpi, v_dpi;
|
||||||
|
int use_cropbox;
|
||||||
|
|
||||||
int process_nontext;
|
int process_nontext;
|
||||||
int single_html;
|
int single_html;
|
@ -1,235 +0,0 @@
|
|||||||
/*
|
|
||||||
* Constants & Misc functions
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* by WangLu
|
|
||||||
* 2012.08.10
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef UTIL_H__
|
|
||||||
#define UTIL_H__
|
|
||||||
|
|
||||||
#include <cstdio>
|
|
||||||
#include <iostream>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <cmath>
|
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
|
||||||
#include <map>
|
|
||||||
|
|
||||||
#ifndef nullptr
|
|
||||||
#define nullptr (NULL)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
|
||||||
|
|
||||||
static const double EPS = 1e-6;
|
|
||||||
extern const double id_matrix[6];
|
|
||||||
|
|
||||||
static const double DEFAULT_DPI = 72.0;
|
|
||||||
|
|
||||||
extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
|
|
||||||
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
|
|
||||||
// map to embed files into html
|
|
||||||
// key: (suffix, if_embed_content)
|
|
||||||
// value: (prefix string, suffix string)
|
|
||||||
extern const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string> > EMBED_STRING_MAP;
|
|
||||||
|
|
||||||
// Returns x unchanged when its magnitude exceeds EPS, otherwise exactly 0.0
static inline double _round(double x)
{
    if(std::abs(x) > EPS)
        return x;
    return 0.0;
}

// True when x and y differ by strictly less than EPS
static inline bool _equal(double x, double y)
{
    return std::abs(x - y) < EPS;
}

// True only when x is greater than EPS (values within EPS of zero are not positive)
static inline bool _is_positive(double x)
{
    return EPS < x;
}

// Compares the first `size` entries of two arrays with _equal()
// Returns true when every pair matches (also when size <= 0)
static inline bool _tm_equal(const double * tm1, const double * tm2, int size = 6)
{
    int i = 0;
    while((i < size) && _equal(tm1[i], tm2[i]))
        ++i;
    return i >= size;
}
|
|
||||||
// Length of the vector (x, y)
// std::hypot is used instead of sqrt(x*x + y*y) so the intermediate squares
// cannot overflow or underflow for very large or very small components
static inline double _hypot(double x, double y) { return std::hypot(x, y); }
|
|
||||||
|
|
||||||
void _tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
|
|
||||||
void _tm_multiply(double * tm_left, const double * tm_right);
|
|
||||||
|
|
||||||
static inline long long hash_ref(const Ref * id)
|
|
||||||
{
|
|
||||||
return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* http://en.wikipedia.org/wiki/HTML_decimal_character_rendering
|
|
||||||
*/
|
|
||||||
bool isLegalUnicode(Unicode u);
|
|
||||||
|
|
||||||
Unicode map_to_private(CharCode code);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Try to determine the Unicode value directly from the information in the font
|
|
||||||
*/
|
|
||||||
Unicode unicode_from_font (CharCode code, GfxFont * font);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We have to use a single Unicode value to reencode fonts
|
|
||||||
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
|
||||||
* if we cannot figure it out at the end, use a private mapping
|
|
||||||
*/
|
|
||||||
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font);
|
|
||||||
|
|
||||||
void outputUnicodes(std::ostream & out, const Unicode * u, int uLen);
|
|
||||||
|
|
||||||
class GfxRGB_hash
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
size_t operator () (const GfxRGB & rgb) const
|
|
||||||
{
|
|
||||||
return (colToByte(rgb.r) << 16) | (colToByte(rgb.g) << 8) | (colToByte(rgb.b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
class GfxRGB_equal
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
bool operator ()(const GfxRGB & rgb1, const GfxRGB & rgb2) const
|
|
||||||
{
|
|
||||||
return ((rgb1.r == rgb2.r) && (rgb1.g == rgb2.g) && (rgb1.b == rgb1.b));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Per-font record; more fields may be added when more information about a font is needed
class FontInfo
{
public:
    long long id;
    bool use_tounicode;
    int em_size;
    double ascent;
    double descent;
};
|
|
||||||
|
|
||||||
class Matrix_less
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
bool operator () (const Matrix & m1, const Matrix & m2) const
|
|
||||||
{
|
|
||||||
// Note that we only care about the first 4 elements
|
|
||||||
for(int i = 0; i < 4; ++i)
|
|
||||||
{
|
|
||||||
if(m1.m[i] < m2.m[i] - EPS)
|
|
||||||
return true;
|
|
||||||
if(m1.m[i] > m2.m[i] + EPS)
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/*
 * Writes the content of an input stream to an output stream as base64 text
 * Only a pointer to the input stream is kept, so the stream must stay alive until dumpto() has run
 */
class base64stream
{
public:

    base64stream(std::istream & in) : in(&in) { }
    base64stream(std::istream && in) : in(&in) { }

    std::ostream & dumpto(std::ostream & out)
    {
        unsigned char group[3];

        // complete 3-byte groups become 4 symbols each
        while(in->read((char*)group, 3))
        {
            const int s0 = (group[0] & 0xfc)>>2;
            const int s1 = ((group[0] & 0x03)<<4) | ((group[1] & 0xf0)>>4);
            const int s2 = ((group[1] & 0x0f)<<2) | ((group[2] & 0xc0)>>6);
            const int s3 = (group[2] & 0x3f);
            out << base64_encoding[s0]
                << base64_encoding[s1]
                << base64_encoding[s2]
                << base64_encoding[s3];
        }

        // bytes delivered by the last, incomplete read
        const auto leftover = in->gcount();
        if(!(leftover > 0))
            return out;

        // zero-fill the missing bytes so the bit arithmetic below is well defined
        for(int i = leftover; i < 3; ++i)
            group[i] = 0;

        out << base64_encoding[(group[0] & 0xfc)>>2]
            << base64_encoding[((group[0] & 0x03)<<4) | ((group[1] & 0xf0)>>4)];

        // third symbol only exists when two bytes were left; otherwise it is padding
        if(leftover > 1)
            out << base64_encoding[(group[1] & 0x0f)<<2];
        else
            out << '=';

        // the fourth position is always padding for an incomplete group
        out << '=';

        return out;
    }

private:
    std::istream * in;
    static const char * base64_encoding;
};
|
|
||||||
|
|
||||||
// Stream insertion for a named base64stream: forwards to dumpto()
static inline std::ostream & operator << (std::ostream & out, base64stream & bf)
{
    return bf.dumpto(out);
}

// Same for a temporary base64stream
static inline std::ostream & operator << (std::ostream & out, base64stream && bf)
{
    return bf.dumpto(out);
}
|
|
||||||
|
|
||||||
/*
 * printf-style formatter writing into one internal, growable buffer
 *
 * operator() returns a guarded_pointer, which converts to char*.
 * buf_cnt counts the guarded_pointers currently alive;
 * calling operator() while one is alive trips the assertion.
 */
class string_formatter
{
public:
    class guarded_pointer
    {
    public:
        guarded_pointer(string_formatter * sf) : sf(sf) { ++(sf->buf_cnt); }
        // copies are counted as well: without this, a copy that is not elided
        // would run the destructor twice for a single increment
        guarded_pointer(const guarded_pointer & other) : sf(other.sf) { ++(sf->buf_cnt); }
        ~guarded_pointer(void) { --(sf->buf_cnt); }
        operator char* () { return &(sf->buf.front()); }
    private:
        string_formatter * sf;
    };

    // the buffer is resized, not just reserved, so that front() and every byte
    // vsnprintf writes are inside the vector's valid range
    string_formatter() : buf_cnt(0) { buf.resize(L_tmpnam); }
    /*
     * Important:
     * there is only one buffer, so new strings will replace old ones
     */
    guarded_pointer operator () (const char * format, ...) {
        assert((buf_cnt == 0) && "string_formatter: buffer is reused!");

        va_list vlist;
        va_start(vlist, format);
        int l = vsnprintf(&buf.front(), buf.size(), format, vlist);
        va_end(vlist);
        if(l >= (int)buf.size())
        {
            // output was truncated: grow to fit (at least doubling) and format again
            buf.resize(std::max<long>((long)(l+1), (long)buf.size() * 2));
            va_start(vlist, format);
            l = vsnprintf(&buf.front(), buf.size(), format, vlist);
            va_end(vlist);
        }
        assert(l >= 0); // we should fail when vsnprintf fail
        assert(l < (int)buf.size());
        return guarded_pointer(this);
    }
private:
    friend class guarded_pointer;
    std::vector<char> buf;
    int buf_cnt;
};
|
|
||||||
|
|
||||||
void create_directories(std::string path);
|
|
||||||
|
|
||||||
bool is_truetype_suffix(const std::string & suffix);
|
|
||||||
|
|
||||||
std::string get_filename(const std::string & path);
|
|
||||||
std::string get_suffix(const std::string & path);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In PDF, edges of the rectangle are in the middle of the borders
|
|
||||||
* In HTML, edges are completely outside the rectangle
|
|
||||||
*/
|
|
||||||
void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2,
|
|
||||||
double border_width,
|
|
||||||
double & x, double & y, double & w, double & h,
|
|
||||||
double & border_top_bottom_width,
|
|
||||||
double & border_left_right_width);
|
|
||||||
|
|
||||||
std::ostream & operator << (std::ostream & out, const GfxRGB & rgb);
|
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
|
||||||
#endif //UTIL_H__
|
|
@ -19,10 +19,11 @@
|
|||||||
#include <PDFDocFactory.h>
|
#include <PDFDocFactory.h>
|
||||||
#include <GlobalParams.h>
|
#include <GlobalParams.h>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
|
||||||
#include "Param.h"
|
#include "Param.h"
|
||||||
#include "pdf2htmlEX-config.h"
|
#include "pdf2htmlEX-config.h"
|
||||||
#include "ArgParser.h"
|
#include "HTMLRenderer/HTMLRenderer.h"
|
||||||
|
#include "util/ArgParser.h"
|
||||||
|
#include "util/path.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace pdf2htmlEX;
|
using namespace pdf2htmlEX;
|
||||||
@ -66,6 +67,7 @@ void parse_options (int argc, char **argv)
|
|||||||
.add("fit-height", &param.fit_height, 0, "fit height", nullptr, true)
|
.add("fit-height", &param.fit_height, 0, "fit height", nullptr, true)
|
||||||
.add("hdpi", &param.h_dpi, 144.0, "horizontal DPI for non-text")
|
.add("hdpi", &param.h_dpi, 144.0, "horizontal DPI for non-text")
|
||||||
.add("vdpi", &param.v_dpi, 144.0, "vertical DPI for non-text")
|
.add("vdpi", &param.v_dpi, 144.0, "vertical DPI for non-text")
|
||||||
|
.add("use-cropbox", &param.use_cropbox, 0, "use CropBox instead of MediaBox")
|
||||||
|
|
||||||
.add("process-nontext", &param.process_nontext, 1, "process nontext objects")
|
.add("process-nontext", &param.process_nontext, 1, "process nontext objects")
|
||||||
.add("single-html", &param.single_html, 1, "combine everything into one single HTML file")
|
.add("single-html", &param.single_html, 1, "combine everything into one single HTML file")
|
||||||
|
322
src/util.cc
322
src/util.cc
@ -1,322 +0,0 @@
|
|||||||
/*
|
|
||||||
* Misc functions
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* by WangLu
|
|
||||||
* 2012.08.10
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <errno.h>
|
|
||||||
#include <cctype>
|
|
||||||
|
|
||||||
#include <GfxState.h>
|
|
||||||
#include <GfxFont.h>
|
|
||||||
#include <CharTypes.h>
|
|
||||||
#include <GlobalParams.h>
|
|
||||||
#include <Object.h>
|
|
||||||
|
|
||||||
// for mkdir
|
|
||||||
#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
|
||||||
using std::string;
|
|
||||||
using std::map;
|
|
||||||
using std::ostream;
|
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
|
||||||
|
|
||||||
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
|
|
||||||
|
|
||||||
// Font family name -> CSS font-family list (preferred face first, generic fallback last)
const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP({
   { "Courier", "Courier,monospace" },
   { "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },
   // "Times New Roman" was misspelled "Time New Roman", so that fallback could never match
   { "Times", "Times,\"Times New Roman\",\"Nimbus Roman No9 L\",serif" },
   { "Symbol", "Symbol,\"Standard Symbols L\"" },
   { "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },
});
|
|
||||||
|
|
||||||
// Raw byte-string font names -> Latin font names
// NOTE(review): the keys look like GB2312/GBK-encoded Chinese font names - confirm against callers
const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP = {
    { "\xCB\xCE\xCC\xE5",        "SimSun"  },
    { "\xBA\xDA\xCC\xE5",        "SimHei"  },
    { "\xBF\xAC\xCC\xE5_GB2312", "SimKai"  },
    { "\xB7\xC2\xCB\xCE_GB2312", "SimFang" },
    { "\xC1\xA5\xCA\xE9",        "SimLi"   },
};
|
|
||||||
|
|
||||||
// HTML snippets that wrap a resource
// key: (file suffix, whether the content is embedded inline)
// value: (text placed before, text placed after)
const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string> > EMBED_STRING_MAP = {
    // stylesheet: linked / inline
    { {".css", false}, {"<link rel=\"stylesheet\" type=\"text/css\" href=\"", "\"/>"} },
    { {".css", true},  {"<style type=\"text/css\">", "</style>"} },
    // script: linked / inline
    { {".js", false},  {"<script type=\"text/javascript\" src=\"", "\"></script>"} },
    { {".js", true},   {"<script type=\"text/javascript\">", "</script>"} }
};
|
|
||||||
|
|
||||||
/*
 * Applies the 6-element matrix tm to the point (x, y) in place:
 *   x' = tm[0]*x + tm[2]*y (+ tm[4])
 *   y' = tm[1]*x + tm[3]*y (+ tm[5])
 * When is_delta is true the translation terms tm[4], tm[5] are not added
 */
void _tm_transform(const double * tm, double & x, double & y, bool is_delta)
{
    // keep the inputs: x is overwritten before y is computed
    const double in_x = x;
    const double in_y = y;

    x = tm[0] * in_x + tm[2] * in_y;
    y = tm[1] * in_x + tm[3] * in_y;

    if(is_delta)
        return;

    x += tm[4];
    y += tm[5];
}
|
|
||||||
|
|
||||||
/*
 * Replaces tm_left with the composition of the two 6-element matrices:
 * the result, used with _tm_transform-style arithmetic, equals applying tm_right first and tm_left second
 *
 * Both operands are copied before anything is written, so the call is also
 * correct when tm_left and tm_right point to the same array
 */
void _tm_multiply(double * tm_left, const double * tm_right)
{
    const double l0 = tm_left[0], l1 = tm_left[1], l2 = tm_left[2], l3 = tm_left[3];
    const double r0 = tm_right[0], r1 = tm_right[1], r2 = tm_right[2],
                 r3 = tm_right[3], r4 = tm_right[4], r5 = tm_right[5];

    tm_left[0] = l0 * r0 + l2 * r1;
    tm_left[1] = l1 * r0 + l3 * r1;
    tm_left[2] = l0 * r2 + l2 * r3;
    tm_left[3] = l1 * r2 + l3 * r3;
    // translation: the old tm_left[4], tm_left[5] are kept and the transformed right-hand translation is added
    tm_left[4] += l0 * r4 + l2 * r5;
    tm_left[5] += l1 * r4 + l3 * r5;
}
|
|
||||||
|
|
||||||
/*
 * Returns false for code points in 0..31, 127..159 and 0xd800..0xdfff, true for everything else
 * Tab (9), LF (10) and CR (13) are not exempted
 */
bool isLegalUnicode(Unicode u)
{
    const bool in_low_range       = (u <= 31);
    const bool in_127_159         = (u >= 127) && (u <= 159);
    const bool in_surrogate_range = (u >= 0xd800) && (u <= 0xdfff);

    return !(in_low_range || in_127_159 || in_surrogate_range);
}
|
|
||||||
|
|
||||||
/*
 * Maps a character code to a private-use code point
 * Starts at code + 0xE000; values past 0xF8FF continue from 0xF0000,
 * values past 0xFFFFD continue from 0x100000
 * A warning is printed when even that range (up to 0x10FFFD) is exceeded; the value is returned anyway
 */
Unicode map_to_private(CharCode code)
{
    Unicode mapped = (Unicode)(code + 0xE000);
    if(!(mapped > 0xF8FF))
        return mapped;

    mapped = (Unicode)((mapped - 0xF8FF) + 0xF0000);
    if(!(mapped > 0xFFFFD))
        return mapped;

    mapped = (Unicode)((mapped - 0xFFFFD) + 0x100000);
    if(mapped > 0x10FFFD)
    {
        std::cerr << "Warning: all private use unicode are used" << std::endl;
    }
    return mapped;
}
|
|
||||||
|
|
||||||
/*
 * Looks up a Unicode value for `code` from the font itself
 * For non-CID fonts the character name is mapped through globalParams;
 * if that yields no legal code point (or the font is a CID font) a private-use mapping is returned
 */
Unicode unicode_from_font (CharCode code, GfxFont * font)
{
    if(font->isCIDFont())
        return map_to_private(code);

    char * glyph_name = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
    // may be untranslated ligature
    if(glyph_name)
    {
        Unicode candidate = globalParams->mapNameToUnicode(glyph_name);
        if(isLegalUnicode(candidate))
            return candidate;
    }

    return map_to_private(code);
}
|
|
||||||
|
|
||||||
/*
 * Reduces the Unicode sequence of a character to one code point
 *   no value          -> private-use mapping of `code`
 *   one legal value   -> that value
 *   anything else     -> whatever unicode_from_font() determines
 */
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
{
    if(len == 0)
        return map_to_private(code);

    // u is only dereferenced when exactly one value is present
    const bool single_legal_value = (len == 1) && isLegalUnicode(*u);
    if(single_legal_value)
        return *u;

    return unicode_from_font(code, font);
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Copied from UTF.h / UTF8.h in poppler
|
|
||||||
*/
|
|
||||||
static int mapUTF8(Unicode u, char *buf, int bufSize) {
|
|
||||||
if (u <= 0x0000007f) {
|
|
||||||
if (bufSize < 1) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
buf[0] = (char)u;
|
|
||||||
return 1;
|
|
||||||
} else if (u <= 0x000007ff) {
|
|
||||||
if (bufSize < 2) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
buf[0] = (char)(0xc0 + (u >> 6));
|
|
||||||
buf[1] = (char)(0x80 + (u & 0x3f));
|
|
||||||
return 2;
|
|
||||||
} else if (u <= 0x0000ffff) {
|
|
||||||
if (bufSize < 3) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
buf[0] = (char)(0xe0 + (u >> 12));
|
|
||||||
buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
|
|
||||||
buf[2] = (char)(0x80 + (u & 0x3f));
|
|
||||||
return 3;
|
|
||||||
} else if (u <= 0x0010ffff) {
|
|
||||||
if (bufSize < 4) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
buf[0] = (char)(0xf0 + (u >> 18));
|
|
||||||
buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
|
|
||||||
buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
|
|
||||||
buf[3] = (char)(0x80 + (u & 0x3f));
|
|
||||||
return 4;
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void outputUnicodes(ostream & out, const Unicode * u, int uLen)
|
|
||||||
{
|
|
||||||
for(int i = 0; i < uLen; ++i)
|
|
||||||
{
|
|
||||||
switch(u[i])
|
|
||||||
{
|
|
||||||
case '&':
|
|
||||||
out << "&";
|
|
||||||
break;
|
|
||||||
case '\"':
|
|
||||||
out << """;
|
|
||||||
break;
|
|
||||||
case '\'':
|
|
||||||
out << "'";
|
|
||||||
break;
|
|
||||||
case '<':
|
|
||||||
out << "<";
|
|
||||||
break;
|
|
||||||
case '>':
|
|
||||||
out << ">";
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
char buf[4];
|
|
||||||
auto n = mapUTF8(u[i], buf, 4);
|
|
||||||
out.write(buf, n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * base64stream::base64_encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
|
||||||
|
|
||||||
/*
 * Creates `path` and every missing parent directory (components separated by '/'),
 * each with mode S_IRWXU
 * An already existing directory is not an error
 * Throws a std::string message when a component cannot be created
 */
void create_directories(std::string path)
{
    if(path.empty()) return;

    // parents first
    const size_t last_slash = path.rfind('/');
    if(last_slash != std::string::npos)
    {
        create_directories(path.substr(0, last_slash));
    }

    if(mkdir(path.c_str(), S_IRWXU) == 0)
        return;

    // mkdir failed: acceptable only if the path is already a directory
    if(errno == EEXIST)
    {
        struct stat info;
        if((stat(path.c_str(), &info) == 0) && S_ISDIR(info.st_mode))
            return;
    }

    throw std::string("Cannot create directory: ") + path;
}
|
|
||||||
|
|
||||||
// True when suffix is exactly ".ttf", ".ttc" or ".otf" (case-sensitive)
bool is_truetype_suffix(const std::string & suffix)
{
    static const char * const accepted[] = { ".ttf", ".ttc", ".otf" };
    for(int i = 0; i < 3; ++i)
    {
        if(suffix == accepted[i])
            return true;
    }
    return false;
}
|
|
||||||
|
|
||||||
// Returns the part of path after the last '/'
// The whole path when it has no '/', an empty string when it ends with '/'
std::string get_filename (const std::string & path)
{
    const size_t last_slash = path.rfind('/');
    // substr() at one-past-the-last position yields the empty string
    return (last_slash == std::string::npos) ? path : path.substr(last_slash + 1);
}
|
|
||||||
|
|
||||||
string get_suffix(const string & path)
|
|
||||||
{
|
|
||||||
string fn = get_filename(path);
|
|
||||||
size_t idx = fn.rfind('.');
|
|
||||||
if(idx == string::npos)
|
|
||||||
return "";
|
|
||||||
else
|
|
||||||
{
|
|
||||||
string s = fn.substr(idx);
|
|
||||||
for(auto iter = s.begin(); iter != s.end(); ++iter)
|
|
||||||
*iter = tolower(*iter);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Convert a rectangle (x1,y1)-(x2,y2) stroked with `border_width`
 * into a position (x,y), a content size (w,h) and two border widths.
 *
 * Per axis: when the extent exceeds border_width, it is reduced by
 * border_width and the border keeps its width; otherwise the extent
 * becomes 0 and the border is border_width plus half the extent.
 * The position is always moved back by half of border_width.
 */
void css_fix_rectangle_border_width(double x1, double y1,
        double x2, double y2,
        double border_width,
        double & x, double & y, double & w, double & h,
        double & border_top_bottom_width,
        double & border_left_right_width)
{
    // both axes follow the same rule
    auto fix_axis = [border_width](double low, double high,
                                   double & pos, double & len, double & border)
    {
        len = high - low;
        if(len > border_width)
        {
            len -= border_width;
            border = border_width;
        }
        else
        {
            border = border_width + len/2;
            len = 0;
        }
        pos = low - border_width / 2;
    };

    fix_axis(x1, x2, x, w, border_left_right_width);
    fix_axis(y1, y2, y, h, border_top_bottom_width);
}
|
|
||||||
|
|
||||||
ostream & operator << (ostream & out, const GfxRGB & rgb)
|
|
||||||
{
|
|
||||||
auto flags= out.flags();
|
|
||||||
out << std::dec << "rgb("
|
|
||||||
<< (int)colToByte(rgb.r) << ","
|
|
||||||
<< (int)colToByte(rgb.g) << ","
|
|
||||||
<< (int)colToByte(rgb.b) << ")";
|
|
||||||
out.flags(flags);
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
|
@ -15,7 +15,8 @@
|
|||||||
#include <GfxFont.h>
|
#include <GfxFont.h>
|
||||||
|
|
||||||
#include "Preprocessor.h"
|
#include "Preprocessor.h"
|
||||||
#include "util.h"
|
#include "util/misc.h"
|
||||||
|
#include "util/const.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -41,7 +42,7 @@ Preprocessor::~Preprocessor(void)
|
|||||||
|
|
||||||
void Preprocessor::process(PDFDoc * doc)
|
void Preprocessor::process(PDFDoc * doc)
|
||||||
{
|
{
|
||||||
int page_count = (param->last_page - param->first_page);
|
int page_count = (param->last_page - param->first_page + 1);
|
||||||
for(int i = param->first_page; i <= param->last_page ; ++i)
|
for(int i = param->first_page; i <= param->last_page ; ++i)
|
||||||
{
|
{
|
||||||
cerr << "Preprocessing: " << (i-param->first_page) << "/" << page_count << '\r' << flush;
|
cerr << "Preprocessing: " << (i-param->first_page) << "/" << page_count << '\r' << flush;
|
30
src/util/StringFormatter.cc
Normal file
30
src/util/StringFormatter.cc
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
#include <cstdarg>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include "StringFormatter.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * Format a printf-style string into the single internal buffer and
 * return a GuardedPointer through which the result can be read.
 *
 * The buffer is grown as needed. It must not be in use by another
 * GuardedPointer when this is called (checked by assert).
 */
StringFormatter::GuardedPointer StringFormatter::operator () (const char * format, ...)
{
    assert((buf_cnt == 0) && "StringFormatter: buffer is reused!");

    // vsnprintf may only write into elements that really exist, so work with
    // size() instead of capacity(); resizing up to capacity() never reallocates
    if(buf.size() < buf.capacity())
        buf.resize(buf.capacity());
    if(buf.empty())
        buf.resize(1);

    va_list vlist;
    va_start(vlist, format);
    int l = vsnprintf(&buf.front(), buf.size(), format, vlist);
    va_end(vlist);
    if(l >= (int)buf.size())
    {
        // output was truncated: grow (at least doubling) and format again
        buf.resize(std::max<long>((long)(l+1), (long)buf.size() * 2));
        va_start(vlist, format);
        l = vsnprintf(&buf.front(), buf.size(), format, vlist);
        va_end(vlist);
    }
    assert(l >= 0); // we should fail when vsnprintf fail
    assert(l < (int)buf.size());

    // with asserts compiled out, hand back an empty string instead of stale content
    if(l < 0)
        buf.front() = '\0';

    return GuardedPointer(this);
}
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
|
44
src/util/StringFormatter.h
Normal file
44
src/util/StringFormatter.h
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
/*
|
||||||
|
* Buffer reusing string formatter
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef STRINGFORMATTER_H__
|
||||||
|
#define STRINGFORMATTER_H__
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdio>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * printf-style formatter that reuses one internal buffer for every call.
 */
class StringFormatter
{
public:
    /*
     * Handle to the formatter's buffer.
     * Every live GuardedPointer is counted in buf_cnt of its formatter,
     * so the formatter can detect that the buffer is still in use.
     */
    class GuardedPointer
    {
    public:
        GuardedPointer(StringFormatter * sf) : sf(sf) { ++(sf->buf_cnt); }
        GuardedPointer(const GuardedPointer & gp) : sf(gp.sf) { ++(sf->buf_cnt); }
        // keep the use counts of both formatters balanced on assignment
        // (increment first so that self-assignment is harmless)
        GuardedPointer & operator = (const GuardedPointer & gp)
        {
            ++(gp.sf->buf_cnt);
            --(sf->buf_cnt);
            sf = gp.sf;
            return *this;
        }
        ~GuardedPointer(void) { --(sf->buf_cnt); }
        operator char* () const { return &(sf->buf.front()); }
    private:
        StringFormatter * sf;
    };

    // start with L_tmpnam zeroed characters, so that the buffer is never empty
    // and front() is always valid
    StringFormatter() : buf(L_tmpnam), buf_cnt(0) { }
    /*
     * Important:
     * there is only one buffer, so new strings will replace old ones
     */
    GuardedPointer operator () (const char * format, ...);

private:
    friend class GuardedPointer;
    std::vector<char> buf;  // the shared output buffer
    int buf_cnt;            // number of GuardedPointer objects currently alive
};
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
#endif //STRINGFORMATTER_H__
|
56
src/util/TmpFiles.cc
Normal file
56
src/util/TmpFiles.cc
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
* TmpFiles.cc
|
||||||
|
*
|
||||||
|
* Collect and clean-up temporary files
|
||||||
|
*
|
||||||
|
* implemented by WangLu
|
||||||
|
* split off by Filodej <philodej@gmail.com>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include "TmpFiles.h"
|
||||||
|
#include "Param.h"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
|
||||||
|
// Keep a reference to the run-time parameters; no file is registered yet
TmpFiles::TmpFiles( const Param& param )
    : param( param )
{
}

// Clean up on destruction (see clean() for what is removed)
TmpFiles::~TmpFiles()
{
    clean();
}
|
||||||
|
|
||||||
|
void TmpFiles::add( const string & fn)
|
||||||
|
{
|
||||||
|
if(!param.clean_tmp)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if(tmp_files.insert(fn).second && param.debug)
|
||||||
|
cerr << "Add new temporary file: " << fn << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TmpFiles::clean()
|
||||||
|
{
|
||||||
|
if(!param.clean_tmp)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter)
|
||||||
|
{
|
||||||
|
const string & fn = *iter;
|
||||||
|
remove(fn.c_str());
|
||||||
|
if(param.debug)
|
||||||
|
cerr << "Remove temporary file: " << fn << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
remove(param.tmp_dir.c_str());
|
||||||
|
if(param.debug)
|
||||||
|
cerr << "Remove temporary directory: " << param.tmp_dir << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
29
src/util/TmpFiles.h
Normal file
29
src/util/TmpFiles.h
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#ifndef TMPFILES_H__
|
||||||
|
#define TMPFILES_H__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <set>
|
||||||
|
#include "Param.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
class TmpFiles
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
explicit TmpFiles( const Param& param );
|
||||||
|
~TmpFiles();
|
||||||
|
|
||||||
|
void add( const std::string& fn);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void clean();
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Param& param;
|
||||||
|
std::set<std::string> tmp_files;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
||||||
|
|
||||||
|
#endif //TMPFILES_H__
|
45
src/util/base64stream.cc
Normal file
45
src/util/base64stream.cc
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
#include "base64stream.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
using std::ostream;
|
||||||
|
|
||||||
|
ostream & base64stream::dumpto(ostream & out)
|
||||||
|
{
|
||||||
|
unsigned char buf[3];
|
||||||
|
while(in->read((char*)buf, 3))
|
||||||
|
{
|
||||||
|
out << base64_encoding[(buf[0] & 0xfc)>>2]
|
||||||
|
<< base64_encoding[((buf[0] & 0x03)<<4) | ((buf[1] & 0xf0)>>4)]
|
||||||
|
<< base64_encoding[((buf[1] & 0x0f)<<2) | ((buf[2] & 0xc0)>>6)]
|
||||||
|
<< base64_encoding[(buf[2] & 0x3f)];
|
||||||
|
}
|
||||||
|
auto cnt = in->gcount();
|
||||||
|
if(cnt > 0)
|
||||||
|
{
|
||||||
|
for(int i = cnt; i < 3; ++i)
|
||||||
|
buf[i] = 0;
|
||||||
|
|
||||||
|
out << base64_encoding[(buf[0] & 0xfc)>>2]
|
||||||
|
<< base64_encoding[((buf[0] & 0x03)<<4) | ((buf[1] & 0xf0)>>4)];
|
||||||
|
|
||||||
|
if(cnt > 1)
|
||||||
|
{
|
||||||
|
out << base64_encoding[(buf[1] & 0x0f)<<2];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
out << '=';
|
||||||
|
}
|
||||||
|
out << '=';
|
||||||
|
}
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Base64 alphabet: the character at index i is the symbol for the 6-bit value i
const char * base64stream::base64_encoding = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||||
|
|
||||||
|
// Stream insertion for a named base64stream: forwards to dumpto()
ostream & operator << (ostream & out, base64stream & bf)
{
    return bf.dumpto(out);
}

// Same for a temporary base64stream
ostream & operator << (ostream & out, base64stream && bf)
{
    return bf.dumpto(out);
}
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
33
src/util/base64stream.h
Normal file
33
src/util/base64stream.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Base64 Encoding
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef BASE64STREAM_H__
|
||||||
|
#define BASE64STREAM_H__
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * Wraps an input stream so that it can be written to an output
 * stream in Base64 form.
 *
 * Only the address of the input stream is kept, so the stream has to
 * outlive the base64stream (this includes a temporary passed to the
 * rvalue constructor).
 */
class base64stream
{
public:
    base64stream(std::istream & source) : in(&source) { }
    base64stream(std::istream && source) : in(&source) { }

    std::ostream & dumpto(std::ostream & out);

private:
    std::istream * in;                      // not owned
    static const char * base64_encoding;
};
|
||||||
|
|
||||||
|
std::ostream & operator << (std::ostream & out, base64stream & bf);
|
||||||
|
std::ostream & operator << (std::ostream & out, base64stream && bf);
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
#endif //BASE64STREAM_H__
|
39
src/util/const.cc
Normal file
39
src/util/const.cc
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* Constants
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
using std::map;
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
// Six-element matrix with entries 0 and 3 set to 1 and all others 0
const double ID_MATRIX[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
|
||||||
|
|
||||||
|
// PDF base 14 font name -> CSS font-family list (preferred name first, generic fallbacks after)
const map<string, string> BASE_14_FONT_CSS_FONT_MAP({
   { "Courier", "Courier,monospace" },
   { "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },
   // fixed: the fallback family was misspelled "Time New Roman"
   { "Times", "Times,\"Times New Roman\",\"Nimbus Roman No9 L\",serif" },
   { "Symbol", "Symbol,\"Standard Symbols L\"" },
   { "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },
});
|
||||||
|
|
||||||
|
// Font names given as raw GB-encoded bytes -> font name used in the output
const map<string, string> GB_ENCODED_FONT_NAME_MAP({
    { "\xCB\xCE\xCC\xE5",        "SimSun"  },
    { "\xBA\xDA\xCC\xE5",        "SimHei"  },
    { "\xBF\xAC\xCC\xE5_GB2312", "SimKai"  },
    { "\xB7\xC2\xCB\xCE_GB2312", "SimFang" },
    { "\xC1\xA5\xCA\xE9",        "SimLi"   },
});
|
||||||
|
|
||||||
|
// (file suffix, embed content?) -> (text written before, text written after)
// false: the HTML refers to the file; true: the file content is placed inline
const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string> > EMBED_STRING_MAP({
    { {".css", false}, {"<link rel=\"stylesheet\" type=\"text/css\" href=\"", "\"/>"} },
    { {".css", true},  {"<style type=\"text/css\">", "</style>"} },
    { {".js", false},  {"<script type=\"text/javascript\" src=\"", "\"></script>"} },
    { {".js", true},   {"<script type=\"text/javascript\">", "</script>"} }
});
|
||||||
|
} //namespace pdf2htmlEX
|
35
src/util/const.h
Normal file
35
src/util/const.h
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
/*
|
||||||
|
* Constants
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CONST_H__
|
||||||
|
#define CONST_H__
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
#ifndef nullptr
|
||||||
|
#define nullptr (NULL)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static const double EPS = 1e-6;
|
||||||
|
static const double DEFAULT_DPI = 72.0;
|
||||||
|
extern const double ID_MATRIX[6];
|
||||||
|
|
||||||
|
// PDF base 14 font name -> CSS font name
|
||||||
|
extern const std::map<std::string, std::string> BASE_14_FONT_CSS_FONT_MAP;
|
||||||
|
// For GB encoded font names
|
||||||
|
extern const std::map<std::string, std::string> GB_ENCODED_FONT_NAME_MAP;
|
||||||
|
// map to embed files into html
|
||||||
|
// key: (suffix, if_embed_content)
|
||||||
|
// value: (prefix string, suffix string)
|
||||||
|
extern const std::map<std::pair<std::string, bool>, std::pair<std::string, std::string> > EMBED_STRING_MAP;
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
||||||
|
|
||||||
|
#endif //CONST_H__
|
32
src/util/math.cc
Normal file
32
src/util/math.cc
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
#include <cstring>
|
||||||
|
#include "math.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * Apply the 6-element matrix `tm` to (x, y) in place:
 *   x' = tm[0]*x + tm[2]*y (+ tm[4])
 *   y' = tm[1]*x + tm[3]*y (+ tm[5])
 * The offsets tm[4]/tm[5] are left out when `is_delta` is true.
 */
void tm_transform(const double * tm, double & x, double & y, bool is_delta)
{
    const double x0 = x;
    const double y0 = y;

    x = tm[0] * x0 + tm[2] * y0;
    y = tm[1] * x0 + tm[3] * y0;

    if(is_delta)
        return;

    x += tm[4];
    y += tm[5];
}
|
||||||
|
|
||||||
|
/*
 * Combine two 6-element matrices in place; the result is stored in `tm_left`.
 * The first four entries are the 2x2 product, and the last two entries of
 * tm_left are increased by the 2x2 part of the old tm_left applied to
 * tm_right[4], tm_right[5].
 */
void tm_multiply(double * tm_left, const double * tm_right)
{
    // the 2x2 part of tm_left as it was on entry
    const double a = tm_left[0];
    const double b = tm_left[1];
    const double c = tm_left[2];
    const double d = tm_left[3];

    tm_left[0] = a * tm_right[0] + c * tm_right[1];
    tm_left[1] = b * tm_right[0] + d * tm_right[1];
    tm_left[2] = a * tm_right[2] + c * tm_right[3];
    tm_left[3] = b * tm_right[2] + d * tm_right[3];
    tm_left[4] += a * tm_right[4] + c * tm_right[5];
    tm_left[5] += b * tm_right[4] + d * tm_right[5];
}
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
|
33
src/util/math.h
Normal file
33
src/util/math.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
/*
|
||||||
|
* Math functions
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MATH_H__
|
||||||
|
#define MATH_H__
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "const.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
static inline double round(double x) { return (std::abs(x) > EPS) ? x : 0.0; }
|
||||||
|
static inline bool equal(double x, double y) { return std::abs(x-y) < EPS; }
|
||||||
|
static inline bool is_positive(double x) { return x > EPS; }
|
||||||
|
static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < size; ++i)
|
||||||
|
if(!equal(tm1[i], tm2[i]))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
static inline double hypot(double x, double y) { return std::sqrt(x*x+y*y); }
|
||||||
|
|
||||||
|
void tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
|
||||||
|
void tm_multiply(double * tm_left, const double * tm_right);
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
#endif //MATH_H__
|
66
src/util/misc.cc
Normal file
66
src/util/misc.cc
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* Misc functions
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.08.10
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
#include "misc.h"
|
||||||
|
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
using std::string;
|
||||||
|
using std::map;
|
||||||
|
using std::ostream;
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * Convert a rectangle (x1,y1)-(x2,y2) stroked with `border_width`
 * into a position (x,y), a content size (w,h) and two border widths.
 *
 * Per axis: when the extent exceeds border_width, it is reduced by
 * border_width and the border keeps its width; otherwise the extent
 * becomes 0 and the border is border_width plus half the extent.
 * The position is always moved back by half of border_width.
 *
 * NOTE(review): in the thin case the border is border_width + extent/2;
 * confirm that this, and not (border_width + extent)/2, is intended.
 */
void css_fix_rectangle_border_width(double x1, double y1,
        double x2, double y2,
        double border_width,
        double & x, double & y, double & w, double & h,
        double & border_top_bottom_width,
        double & border_left_right_width)
{
    // both axes follow the same rule
    auto fix_axis = [border_width](double low, double high,
                                   double & pos, double & len, double & border)
    {
        len = high - low;
        if(len > border_width)
        {
            len -= border_width;
            border = border_width;
        }
        else
        {
            border = border_width + len/2;
            len = 0;
        }
        pos = low - border_width / 2;
    };

    fix_axis(x1, x2, x, w, border_left_right_width);
    fix_axis(y1, y2, y, h, border_top_bottom_width);
}
|
||||||
|
|
||||||
|
ostream & operator << (ostream & out, const GfxRGB & rgb)
|
||||||
|
{
|
||||||
|
auto flags= out.flags();
|
||||||
|
out << std::dec << "rgb("
|
||||||
|
<< (int)colToByte(rgb.r) << ","
|
||||||
|
<< (int)colToByte(rgb.g) << ","
|
||||||
|
<< (int)colToByte(rgb.b) << ")";
|
||||||
|
out.flags(flags);
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
37
src/util/misc.h
Normal file
37
src/util/misc.h
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
/*
|
||||||
|
* Help classes and Functions
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.08.10
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef UTIL_H__
|
||||||
|
#define UTIL_H__
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <GfxState.h>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
static inline long long hash_ref(const Ref * id)
|
||||||
|
{
|
||||||
|
return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In PDF, edges of the rectangle are in the middle of the borders
|
||||||
|
* In HTML, edges are completely outside the rectangle
|
||||||
|
*/
|
||||||
|
void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2,
|
||||||
|
double border_width,
|
||||||
|
double & x, double & y, double & w, double & h,
|
||||||
|
double & border_top_bottom_width,
|
||||||
|
double & border_left_right_width);
|
||||||
|
|
||||||
|
std::ostream & operator << (std::ostream & out, const GfxRGB & rgb);
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
||||||
|
|
||||||
|
#endif //UTIL_H__
|
@ -12,8 +12,6 @@
|
|||||||
using std::hex;
|
using std::hex;
|
||||||
using std::dec;
|
using std::dec;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::cout;
|
|
||||||
using std::cerr;
|
|
||||||
using std::endl;
|
using std::endl;
|
||||||
using std::make_pair;
|
using std::make_pair;
|
||||||
using std::ifstream;
|
using std::ifstream;
|
73
src/util/path.cc
Normal file
73
src/util/path.cc
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/*
|
||||||
|
* Functions manipulating filenames and paths
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include "path.h"
|
||||||
|
|
||||||
|
using std::string;
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
 * Create `path` and all of its missing parent directories.
 *
 * An empty path is a no-op. A component that already exists as a
 * directory is accepted silently. New directories get mode S_IRWXU.
 * Throws a std::string describing the path when creation fails.
 */
void create_directories(const string & path)
{
    if(path.empty())
        return;

    // parents first: everything in front of the last '/'
    const size_t slash = path.rfind('/');
    if(slash != string::npos)
        create_directories(path.substr(0, slash));

    if(mkdir(path.c_str(), S_IRWXU) == 0)
        return;

    // mkdir failed; an already existing directory is fine
    if(errno == EEXIST)
    {
        struct stat info;
        const bool is_dir = (stat(path.c_str(), &info) == 0) && S_ISDIR(info.st_mode);
        if(is_dir)
            return;
    }

    throw string("Cannot create directory: ") + path;
}
|
||||||
|
|
||||||
|
// True when `suffix` is exactly ".ttf", ".ttc" or ".otf" (case-sensitive, dot included)
bool is_truetype_suffix(const string & suffix)
{
    if(suffix == ".ttf")
        return true;
    if(suffix == ".ttc")
        return true;
    return suffix == ".otf";
}
|
||||||
|
|
||||||
|
/*
 * Return the part of `path` after the last '/'.
 * Without any '/' the whole path is returned; a trailing '/' gives "".
 */
string get_filename (const string & path)
{
    const size_t slash = path.rfind('/');

    // substr() at position size() is valid and yields "", which covers the trailing-'/' case
    return (slash == string::npos) ? path : path.substr(slash + 1);
}
|
||||||
|
|
||||||
|
string get_suffix(const string & path)
|
||||||
|
{
|
||||||
|
string fn = get_filename(path);
|
||||||
|
size_t idx = fn.rfind('.');
|
||||||
|
if(idx == string::npos)
|
||||||
|
return "";
|
||||||
|
else
|
||||||
|
{
|
||||||
|
string s = fn.substr(idx);
|
||||||
|
for(auto iter = s.begin(); iter != s.end(); ++iter)
|
||||||
|
*iter = tolower(*iter);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
23
src/util/path.h
Normal file
23
src/util/path.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* Function handling filenames and paths
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef PATH_H__
|
||||||
|
#define PATH_H__
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
void create_directories(const std::string & path);
|
||||||
|
|
||||||
|
bool is_truetype_suffix(const std::string & suffix);
|
||||||
|
|
||||||
|
std::string get_filename(const std::string & path);
|
||||||
|
std::string get_suffix(const std::string & path);
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
||||||
|
#endif //PATH_H__
|
157
src/util/unicode.cc
Normal file
157
src/util/unicode.cc
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
/*
|
||||||
|
* Unicode manipulation functions
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <GlobalParams.h>
|
||||||
|
|
||||||
|
#include "unicode.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
using std::ostream;
|
||||||
|
|
||||||
|
/*
 * Reject code points 0..31 (tab, LF and CR included), 127..159 and
 * the surrogate range 0xd800..0xdfff; accept everything else.
 */
bool isLegalUnicode(Unicode u)
{
    const bool low_control = (u <= 31);
    const bool high_control = (u >= 127) && (u <= 159);
    const bool surrogate = (u >= 0xd800) && (u <= 0xdfff);

    return !(low_control || high_control || surrogate);
}
|
||||||
|
|
||||||
|
/*
 * Map a character code into the private use ranges:
 * first 0xE000..0xF8FF, then past 0xF0000, then past 0x100000.
 * A warning is printed when the result exceeds 0x10FFFD; the value is returned anyway.
 */
Unicode map_to_private(CharCode code)
{
    Unicode mapped = (Unicode)(code + 0xE000);
    if(mapped <= 0xF8FF)
        return mapped;

    // first range exhausted, continue after 0xF0000
    mapped = (Unicode)((mapped - 0xF8FF) + 0xF0000);
    if(mapped <= 0xFFFFD)
        return mapped;

    // second range exhausted, continue after 0x100000
    mapped = (Unicode)((mapped - 0xFFFFD) + 0x100000);
    if(mapped > 0x10FFFD)
        cerr << "Warning: all private use unicode are used" << endl;

    return mapped;
}
|
||||||
|
|
||||||
|
/*
 * For a non-CID font, look up the character name of `code` and return
 * the Unicode value it maps to, when that value is legal.
 * In every other case fall back to map_to_private(code).
 */
Unicode unicode_from_font (CharCode code, GfxFont * font)
{
    if(font->isCIDFont())
        return map_to_private(code);

    // may be untranslated ligature
    char * cname = dynamic_cast<Gfx8BitFont*>(font)->getCharName(code);
    if(cname)
    {
        const Unicode named = globalParams->mapNameToUnicode(cname);
        if(isLegalUnicode(named))
            return named;
    }

    return map_to_private(code);
}
|
||||||
|
|
||||||
|
/*
 * Choose one Unicode value for `code`:
 *  - no value given: private mapping
 *  - exactly one legal value: that value
 *  - otherwise: whatever unicode_from_font() determines
 */
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
{
    if(len == 0)
        return map_to_private(code);

    if((len == 1) && isLegalUnicode(*u))
        return *u;

    return unicode_from_font(code, font);
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copied from UTF.h / UTF8.h in poppler
|
||||||
|
*/
|
||||||
|
/*
 * Encode `u` as UTF-8 into `buf` (no terminator is written).
 * Returns the number of bytes written (1..4), or 0 when `u` is above
 * 0x10ffff or `bufSize` is too small for the encoding.
 */
static int mapUTF8(Unicode u, char *buf, int bufSize) {
  // number of bytes the encoding needs
  int len;
  if (u <= 0x0000007f)
    len = 1;
  else if (u <= 0x000007ff)
    len = 2;
  else if (u <= 0x0000ffff)
    len = 3;
  else if (u <= 0x0010ffff)
    len = 4;
  else
    return 0;

  if (bufSize < len)
    return 0;

  switch (len) {
  case 1:
    buf[0] = (char)u;
    break;
  case 2:
    buf[0] = (char)(0xc0 + (u >> 6));
    buf[1] = (char)(0x80 + (u & 0x3f));
    break;
  case 3:
    buf[0] = (char)(0xe0 + (u >> 12));
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
    buf[2] = (char)(0x80 + (u & 0x3f));
    break;
  default:
    buf[0] = (char)(0xf0 + (u >> 18));
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
    buf[3] = (char)(0x80 + (u & 0x3f));
    break;
  }
  return len;
}
|
||||||
|
|
||||||
|
void outputUnicodes(ostream & out, const Unicode * u, int uLen)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < uLen; ++i)
|
||||||
|
{
|
||||||
|
switch(u[i])
|
||||||
|
{
|
||||||
|
case '&':
|
||||||
|
out << "&";
|
||||||
|
break;
|
||||||
|
case '\"':
|
||||||
|
out << """;
|
||||||
|
break;
|
||||||
|
case '\'':
|
||||||
|
out << "'";
|
||||||
|
break;
|
||||||
|
case '<':
|
||||||
|
out << "<";
|
||||||
|
break;
|
||||||
|
case '>':
|
||||||
|
out << ">";
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
char buf[4];
|
||||||
|
auto n = mapUTF8(u[i], buf, 4);
|
||||||
|
out.write(buf, n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} //namespace pdf2htmlEX
|
41
src/util/unicode.h
Normal file
41
src/util/unicode.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
/*
|
||||||
|
* Unicode manipulation functions
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.11.29
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef UNICODE_H__
|
||||||
|
#define UNICODE_H__
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <GfxFont.h>
|
||||||
|
#include <CharTypes.h>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if the unicode is valid for HTML
|
||||||
|
* http://en.wikipedia.org/wiki/HTML_decimal_character_rendering
|
||||||
|
*/
|
||||||
|
bool isLegalUnicode(Unicode u);
|
||||||
|
|
||||||
|
Unicode map_to_private(CharCode code);
|
||||||
|
|
||||||
|
/* * Try to determine the Unicode value directly from the information in the font */
|
||||||
|
Unicode unicode_from_font (CharCode code, GfxFont * font);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We have to use a single Unicode value to reencode fonts
|
||||||
|
* if we got multi-unicode values, it might be expanded ligature, try to restore it
|
||||||
|
* if we cannot figure it out at the end, use a private mapping
|
||||||
|
*/
|
||||||
|
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font);
|
||||||
|
|
||||||
|
void outputUnicodes(std::ostream & out, const Unicode * u, int uLen);
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace pdf2htmlEX
|
||||||
|
|
||||||
|
#endif //UNICODE_H__
|
Loading…
Reference in New Issue
Block a user