mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-10-05 19:41:40 +00:00
Merge branch 'master' of github.com:coolwanglu/pdf2htmlEX
This commit is contained in:
commit
b8e49c448b
14
AUTHORS
Normal file
14
AUTHORS
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
Deepak <iapain@gmail.com>
|
||||||
|
filodej <philode@gmail.com>
|
||||||
|
hasufell <julian.ospald@googlemail.com>
|
||||||
|
Herbert Jones <herbert@mediafire.com>
|
||||||
|
Hongliang Tian <tatetian@gmail.com>
|
||||||
|
John Hewson <john@jahewson.com>
|
||||||
|
Lu Wang <coolwanglu@gmail.com>
|
||||||
|
|
||||||
|
Packagers:
|
||||||
|
Arthur Titeica <arthur.titeica@gmail.com>
|
||||||
|
Deepak Thukral <iapain@iapa.in>
|
||||||
|
Jamie Ly <me@jamie.ly>
|
||||||
|
Lu Wang <coolwanglu@gmail.com>
|
||||||
|
|
@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR)
|
|||||||
|
|
||||||
include_directories(${CMAKE_SOURCE_DIR}/src)
|
include_directories(${CMAKE_SOURCE_DIR}/src)
|
||||||
|
|
||||||
set(PDF2HTMLEX_VERSION "0.6")
|
set(PDF2HTMLEX_VERSION "0.7")
|
||||||
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
|
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
|
||||||
add_custom_target(dist
|
add_custom_target(dist
|
||||||
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
|
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
|
||||||
@ -154,6 +154,7 @@ add_executable(pdf2htmlEX
|
|||||||
src/HTMLRenderer/TextLineBuffer.h
|
src/HTMLRenderer/TextLineBuffer.h
|
||||||
src/HTMLRenderer/TextLineBuffer.cc
|
src/HTMLRenderer/TextLineBuffer.cc
|
||||||
src/HTMLRenderer/link.cc
|
src/HTMLRenderer/link.cc
|
||||||
|
src/HTMLRenderer/outline.cc
|
||||||
src/HTMLRenderer/state.cc
|
src/HTMLRenderer/state.cc
|
||||||
src/HTMLRenderer/text.cc
|
src/HTMLRenderer/text.cc
|
||||||
src/BackgroundRenderer/BackgroundRenderer.h
|
src/BackgroundRenderer/BackgroundRenderer.h
|
||||||
|
13
ChangeLog
13
ChangeLog
@ -1,8 +1,17 @@
|
|||||||
Latest v0.6
|
Latest v0.7
|
||||||
|
|
||||||
|
* Process outline
|
||||||
|
* Fix build with poppler
|
||||||
|
* Many code cleaning jobs [John Hewson]
|
||||||
|
|
||||||
|
v0.6
|
||||||
|
2013.01.26
|
||||||
|
|
||||||
|
* new option --no-drm [John Hewson]
|
||||||
|
* Travis CI integration [John Hewson]
|
||||||
* Add a class for 'left'
|
* Add a class for 'left'
|
||||||
* Fixed a bug of hashing/finding GfxRGB
|
* Fixed a bug of hashing/finding GfxRGB
|
||||||
* new option -v, --version [Thanks to John Hewson]
|
* new option -v, --version [John Hewson]
|
||||||
* Render Type 3 fonts as image
|
* Render Type 3 fonts as image
|
||||||
* New parameter: --use-cropbox
|
* New parameter: --use-cropbox
|
||||||
* Progress indicator
|
* Progress indicator
|
||||||
|
62
README.md
62
README.md
@ -43,42 +43,26 @@ Readers can also be benefitted
|
|||||||
- Color
|
- Color
|
||||||
- Transformation
|
- Transformation
|
||||||
* Links
|
* Links
|
||||||
|
* Outline
|
||||||
* [EXPERIMENTAL] Path drawing with CSS
|
* [EXPERIMENTAL] Path drawing with CSS
|
||||||
- Orthogonal lines
|
- Orthogonal lines
|
||||||
- Rectangles
|
- Rectangles
|
||||||
- Linear gradients
|
- Linear gradients
|
||||||
* Not fully supported, and rendered as images
|
* Not fully supported (Rendered as images)
|
||||||
- Type 3 fonts
|
- Type 3 fonts
|
||||||
- Non-text object
|
- Non-text object
|
||||||
|
|
||||||
## Get started
|
## Get started
|
||||||
|
|
||||||
### Ubuntu
|
### Install
|
||||||
|
|
||||||
[PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex), which is not so up-to-date.
|
Thanks to all packagers!
|
||||||
|
|
||||||
### ArchLinux
|
|
||||||
|
|
||||||
[AUR Package](https://aur.archlinux.org/packages.php?ID=62426), special thanks to Arthur Titeica <arthur.titeica@gmail.com>
|
|
||||||
|
|
||||||
### Gentoo
|
|
||||||
|
|
||||||
Install through Overlay gentoo-zh, mrueg or sunrise, thanks to the packagers.
|
|
||||||
|
|
||||||
### Mac
|
|
||||||
|
|
||||||
[Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb), special thanks to Jamie Ly <me@jamie.ly>
|
|
||||||
|
|
||||||
[Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport), special thanks to Deepak Thukral <iapain@iapa.in>
|
|
||||||
|
|
||||||
### Windows
|
|
||||||
|
|
||||||
The code may be built with Cygwin.
|
|
||||||
|
|
||||||
Or with MinGW with some modifications.
|
|
||||||
|
|
||||||
More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura
|
|
||||||
|
|
||||||
|
* [Ubuntu PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex) by Lu Wang <coolwanglu@gmail.com>, not always up-to-date.
|
||||||
|
* [ArchLinux AUR](https://aur.archlinux.org/packages.php?ID=62426) by Arthur Titeica <arthur.titeica@gmail.com>
|
||||||
|
* [Gentoo Overlay](http://gpo.zugaina.org/app-text/pdf2htmlex), gentoo-zh, mrueg or sunrise, by respective packagers.
|
||||||
|
* [Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb) by Jamie Ly <me@jamie.ly>
|
||||||
|
* [Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport) by Deepak Thukral <iapain@iapa.in>
|
||||||
|
|
||||||
### Build from source
|
### Build from source
|
||||||
|
|
||||||
@ -96,6 +80,10 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
|||||||
* git version is recommended to avoid annoying compilation issues
|
* git version is recommended to avoid annoying compilation issues
|
||||||
* [Optional] **ttfautohint**
|
* [Optional] **ttfautohint**
|
||||||
* run pdf2htmlEX with **--external-hint-tool=ttfautohint** to enable it
|
* run pdf2htmlEX with **--external-hint-tool=ttfautohint** to enable it
|
||||||
|
* [For Windows]
|
||||||
|
* Cygwin
|
||||||
|
* or MinGW, with some modifications to pdf2htmlEX. See [pdf2htmlEX on TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura
|
||||||
|
|
||||||
|
|
||||||
#### Compiling
|
#### Compiling
|
||||||
|
|
||||||
@ -106,9 +94,7 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
|||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
pdf2htmlEX /path/to/foobar.pdf
|
pdf2htmlEX /path/to/foobar.pdf
|
||||||
|
|
||||||
pdf2htmlEX --help
|
pdf2htmlEX --help
|
||||||
|
|
||||||
man pdf2htmlEX
|
man pdf2htmlEX
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
@ -131,6 +117,16 @@ GPLv2 & GPLv3 Dual licensed
|
|||||||
|
|
||||||
### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html)
|
### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html)
|
||||||
|
|
||||||
|
## Contact
|
||||||
|
|
||||||
|
* Mailing list <pdf2htmlex@googlegroups.com>
|
||||||
|
* Please read `man pdf2htmlEX` and [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored.
|
||||||
|
* Please use the **latest master branch**.
|
||||||
|
|
||||||
|
* Lu Wang <coolwanglu@gmail.com>
|
||||||
|
* Please use the mailing list above unless for personal enquiries.
|
||||||
|
* Accepting messages in **Chinese**, **English** or **Japanese**.
|
||||||
|
|
||||||
## Acknowledge
|
## Acknowledge
|
||||||
|
|
||||||
pdf2htmlEX is made possible thanks to the following projects:
|
pdf2htmlEX is made possible thanks to the following projects:
|
||||||
@ -147,18 +143,6 @@ pdf2htmlEX is inspired by the following projects:
|
|||||||
* Crocodoc
|
* Crocodoc
|
||||||
* Google Doc
|
* Google Doc
|
||||||
|
|
||||||
|
|
||||||
## Contact
|
|
||||||
|
|
||||||
* Mailing list <pdf2htmlex@googlegroups.com>
|
|
||||||
* Please read [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored.
|
|
||||||
* Please use the **latest master branch**.
|
|
||||||
|
|
||||||
* Lu Wang <coolwanglu@gmail.com>
|
|
||||||
* Please use the mailing list above unless for personal enquiries.
|
|
||||||
* Accepting messages in **Chinese**, **English** or **Japanese**.
|
|
||||||
|
|
||||||
|
|
||||||
### Special Thanks
|
### Special Thanks
|
||||||
|
|
||||||
* Hongliang Tian <tatetian@gmail.com>
|
* Hongliang Tian <tatetian@gmail.com>
|
||||||
|
3
TODO
3
TODO
@ -1,6 +1,3 @@
|
|||||||
word space/offset before the first letter (calendar pdf)
|
|
||||||
add class for "left"
|
|
||||||
|
|
||||||
== Future: ==
|
== Future: ==
|
||||||
|
|
||||||
Too difficult/complicated to implement:
|
Too difficult/complicated to implement:
|
||||||
|
@ -5,7 +5,7 @@ Dirty script for building package for PPA
|
|||||||
by WangLu
|
by WangLu
|
||||||
2011.01.13
|
2011.01.13
|
||||||
|
|
||||||
modified by pdf2htmlEX
|
modified for pdf2htmlEX
|
||||||
2012.08.28
|
2012.08.28
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
19
debian/changelog
vendored
19
debian/changelog
vendored
@ -1,7 +1,24 @@
|
|||||||
|
pdf2htmlex (0.7-1~git201301292229r2595c-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
|
* Fixed a CSS issue
|
||||||
|
|
||||||
|
-- WANG Lu <coolwanglu@gmail.com> Tue, 29 Jan 2013 22:29:21 +0800
|
||||||
|
|
||||||
|
pdf2htmlex (0.7-1~git201301282229r2595c-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
|
* Process PDF Outline
|
||||||
|
|
||||||
|
-- WANG Lu <coolwanglu@gmail.com> Mon, 28 Jan 2013 22:29:35 +0800
|
||||||
|
|
||||||
|
pdf2htmlex (0.7-1~git201301261427r2595c-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
|
* New version, see Changelog for changelog
|
||||||
|
|
||||||
|
-- WANG Lu <coolwanglu@gmail.com> Sat, 26 Jan 2013 14:27:18 +0800
|
||||||
|
|
||||||
pdf2htmlex (0.6-1~git201212182148rd76af-0ubuntu1) quantal; urgency=low
|
pdf2htmlex (0.6-1~git201212182148rd76af-0ubuntu1) quantal; urgency=low
|
||||||
|
|
||||||
* fix dependency of poppler for quantal
|
* fix dependency of poppler for quantal
|
||||||
*
|
|
||||||
|
|
||||||
-- WANG Lu <coolwanglu@gmail.com> Tue, 18 Dec 2012 21:48:35 +0800
|
-- WANG Lu <coolwanglu@gmail.com> Tue, 18 Dec 2012 21:48:35 +0800
|
||||||
|
|
||||||
|
193
pdf2htmlEX.1.in
193
pdf2htmlEX.1.in
@ -1,4 +1,4 @@
|
|||||||
.TH pdf2htmlEX 1 "Aug 31, 2012" "pdf2htmlEX 0.1"
|
.TH pdf2htmlEX 1 "pdf2htmlEX @PDF2HTMLEX_VERSION@"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
.PP
|
.PP
|
||||||
.nf
|
.nf
|
||||||
@ -22,59 +22,73 @@ Fonts are extracted form PDF and then embedded into HTML (Type 3 fonts are not s
|
|||||||
Other objects are rendered as images and also embedded.
|
Other objects are rendered as images and also embedded.
|
||||||
|
|
||||||
.SH OPTIONS
|
.SH OPTIONS
|
||||||
.TP
|
|
||||||
.B --help
|
.SS Pages
|
||||||
Show all options
|
|
||||||
.TP
|
|
||||||
.B -v, --version
|
|
||||||
Show copyright and version
|
|
||||||
.TP
|
|
||||||
.B -o, --owner-password <password>
|
|
||||||
Specify owner password
|
|
||||||
.TP
|
|
||||||
.B -u, --user-password <password>
|
|
||||||
Specify user password
|
|
||||||
.TP
|
|
||||||
.B --no-drm <0|1> (Default: 0)
|
|
||||||
Override document DRM settings
|
|
||||||
.TP
|
|
||||||
.B --dest-dir <dir> (Default: .)
|
|
||||||
Specify destination folder
|
|
||||||
.TP
|
|
||||||
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
|
||||||
Specify the folder holding the manifest and other files
|
|
||||||
.TP
|
.TP
|
||||||
.B -f, --first-page <num> (Default: 1)
|
.B -f, --first-page <num> (Default: 1)
|
||||||
Specify the first page to process
|
Specify the first page to process
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B -l, --last-page <num> (Default: last page)
|
.B -l, --last-page <num> (Default: last page)
|
||||||
Specify the last page to process
|
Specify the last page to process
|
||||||
.TP
|
|
||||||
|
.SS Dimensions
|
||||||
|
|
||||||
.B --zoom <ratio>, --fit-width <width>, --fit-height <height>
|
.B --zoom <ratio>, --fit-width <width>, --fit-height <height>
|
||||||
--zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels.
|
--zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels.
|
||||||
|
|
||||||
If multiple values are specified, the minimum one will be used.
|
If multiple values are specified, the minimum one will be used.
|
||||||
|
|
||||||
If none is specified, pages will be rendered as 72DPI.
|
If none is specified, pages will be rendered as 72DPI.
|
||||||
.TP
|
|
||||||
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144)
|
|
||||||
Specify the horizontal and vertical DPI for images
|
|
||||||
.TP
|
.TP
|
||||||
.B --use-cropbox <0|1> (Default: 0)
|
.B --use-cropbox <0|1> (Default: 0)
|
||||||
Use CropBox instead of MediaBox for output.
|
Use CropBox instead of MediaBox for output.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --process-nontext <0|1> (Default: 1)
|
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144)
|
||||||
Whether to process non-text objects (as images)
|
Specify the horizontal and vertical DPI for images
|
||||||
|
|
||||||
|
|
||||||
|
.SS Output Files
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --single-html <0|1> (Default: 1)
|
.B --single-html <0|1> (Default: 1)
|
||||||
Whether to embed everything into one HTML file.
|
Whether to embed everything into one HTML file.
|
||||||
|
|
||||||
If switched off, there will be several files generated along with the HTML file including files for fonts, css, images.
|
If switched off, there will be several files generated along with the HTML file including files for fonts, css, images.
|
||||||
|
|
||||||
|
Note that the outline will always be embedded into the main HTML file no matter if this switch is on or not.
|
||||||
|
And only when this switch is off will there be a separate .outline file containing the outline.
|
||||||
|
You need to modify the manifest if you do not want outline embedded.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --split-pages <0|1> (Default: 0)
|
.B --split-pages <0|1> (Default: 0)
|
||||||
If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0
|
If turned on, pages will be stored in separate files named <output-filename>0.page, <output-filename>1.page, ...
|
||||||
|
|
||||||
|
Also the css and outline will be stored in separate files, and there will be no <output-filename>.html generated.
|
||||||
|
|
||||||
|
This switch is useful if you want pages to be loaded separately & dynamically -- in which case you need to compose the page yourself, and a supporting backend might be necessary.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --dest-dir <dir> (Default: .)
|
||||||
|
Specify destination folder
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --css-filename <filename> (Default: <none>)
|
||||||
|
Specify the filename of the generated css file, if not embedded.
|
||||||
|
|
||||||
|
If it's empty, the file name will be determined automatically.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --outline-filename <filename> (Default: <none>)
|
||||||
|
Specify the filename of the generated outline file, if not embedded.
|
||||||
|
|
||||||
|
If it's empty, the file name will be determined automatically.
|
||||||
|
|
||||||
|
.SS Fonts
|
||||||
|
|
||||||
The output files will be named as <output-filename>0.page, <output-filename>1.page, ...
|
|
||||||
.TP
|
.TP
|
||||||
.B --embed-base-font <0|1> (Default: 1)
|
.B --embed-base-font <0|1> (Default: 1)
|
||||||
Whether to embed base 14 fonts.
|
Whether to embed base 14 fonts.
|
||||||
@ -82,20 +96,55 @@ Whether to embed base 14 fonts.
|
|||||||
There are several base font defined in PDF standards, which are supposed to be provided by the PDF reader.
|
There are several base font defined in PDF standards, which are supposed to be provided by the PDF reader.
|
||||||
|
|
||||||
If this switch is on, local matched font will be used and embedded; otherwise only font names are exported such that web browsers may try to find proper fonts themselves.
|
If this switch is on, local matched font will be used and embedded; otherwise only font names are exported such that web browsers may try to find proper fonts themselves.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --embed-external-font <0|1> (Default: 0)
|
.B --embed-external-font <0|1> (Default: 0)
|
||||||
Similar as above but for non-base fonts.
|
Similar as above but for non-base fonts.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --font-suffix <suffix> (Default: .ttf)
|
||||||
|
Specify the suffix of fonts extracted from the PDF file.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --decompose-ligature <0|1> (Default: 0)
|
.B --decompose-ligature <0|1> (Default: 0)
|
||||||
Decompose ligatures. For example 'fi' -> 'f''i'.
|
Decompose ligatures. For example 'fi' -> 'f''i'.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --remove-unused-glyph <0|1> (Default: 1)
|
||||||
|
If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --auto-hint <0|1> (Default: 0)
|
||||||
|
If set to 1, hints will be generated for the fonts using fontforge.
|
||||||
|
|
||||||
|
This may be preceded by --external-hint-tool.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --external-hint-tool <tool> (Default: <none>)
|
||||||
|
If specified, the tool will be called in order to enhance hinting for fonts, this will precede --auto-hint.
|
||||||
|
|
||||||
|
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for --font-suffix.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --stretch-narrow-glyph <0|1> (Default: 0)
|
||||||
|
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --squeeze-wide-glyph <0|1> (Default: 1)
|
||||||
|
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
||||||
|
|
||||||
|
.SS Text
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --heps <len>, --veps <len> (Default: 1)
|
.B --heps <len>, --veps <len> (Default: 1)
|
||||||
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||||
|
|
||||||
pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance.
|
pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --space-threshold <ratio> (Default: 1.0/6)
|
.B --space-threshold <ratio> (Default: 1.0/6)
|
||||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --font-size-multiplier <ratio> (Default: 4.0)
|
.B --font-size-multiplier <ratio> (Default: 4.0)
|
||||||
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
||||||
@ -103,11 +152,13 @@ Many web browsers limit the minimum font size, and many would round the given fo
|
|||||||
Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers.
|
Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers.
|
||||||
|
|
||||||
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
|
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
|
||||||
.TP
|
|
||||||
.B --auto-hint <0|1> (Default: 0)
|
|
||||||
If set to 1, hints will be generated for the fonts using fontforge.
|
|
||||||
|
|
||||||
This may be preceded by --external-hint-tool.
|
.TP
|
||||||
|
.B --space-as-offset <0|1> (Default: 0)
|
||||||
|
Treat space characters as offsets, which may increase the size of the output.
|
||||||
|
|
||||||
|
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --tounicode <-1|0|1> (Default: 0)
|
.B --tounicode <-1|0|1> (Default: 0)
|
||||||
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
|
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
|
||||||
@ -117,40 +168,62 @@ If this value is set to 1, the ToUnicode Map is always applied, if provided in P
|
|||||||
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
||||||
|
|
||||||
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
||||||
.TP
|
|
||||||
.B --space-as-offset <0|1> (Default: 0)
|
|
||||||
Treat space characters as offsets, which may increase the size of the output.
|
|
||||||
|
|
||||||
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
|
.SS PDF Protection
|
||||||
.TP
|
|
||||||
.B --stretch-narrow-glyph <0|1> (Default: 0)
|
|
||||||
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs
|
|
||||||
.TP
|
|
||||||
.B --squeeze-wide-glyph <0|1> (Default: 1)
|
|
||||||
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
|
||||||
.TP
|
|
||||||
.B --remove-unused-glyph <0|1> (Default: 1)
|
|
||||||
If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size.
|
|
||||||
.TP
|
|
||||||
.B --font-suffix <suffix> (Default: .ttf), --font-format <format> (Default: truetype)
|
|
||||||
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
|
||||||
.TP
|
|
||||||
.B --external-hint-tool <tool> (Default: <none>)
|
|
||||||
If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint.
|
|
||||||
|
|
||||||
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for --font-suffix.
|
|
||||||
.TP
|
.TP
|
||||||
.B --css-filename <filename> (Default: <none>)
|
.B -o, --owner-password <password>
|
||||||
Specify the filename of the generated css file, if not embedded.
|
Specify owner password
|
||||||
|
|
||||||
If it's empty, the file name will be determined automatically.
|
|
||||||
.TP
|
.TP
|
||||||
.B --debug <0|1> (Default: 0)
|
.B -u, --user-password <password>
|
||||||
Show debug information.
|
Specify user password
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --no-drm <0|1> (Default: 0)
|
||||||
|
Override document DRM settings
|
||||||
|
|
||||||
|
.SS Misc.
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
.B --clean-tmp <0|1> (Default: 1)
|
.B --clean-tmp <0|1> (Default: 1)
|
||||||
If switched off, intermediate files won't be cleaned in the end.
|
If switched off, intermediate files won't be cleaned in the end.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --process-nontext <0|1> (Default: 1)
|
||||||
|
Whether to process non-text objects (as images)
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||||
|
Specify the folder holding the manifest and other files (see below for the manifest file)
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --css-draw <0|1> (Default: 0)
|
||||||
|
Experimental and unsupported CSS drawing
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --debug <0|1> (Default: 0)
|
||||||
|
Print debug information.
|
||||||
|
|
||||||
|
.SS Meta
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B -v, --version
|
||||||
|
Print copyright and version info
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B --help
|
||||||
|
Print usage information
|
||||||
|
|
||||||
|
.SH MANIFEST and DATA-DIR
|
||||||
|
When split-pages is 0, the manifest file describes how the final html page should be generated.
|
||||||
|
|
||||||
|
By default, pdf2htmlEX will use the manifest in the default data-dir (run `pdf2htmlEX -v` to check), which gives a simple demo of its syntax.
|
||||||
|
|
||||||
|
You can modify the default one, or you can create a new one and specify the correct data-dir in the command line.
|
||||||
|
|
||||||
|
When single-html is 1, all files referred by the manifest must be located in the data-dir.
|
||||||
|
|
||||||
.SH EXAMPLE
|
.SH EXAMPLE
|
||||||
.TP
|
.TP
|
||||||
.B pdf2htmlEX /path/to/file.pdf
|
.B pdf2htmlEX /path/to/file.pdf
|
||||||
@ -164,7 +237,7 @@ Convert file.pdf into out/file.html and leave font/image files separated.
|
|||||||
|
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
.PP
|
.PP
|
||||||
Copyright 2012 Lu Wang <coolwanglu@gmail.com>
|
Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||||
|
|
||||||
pdf2htmlEX is GPLv2 & GPLv3 dual licensed
|
pdf2htmlEX is GPLv2 & GPLv3 dual licensed
|
||||||
|
|
||||||
|
@ -1,19 +1,63 @@
|
|||||||
/* Base CSS */
|
/* Base CSS */
|
||||||
/* Copyright 2012 Lu Wang <coolwanglu@gmail.com> */
|
/* Copyright 2012 Lu Wang <coolwanglu@gmail.com> */
|
||||||
#pdf-main { /* PDF container */
|
#pdf-outline { /* PDF Outline */
|
||||||
position:absolute;
|
position:absolute;
|
||||||
top:0;
|
top:0;
|
||||||
left:0;
|
left:0;
|
||||||
bottom:0;
|
bottom:0;
|
||||||
|
width:193px;
|
||||||
|
overflow:auto;
|
||||||
|
margin:0px;
|
||||||
|
padding:0 0 0 7px;
|
||||||
|
background-color:#707070;
|
||||||
|
display:none;
|
||||||
|
}
|
||||||
|
#pdf-outline.opened {
|
||||||
|
display:block;
|
||||||
|
}
|
||||||
|
#pdf-outline ul {
|
||||||
|
margin-left:13px;
|
||||||
|
margin-right:3px;
|
||||||
|
padding-left:3px;
|
||||||
|
}
|
||||||
|
#pdf-outline li {
|
||||||
|
list-style-type:disc;
|
||||||
|
list-style-position:outside;
|
||||||
|
}
|
||||||
|
#pdf-outline a {
|
||||||
|
font-size:13px;
|
||||||
|
color:#e8e8e8;
|
||||||
|
}
|
||||||
|
#pdf-outline a:visited {
|
||||||
|
color:#e8e8e8;
|
||||||
|
}
|
||||||
|
#pdf-outline a:hover{
|
||||||
|
color:#e8e8e8;
|
||||||
|
}
|
||||||
|
#pdf-outline a:active{
|
||||||
|
color:#e8e8e8;
|
||||||
|
}
|
||||||
|
#pdf-main { /* PDF container */
|
||||||
|
position:absolute;
|
||||||
|
top:0;
|
||||||
|
left:0px;
|
||||||
|
bottom:0;
|
||||||
right:0;
|
right:0;
|
||||||
overflow:auto;
|
overflow:auto;
|
||||||
background-color:grey;
|
background-color:#808080;
|
||||||
/* margin & border-width have to be 0,
|
/* margin & border-width have to be 0,
|
||||||
* otherwise pdf2htmlEX may not calculate the coordinates correctly
|
* otherwise pdf2htmlEX may not calculate the coordinates correctly
|
||||||
*/
|
*/
|
||||||
margin:0;
|
margin:0;
|
||||||
border-width:0;
|
border-width:0;
|
||||||
}
|
}
|
||||||
|
#pdf-outline.opened + #pdf-main {
|
||||||
|
left:200px;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* The following are base classes, which are meant to be overridden by PDF specific classes
|
||||||
|
* So do not increase the specificity
|
||||||
|
*/
|
||||||
.d { /* page decoration */
|
.d { /* page decoration */
|
||||||
position:relative;
|
position:relative;
|
||||||
margin: 13px auto;
|
margin: 13px auto;
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
# manifest
|
# pdf2htmlEX manifest
|
||||||
# by WangLu
|
# by WangLu
|
||||||
# 2012.09.12
|
# 2012.09.12
|
||||||
#
|
#
|
||||||
@ -22,26 +22,52 @@
|
|||||||
<meta name="generator" content="pdf2htmlEX"/>
|
<meta name="generator" content="pdf2htmlEX"/>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# base CSS styles
|
||||||
@base.css
|
@base.css
|
||||||
|
# PDF specific CSS styles
|
||||||
$css
|
$css
|
||||||
|
# necessary Javascript codes
|
||||||
@jquery.js
|
@jquery.js
|
||||||
@pdf2htmlEX.js
|
@pdf2htmlEX.js
|
||||||
|
|
||||||
|
# entry point of pdf2htmlEX
|
||||||
"""
|
"""
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
new pdf2htmlEX.Viewer('pdf-main');
|
new pdf2htmlEX.Viewer('pdf-main', 'pdf-outline');
|
||||||
</script>
|
</script>
|
||||||
|
"""
|
||||||
|
|
||||||
|
"""
|
||||||
<title></title>
|
<title></title>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div id="pdf-main">
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
$pages
|
# The container of outline
|
||||||
|
# By default this is hidden, pdf2htmlEX.js will add the 'opened' class if it is not empty
|
||||||
|
# You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js
|
||||||
|
# e.g.
|
||||||
|
# <div id="pdf-outline" class="opened">
|
||||||
|
"""
|
||||||
|
<div id="pdf-outline">
|
||||||
|
"""
|
||||||
|
$outline
|
||||||
"""
|
"""
|
||||||
</div>
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
# The container of PDF pages
|
||||||
|
# check base.css for an example and requirements of its CSS styles
|
||||||
|
"""
|
||||||
|
<div id="pdf-main">
|
||||||
|
"""
|
||||||
|
$pages
|
||||||
|
"""
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
@ -30,7 +30,7 @@ var pdf2htmlEX = (function(){
|
|||||||
,ctm[1] * pos[0] + ctm[3] * pos[1] + ctm[5]];
|
,ctm[1] * pos[0] + ctm[3] * pos[1] + ctm[5]];
|
||||||
};
|
};
|
||||||
var Page = function(page, container) {
|
var Page = function(page, container) {
|
||||||
if(page == undefined) return undefined;
|
if(page == undefined) return;
|
||||||
|
|
||||||
this.p = $(page);
|
this.p = $(page);
|
||||||
this.n = parseInt(this.p.attr('data-page-no'), 16);
|
this.n = parseInt(this.p.attr('data-page-no'), 16);
|
||||||
@ -94,8 +94,9 @@ var pdf2htmlEX = (function(){
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
pdf2htmlEX.Viewer = function(container_id) {
|
pdf2htmlEX.Viewer = function(container_id, outline_id) {
|
||||||
this.container_id = container_id;
|
this.container_id = container_id;
|
||||||
|
this.outline_id = outline_id;
|
||||||
this.init_before_loading_content();
|
this.init_before_loading_content();
|
||||||
|
|
||||||
var _ = this;
|
var _ = this;
|
||||||
@ -113,8 +114,14 @@ var pdf2htmlEX = (function(){
|
|||||||
},
|
},
|
||||||
|
|
||||||
init_after_loading_content : function() {
|
init_after_loading_content : function() {
|
||||||
|
this.outline = $('#'+this.outline_id);
|
||||||
this.container = $('#'+this.container_id);
|
this.container = $('#'+this.container_id);
|
||||||
|
|
||||||
|
// need a better design
|
||||||
|
if(this.outline.children().length > 0) {
|
||||||
|
this.outline.addClass('opened');
|
||||||
|
}
|
||||||
|
|
||||||
var new_pages = new Array();
|
var new_pages = new Array();
|
||||||
var pl= $('.p', this.container);
|
var pl= $('.p', this.container);
|
||||||
/* don't use for(..in..) */
|
/* don't use for(..in..) */
|
||||||
@ -129,7 +136,10 @@ var pdf2htmlEX = (function(){
|
|||||||
|
|
||||||
//this.zoom_fixer();
|
//this.zoom_fixer();
|
||||||
|
|
||||||
this.container.on('click', '.a', this, this.annot_link_handler);
|
// used by outline/annot_link etc
|
||||||
|
// note that one is for the class 'a' and the other is for the tag 'a'
|
||||||
|
this.container.on('click', '.a', this, this.link_handler);
|
||||||
|
this.outline.on('click', 'a', this, this.link_handler);
|
||||||
|
|
||||||
this.render();
|
this.render();
|
||||||
},
|
},
|
||||||
@ -228,18 +238,24 @@ var pdf2htmlEX = (function(){
|
|||||||
|
|
||||||
get_containing_page : function(obj) {
|
get_containing_page : function(obj) {
|
||||||
/* get the page obj containing obj */
|
/* get the page obj containing obj */
|
||||||
return this.pages[(new Page(obj.closest('.p')[0])).n];
|
var p = obj.closest('.p')[0];
|
||||||
|
return p && this.pages[(new Page(p)).n];
|
||||||
},
|
},
|
||||||
|
|
||||||
annot_link_handler : function (e) {
|
link_handler : function (e) {
|
||||||
var _ = e.data;
|
var _ = e.data;
|
||||||
var t = $(e.currentTarget);
|
var t = $(e.currentTarget);
|
||||||
var cur_page = _.get_containing_page(t);
|
|
||||||
if(cur_page == undefined) return;
|
|
||||||
|
|
||||||
var cur_pos = cur_page.position();
|
var cur_pos = [0,0];
|
||||||
|
|
||||||
|
// cur_page might be undefined, e.g. from Outline
|
||||||
|
var cur_page = _.get_containing_page(t);
|
||||||
|
if(cur_page != undefined)
|
||||||
|
{
|
||||||
|
cur_pos = cur_page.position();
|
||||||
//get the coordinates in default user system
|
//get the coordinates in default user system
|
||||||
cur_pos = transform(cur_page.ictm, [cur_pos[0], cur_page.height()-cur_pos[1]]);
|
cur_pos = transform(cur_page.ictm, [cur_pos[0], cur_page.height()-cur_pos[1]]);
|
||||||
|
}
|
||||||
|
|
||||||
var detail_str = t.attr('data-dest-detail');
|
var detail_str = t.attr('data-dest-detail');
|
||||||
if(detail_str == undefined) return;
|
if(detail_str == undefined) return;
|
||||||
@ -281,9 +297,6 @@ var pdf2htmlEX = (function(){
|
|||||||
upside_down = false;
|
upside_down = false;
|
||||||
ok = true;
|
ok = true;
|
||||||
break;
|
break;
|
||||||
pos = [0,0];
|
|
||||||
ok = true;
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
ok = false;
|
ok = false;
|
||||||
break;
|
break;
|
||||||
|
@ -147,7 +147,9 @@ class HTMLRenderer : public OutputDev
|
|||||||
virtual void setDefaultCTM(double *ctm);
|
virtual void setDefaultCTM(double *ctm);
|
||||||
|
|
||||||
// Start a page.
|
// Start a page.
|
||||||
|
// UGLY: These 2 versions are for different versions of poppler
|
||||||
virtual void startPage(int pageNum, GfxState *state);
|
virtual void startPage(int pageNum, GfxState *state);
|
||||||
|
virtual void startPage(int pageNum, GfxState *state, XRef * xref);
|
||||||
|
|
||||||
// End a page.
|
// End a page.
|
||||||
virtual void endPage();
|
virtual void endPage();
|
||||||
@ -210,12 +212,17 @@ class HTMLRenderer : public OutputDev
|
|||||||
void pre_process(PDFDoc * doc);
|
void pre_process(PDFDoc * doc);
|
||||||
void post_process();
|
void post_process();
|
||||||
|
|
||||||
// set flags
|
void process_outline();
|
||||||
|
void process_outline_items(GooList * items);
|
||||||
|
|
||||||
void set_stream_flags (std::ostream & out);
|
void set_stream_flags (std::ostream & out);
|
||||||
|
|
||||||
std::string dump_embedded_font (GfxFont * font, long long fn_id);
|
std::string dump_embedded_font (GfxFont * font, long long fn_id);
|
||||||
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
|
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
|
||||||
|
|
||||||
|
// convert a LinkAction to a string that our Javascript code can understand
|
||||||
|
std::string get_linkaction_str(LinkAction *, std::string & detail);
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// manage styles
|
// manage styles
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
@ -241,7 +248,7 @@ class HTMLRenderer : public OutputDev
|
|||||||
* remote font: to be retrieved from the web server
|
* remote font: to be retrieved from the web server
|
||||||
* local font: to be substituted with a local (client side) font
|
* local font: to be substituted with a local (client side) font
|
||||||
*/
|
*/
|
||||||
void export_remote_font(const FontInfo & info, const std::string & suffix, const std::string & fontfileformat, GfxFont * font);
|
void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font);
|
||||||
void export_remote_default_font(long long fn_id);
|
void export_remote_default_font(long long fn_id);
|
||||||
void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont);
|
void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont);
|
||||||
|
|
||||||
@ -300,6 +307,8 @@ class HTMLRenderer : public OutputDev
|
|||||||
|
|
||||||
XRef * xref;
|
XRef * xref;
|
||||||
PDFDoc * cur_doc;
|
PDFDoc * cur_doc;
|
||||||
|
Catalog * cur_catalog;
|
||||||
|
|
||||||
double default_ctm[6];
|
double default_ctm[6];
|
||||||
|
|
||||||
// page info
|
// page info
|
||||||
@ -424,8 +433,11 @@ class HTMLRenderer : public OutputDev
|
|||||||
std::map<double, long long> left_map;
|
std::map<double, long long> left_map;
|
||||||
|
|
||||||
const Param * param;
|
const Param * param;
|
||||||
std::ofstream html_fout, css_fout;
|
|
||||||
std::string html_path, css_path;
|
struct {
|
||||||
|
std::ofstream fs;
|
||||||
|
std::string path;
|
||||||
|
} f_outline, f_pages, f_css;
|
||||||
|
|
||||||
static const std::string MANIFEST_FILENAME;
|
static const std::string MANIFEST_FILENAME;
|
||||||
};
|
};
|
||||||
|
@ -83,7 +83,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
|||||||
max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
|
max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
ostream & out = renderer->html_fout;
|
ostream & out = renderer->f_pages.fs;
|
||||||
out << "<div style=\""
|
out << "<div style=\""
|
||||||
<< "bottom:" << round(y) << "px;"
|
<< "bottom:" << round(y) << "px;"
|
||||||
<< "\""
|
<< "\""
|
||||||
|
@ -372,48 +372,48 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";
|
f_pages.fs << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";
|
||||||
|
|
||||||
if(line_color)
|
if(line_color)
|
||||||
{
|
{
|
||||||
html_fout << "border-color:" << *line_color << ";";
|
f_pages.fs << "border-color:" << *line_color << ";";
|
||||||
|
|
||||||
html_fout << "border-width:";
|
f_pages.fs << "border-width:";
|
||||||
for(int i = 0; i < line_width_count; ++i)
|
for(int i = 0; i < line_width_count; ++i)
|
||||||
{
|
{
|
||||||
if(i > 0) html_fout << ' ';
|
if(i > 0) f_pages.fs << ' ';
|
||||||
|
|
||||||
double lw = line_width_array[i] * scale;
|
double lw = line_width_array[i] * scale;
|
||||||
html_fout << round(lw);
|
f_pages.fs << round(lw);
|
||||||
if(is_positive(lw)) html_fout << "px";
|
if(is_positive(lw)) f_pages.fs << "px";
|
||||||
}
|
}
|
||||||
html_fout << ";";
|
f_pages.fs << ";";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
html_fout << "border:none;";
|
f_pages.fs << "border:none;";
|
||||||
}
|
}
|
||||||
|
|
||||||
if(fill_color)
|
if(fill_color)
|
||||||
{
|
{
|
||||||
html_fout << "background-color:" << (*fill_color) << ";";
|
f_pages.fs << "background-color:" << (*fill_color) << ";";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
html_fout << "background-color:transparent;";
|
f_pages.fs << "background-color:transparent;";
|
||||||
}
|
}
|
||||||
|
|
||||||
if(style_function)
|
if(style_function)
|
||||||
{
|
{
|
||||||
style_function(style_function_data, html_fout);
|
style_function(style_function_data, f_pages.fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "bottom:" << round(y) << "px;"
|
f_pages.fs << "bottom:" << round(y) << "px;"
|
||||||
<< "left:" << round(x) << "px;"
|
<< "left:" << round(x) << "px;"
|
||||||
<< "width:" << round(w * scale) << "px;"
|
<< "width:" << round(w * scale) << "px;"
|
||||||
<< "height:" << round(h * scale) << "px;";
|
<< "height:" << round(h * scale) << "px;";
|
||||||
|
|
||||||
html_fout << "\"></div>";
|
f_pages.fs << "\"></div>";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -18,9 +18,43 @@
|
|||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font)
|
using std::cerr;
|
||||||
|
using std::endl;
|
||||||
|
|
||||||
|
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font)
|
||||||
{
|
{
|
||||||
css_fout << "@font-face{"
|
string mime_type, format;
|
||||||
|
if(suffix == ".ttf")
|
||||||
|
{
|
||||||
|
format = "truetype";
|
||||||
|
mime_type = "application/x-font-ttf";
|
||||||
|
}
|
||||||
|
else if(suffix == ".otf")
|
||||||
|
{
|
||||||
|
format = "opentype";
|
||||||
|
mime_type = "application/x-font-otf";
|
||||||
|
}
|
||||||
|
else if(suffix == ".woff")
|
||||||
|
{
|
||||||
|
format = "woff";
|
||||||
|
mime_type = "application/font-woff";
|
||||||
|
}
|
||||||
|
else if(suffix == ".eot")
|
||||||
|
{
|
||||||
|
format = "embedded-opentype";
|
||||||
|
mime_type = "application/vnd.ms-fontobject";
|
||||||
|
}
|
||||||
|
else if(suffix == ".svg")
|
||||||
|
{
|
||||||
|
format = "svg";
|
||||||
|
mime_type = "image/svg+xml";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cerr << "Warning: unknown font suffix: " << suffix << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
f_css.fs << "@font-face{"
|
||||||
<< "font-family:f" << info.id << ";"
|
<< "font-family:f" << info.id << ";"
|
||||||
<< "src:url(";
|
<< "src:url(";
|
||||||
|
|
||||||
@ -32,16 +66,16 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
|
|||||||
ifstream fin(path, ifstream::binary);
|
ifstream fin(path, ifstream::binary);
|
||||||
if(!fin)
|
if(!fin)
|
||||||
throw "Cannot locate font file: " + path;
|
throw "Cannot locate font file: " + path;
|
||||||
css_fout << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'";
|
f_css.fs << "'data:font/" + mime_type + ";base64," << base64stream(fin) << "'";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
css_fout << (char*)fn;
|
f_css.fs << (char*)fn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
css_fout << ")"
|
f_css.fs << ")"
|
||||||
<< "format(\"" << fontfileformat << "\");"
|
<< "format(\"" << format << "\");"
|
||||||
<< "}" // end of @font-face
|
<< "}" // end of @font-face
|
||||||
<< ".f" << info.id << "{"
|
<< ".f" << info.id << "{"
|
||||||
<< "font-family:f" << info.id << ";"
|
<< "font-family:f" << info.id << ";"
|
||||||
@ -55,9 +89,9 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
|
|||||||
|
|
||||||
static string general_font_family(GfxFont * font)
|
static string general_font_family(GfxFont * font)
|
||||||
{
|
{
|
||||||
if(font -> isFixedWidth())
|
if(font->isFixedWidth())
|
||||||
return "monospace";
|
return "monospace";
|
||||||
else if (font -> isSerif())
|
else if (font->isSerif())
|
||||||
return "serif";
|
return "serif";
|
||||||
else
|
else
|
||||||
return "sans-serif";
|
return "sans-serif";
|
||||||
@ -66,45 +100,45 @@ static string general_font_family(GfxFont * font)
|
|||||||
// TODO: this function is called when a font cannot be processed; we may use the name there as a hint
|
// TODO: this function is called when a font cannot be processed; we may use the name there as a hint
|
||||||
void HTMLRenderer::export_remote_default_font(long long fn_id)
|
void HTMLRenderer::export_remote_default_font(long long fn_id)
|
||||||
{
|
{
|
||||||
css_fout << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
|
f_css.fs << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont)
|
void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont)
|
||||||
{
|
{
|
||||||
css_fout << ".f" << info.id << "{";
|
f_css.fs << ".f" << info.id << "{";
|
||||||
css_fout << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
|
f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
|
||||||
|
|
||||||
string fn = original_font_name;
|
string fn = original_font_name;
|
||||||
for(auto iter = fn.begin(); iter != fn.end(); ++iter)
|
for(auto iter = fn.begin(); iter != fn.end(); ++iter)
|
||||||
*iter = tolower(*iter);
|
*iter = tolower(*iter);
|
||||||
|
|
||||||
if(font->isBold() || (fn.find("bold") != string::npos))
|
if(font->isBold() || (fn.find("bold") != string::npos))
|
||||||
css_fout << "font-weight:bold;";
|
f_css.fs << "font-weight:bold;";
|
||||||
else
|
else
|
||||||
css_fout << "font-weight:normal;";
|
f_css.fs << "font-weight:normal;";
|
||||||
|
|
||||||
if(fn.find("oblique") != string::npos)
|
if(fn.find("oblique") != string::npos)
|
||||||
css_fout << "font-style:oblique;";
|
f_css.fs << "font-style:oblique;";
|
||||||
else if(font->isItalic() || (fn.find("italic") != string::npos))
|
else if(font->isItalic() || (fn.find("italic") != string::npos))
|
||||||
css_fout << "font-style:italic;";
|
f_css.fs << "font-style:italic;";
|
||||||
else
|
else
|
||||||
css_fout << "font-style:normal;";
|
f_css.fs << "font-style:normal;";
|
||||||
|
|
||||||
css_fout << "line-height:" << round(info.ascent - info.descent) << ";";
|
f_css.fs << "line-height:" << round(info.ascent - info.descent) << ";";
|
||||||
|
|
||||||
css_fout << "visibility:visible;";
|
f_css.fs << "visibility:visible;";
|
||||||
|
|
||||||
css_fout << "}" << endl;
|
f_css.fs << "}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
|
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
|
||||||
{
|
{
|
||||||
css_fout << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
|
f_css.fs << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
||||||
{
|
{
|
||||||
css_fout << ".t" << tm_id << "{";
|
f_css.fs << ".t" << tm_id << "{";
|
||||||
|
|
||||||
// always ignore tm[4] and tm[5] because
|
// always ignore tm[4] and tm[5] because
|
||||||
// we have already shifted the origin
|
// we have already shifted the origin
|
||||||
@ -114,7 +148,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
|||||||
{
|
{
|
||||||
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
||||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||||
css_fout << *iter << "transform:none;";
|
f_css.fs << *iter << "transform:none;";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -122,53 +156,53 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
|||||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||||
{
|
{
|
||||||
// PDF use a different coordinate system from Web
|
// PDF use a different coordinate system from Web
|
||||||
css_fout << *iter << "transform:matrix("
|
f_css.fs << *iter << "transform:matrix("
|
||||||
<< round(tm[0]) << ','
|
<< round(tm[0]) << ','
|
||||||
<< round(-tm[1]) << ','
|
<< round(-tm[1]) << ','
|
||||||
<< round(-tm[2]) << ','
|
<< round(-tm[2]) << ','
|
||||||
<< round(tm[3]) << ',';
|
<< round(tm[3]) << ',';
|
||||||
|
|
||||||
css_fout << "0,0);";
|
f_css.fs << "0,0);";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
css_fout << "}" << endl;
|
f_css.fs << "}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
|
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
|
||||||
{
|
{
|
||||||
css_fout << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
|
f_css.fs << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_word_space (long long ws_id, double word_space)
|
void HTMLRenderer::export_word_space (long long ws_id, double word_space)
|
||||||
{
|
{
|
||||||
css_fout << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
|
f_css.fs << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
||||||
{
|
{
|
||||||
css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
|
f_css.fs << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
|
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
|
||||||
{
|
{
|
||||||
if(ws_width > 0)
|
if(ws_width > 0)
|
||||||
css_fout << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
|
f_css.fs << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
|
||||||
else
|
else
|
||||||
css_fout << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
|
f_css.fs << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_rise (long long rise_id, double rise)
|
void HTMLRenderer::export_rise (long long rise_id, double rise)
|
||||||
{
|
{
|
||||||
css_fout << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
|
f_css.fs << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::export_height (long long height_id, double height)
|
void HTMLRenderer::export_height (long long height_id, double height)
|
||||||
{
|
{
|
||||||
css_fout << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
|
f_css.fs << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
void HTMLRenderer::export_left (long long left_id, double left)
|
void HTMLRenderer::export_left (long long left_id, double left)
|
||||||
{
|
{
|
||||||
css_fout << ".L" << left_id << "{left:" << round(left) << "px;}" << endl;
|
f_css.fs << ".L" << left_id << "{left:" << round(left) << "px;}" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include <GlobalParams.h>
|
||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "TextLineBuffer.h"
|
#include "TextLineBuffer.h"
|
||||||
#include "pdf2htmlEX-config.h"
|
#include "pdf2htmlEX-config.h"
|
||||||
@ -35,10 +37,6 @@ using std::abs;
|
|||||||
using std::cerr;
|
using std::cerr;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
|
||||||
static void dummy(void *, enum ErrorCategory, int pos, char *)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
HTMLRenderer::HTMLRenderer(const Param * param)
|
HTMLRenderer::HTMLRenderer(const Param * param)
|
||||||
:OutputDev()
|
:OutputDev()
|
||||||
,line_opened(false)
|
,line_opened(false)
|
||||||
@ -49,8 +47,8 @@ HTMLRenderer::HTMLRenderer(const Param * param)
|
|||||||
{
|
{
|
||||||
if(!(param->debug))
|
if(!(param->debug))
|
||||||
{
|
{
|
||||||
//disable error function of poppler
|
//disable error messages of poppler
|
||||||
setErrorCallback(&dummy, nullptr);
|
globalParams->setErrQuiet(gTrue);
|
||||||
}
|
}
|
||||||
|
|
||||||
ffw_init(param->debug);
|
ffw_init(param->debug);
|
||||||
@ -71,10 +69,14 @@ HTMLRenderer::~HTMLRenderer()
|
|||||||
void HTMLRenderer::process(PDFDoc *doc)
|
void HTMLRenderer::process(PDFDoc *doc)
|
||||||
{
|
{
|
||||||
cur_doc = doc;
|
cur_doc = doc;
|
||||||
|
cur_catalog = doc->getCatalog();
|
||||||
xref = doc->getXRef();
|
xref = doc->getXRef();
|
||||||
|
|
||||||
pre_process(doc);
|
pre_process(doc);
|
||||||
|
|
||||||
|
///////////////////
|
||||||
|
// Process pages
|
||||||
|
|
||||||
BackgroundRenderer * bg_renderer = nullptr;
|
BackgroundRenderer * bg_renderer = nullptr;
|
||||||
if(param->process_nontext)
|
if(param->process_nontext)
|
||||||
{
|
{
|
||||||
@ -90,10 +92,10 @@ void HTMLRenderer::process(PDFDoc *doc)
|
|||||||
if(param->split_pages)
|
if(param->split_pages)
|
||||||
{
|
{
|
||||||
auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i);
|
auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i);
|
||||||
html_fout.open((char*)page_fn, ofstream::binary);
|
f_pages.fs.open((char*)page_fn, ofstream::binary);
|
||||||
if(!html_fout)
|
if(!f_pages.fs)
|
||||||
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
||||||
set_stream_flags(html_fout);
|
set_stream_flags(f_pages.fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param->process_nontext)
|
if(param->process_nontext)
|
||||||
@ -114,13 +116,17 @@ void HTMLRenderer::process(PDFDoc *doc)
|
|||||||
|
|
||||||
if(param->split_pages)
|
if(param->split_pages)
|
||||||
{
|
{
|
||||||
html_fout.close();
|
f_pages.fs.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(page_count >= 0)
|
if(page_count >= 0)
|
||||||
cerr << "Working: " << page_count << "/" << page_count;
|
cerr << "Working: " << page_count << "/" << page_count;
|
||||||
cerr << endl;
|
cerr << endl;
|
||||||
|
|
||||||
|
////////////////////////
|
||||||
|
// Process Outline
|
||||||
|
process_outline();
|
||||||
|
|
||||||
post_process();
|
post_process();
|
||||||
|
|
||||||
if(bg_renderer)
|
if(bg_renderer)
|
||||||
@ -135,6 +141,11 @@ void HTMLRenderer::setDefaultCTM(double *ctm)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||||
|
{
|
||||||
|
startPage(pageNum, state, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||||
{
|
{
|
||||||
this->pageNum = pageNum;
|
this->pageNum = pageNum;
|
||||||
this->pageWidth = state->getPageWidth();
|
this->pageWidth = state->getPageWidth();
|
||||||
@ -142,7 +153,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
|
|
||||||
assert((!line_opened) && "Open line in startPage detected!");
|
assert((!line_opened) && "Open line in startPage detected!");
|
||||||
|
|
||||||
html_fout
|
f_pages.fs
|
||||||
<< "<div class=\"d\" style=\"width:"
|
<< "<div class=\"d\" style=\"width:"
|
||||||
<< (pageWidth) << "px;height:"
|
<< (pageWidth) << "px;height:"
|
||||||
<< (pageHeight) << "px;\">"
|
<< (pageHeight) << "px;\">"
|
||||||
@ -151,7 +162,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
|
|
||||||
if(param->process_nontext)
|
if(param->process_nontext)
|
||||||
{
|
{
|
||||||
html_fout << "background-image:url(";
|
f_pages.fs << "background-image:url(";
|
||||||
|
|
||||||
{
|
{
|
||||||
if(param->single_html)
|
if(param->single_html)
|
||||||
@ -160,18 +171,18 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
ifstream fin((char*)path, ifstream::binary);
|
ifstream fin((char*)path, ifstream::binary);
|
||||||
if(!fin)
|
if(!fin)
|
||||||
throw string("Cannot read background image ") + (char*)path;
|
throw string("Cannot read background image ") + (char*)path;
|
||||||
html_fout << "'data:image/png;base64," << base64stream(fin) << "'";
|
f_pages.fs << "'data:image/png;base64," << base64stream(fin) << "'";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
html_fout << str_fmt("p%x.png", pageNum);
|
f_pages.fs << str_fmt("p%x.png", pageNum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
|
f_pages.fs << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "\">";
|
f_pages.fs << "\">";
|
||||||
draw_text_scale = 1.0;
|
draw_text_scale = 1.0;
|
||||||
|
|
||||||
cur_font_info = install_font(nullptr);
|
cur_font_info = install_font(nullptr);
|
||||||
@ -206,26 +217,26 @@ void HTMLRenderer::endPage() {
|
|||||||
cur_doc->processLinks(this, pageNum);
|
cur_doc->processLinks(this, pageNum);
|
||||||
|
|
||||||
// close box
|
// close box
|
||||||
html_fout << "</div>";
|
f_pages.fs << "</div>";
|
||||||
|
|
||||||
// dump info for js
|
// dump info for js
|
||||||
// TODO: create a function for this
|
// TODO: create a function for this
|
||||||
// BE CAREFUL WITH ESCAPES
|
// BE CAREFUL WITH ESCAPES
|
||||||
html_fout << "<div class=\"j\" data-data='{";
|
f_pages.fs << "<div class=\"j\" data-data='{";
|
||||||
|
|
||||||
//default CTM
|
//default CTM
|
||||||
html_fout << "\"ctm\":[";
|
f_pages.fs << "\"ctm\":[";
|
||||||
for(int i = 0; i < 6; ++i)
|
for(int i = 0; i < 6; ++i)
|
||||||
{
|
{
|
||||||
if(i > 0) html_fout << ",";
|
if(i > 0) f_pages.fs << ",";
|
||||||
html_fout << round(default_ctm[i]);
|
f_pages.fs << round(default_ctm[i]);
|
||||||
}
|
}
|
||||||
html_fout << "]";
|
f_pages.fs << "]";
|
||||||
|
|
||||||
html_fout << "}'></div>";
|
f_pages.fs << "}'></div>";
|
||||||
|
|
||||||
// close page
|
// close page
|
||||||
html_fout << "</div></div>" << endl;
|
f_pages.fs << "</div></div>" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::pre_process(PDFDoc * doc)
|
void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||||
@ -290,11 +301,32 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
if(param->single_html && (!param->split_pages))
|
if(param->single_html && (!param->split_pages))
|
||||||
tmp_files.add((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
css_path = (char*)fn,
|
f_css.path = (char*)fn;
|
||||||
css_fout.open(css_path, ofstream::binary);
|
f_css.fs.open(f_css.path, ofstream::binary);
|
||||||
if(!css_fout)
|
if(!f_css.fs)
|
||||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||||
set_stream_flags(css_fout);
|
set_stream_flags(f_css.fs);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The logic for outline is similar to css
|
||||||
|
*/
|
||||||
|
|
||||||
|
auto fn = (param->single_html && (!param->split_pages))
|
||||||
|
? str_fmt("%s/__outline", param->tmp_dir.c_str())
|
||||||
|
: str_fmt("%s/%s", param->dest_dir.c_str(), param->outline_filename.c_str());
|
||||||
|
|
||||||
|
if(param->single_html && (!param->split_pages))
|
||||||
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
|
f_outline.path = (char*)fn;
|
||||||
|
f_outline.fs.open(f_outline.path, ofstream::binary);
|
||||||
|
if(!f_outline.fs)
|
||||||
|
throw string("Cannot open") + (char*)fn + " for writing";
|
||||||
|
|
||||||
|
// might not be necessary
|
||||||
|
set_stream_flags(f_outline.fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if split-pages is specified, open & close the file in the process loop
|
// if split-pages is specified, open & close the file in the process loop
|
||||||
@ -303,7 +335,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* If single-html
|
* If single-html
|
||||||
* we have to keep the html file (for page) into a temporary place
|
* we have to keep the html file for pages into a temporary place
|
||||||
* because we'll have to embed css before it
|
* because we'll have to embed css before it
|
||||||
*
|
*
|
||||||
* Otherwise just generate it
|
* Otherwise just generate it
|
||||||
@ -311,21 +343,22 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
|||||||
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
|
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
|
||||||
tmp_files.add((char*)fn);
|
tmp_files.add((char*)fn);
|
||||||
|
|
||||||
html_path = (char*)fn;
|
f_pages.path = (char*)fn;
|
||||||
html_fout.open(html_path, ofstream::binary);
|
f_pages.fs.open(f_pages.path, ofstream::binary);
|
||||||
if(!html_fout)
|
if(!f_pages.fs)
|
||||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||||
set_stream_flags(html_fout);
|
set_stream_flags(f_pages.fs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::post_process()
|
void HTMLRenderer::post_process()
|
||||||
{
|
{
|
||||||
// close files
|
// close files
|
||||||
html_fout.close();
|
f_outline.fs.close();
|
||||||
css_fout.close();
|
f_pages.fs.close();
|
||||||
|
f_css.fs.close();
|
||||||
|
|
||||||
//only when split-page, do we have some work left to do
|
//only when split-page == 0, do we have some work left to do
|
||||||
if(param->split_pages)
|
if(param->split_pages)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -359,7 +392,9 @@ void HTMLRenderer::post_process()
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(line.empty() || line[0] == '#')
|
if(line.empty()
|
||||||
|
|| (line.find_first_not_of(' ') == string::npos)
|
||||||
|
|| line[0] == '#')
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
|
||||||
@ -373,14 +408,23 @@ void HTMLRenderer::post_process()
|
|||||||
{
|
{
|
||||||
if(line == "$css")
|
if(line == "$css")
|
||||||
{
|
{
|
||||||
embed_file(output, css_path, ".css", false);
|
embed_file(output, f_css.path, ".css", false);
|
||||||
}
|
}
|
||||||
else if (line == "$pages")
|
else if (line == "$outline")
|
||||||
{
|
{
|
||||||
ifstream fin(html_path, ifstream::binary);
|
ifstream fin(f_outline.path, ifstream::binary);
|
||||||
if(!fin)
|
if(!fin)
|
||||||
throw "Cannot open read the pages";
|
throw "Cannot open read the pages";
|
||||||
output << fin.rdbuf();
|
output << fin.rdbuf();
|
||||||
|
output.clear(); // output will have its failbit set if fin is empty
|
||||||
|
}
|
||||||
|
else if (line == "$pages")
|
||||||
|
{
|
||||||
|
ifstream fin(f_pages.path, ifstream::binary);
|
||||||
|
if(!fin)
|
||||||
|
throw "Cannot open read the pages";
|
||||||
|
output << fin.rdbuf();
|
||||||
|
output.clear(); // output will have its failbit set if fin is empty
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -418,8 +462,9 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
|
|||||||
if(!fin)
|
if(!fin)
|
||||||
throw string("Cannot open file ") + path + " for embedding";
|
throw string("Cannot open file ") + path + " for embedding";
|
||||||
out << iter->second.first << endl
|
out << iter->second.first << endl
|
||||||
<< fin.rdbuf()
|
<< fin.rdbuf();
|
||||||
<< iter->second.second << endl;
|
out.clear(); // out will have its failbit set if fin is empty
|
||||||
|
out << iter->second.second << endl;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -437,6 +482,7 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
|
|||||||
if(!out)
|
if(!out)
|
||||||
throw string("Cannot open file ") + path + " for embedding";
|
throw string("Cannot open file ") + path + " for embedding";
|
||||||
out << fin.rdbuf();
|
out << fin.rdbuf();
|
||||||
|
out.clear(); // out will have its failbit set if fin is empty
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -110,7 +110,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
|
|||||||
if(path != "")
|
if(path != "")
|
||||||
{
|
{
|
||||||
embed_font(path, font, info);
|
embed_font(path, font, info);
|
||||||
export_remote_font(info, param->font_suffix, param->font_format, font);
|
export_remote_font(info, param->font_suffix, font);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -129,7 +129,7 @@ void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, Font
|
|||||||
if(localfontloc != nullptr)
|
if(localfontloc != nullptr)
|
||||||
{
|
{
|
||||||
embed_font(localfontloc->path->getCString(), font, info);
|
embed_font(localfontloc->path->getCString(), font, info);
|
||||||
export_remote_font(info, param->font_suffix, param->font_format, font);
|
export_remote_font(info, param->font_suffix, font);
|
||||||
delete localfontloc;
|
delete localfontloc;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -186,7 +186,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
|
|||||||
if(localfontloc != nullptr)
|
if(localfontloc != nullptr)
|
||||||
{
|
{
|
||||||
embed_font(string(localfontloc->path->getCString()), font, info);
|
embed_font(string(localfontloc->path->getCString()), font, info);
|
||||||
export_remote_font(info, param->font_suffix, param->font_format, font);
|
export_remote_font(info, param->font_suffix, font);
|
||||||
delete localfontloc;
|
delete localfontloc;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -281,7 +281,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt
|
|||||||
{
|
{
|
||||||
// ws_width is already multiplied by draw_scale
|
// ws_width is already multiplied by draw_scale
|
||||||
auto iter = whitespace_map.lower_bound(ws_width - param->h_eps);
|
auto iter = whitespace_map.lower_bound(ws_width - param->h_eps);
|
||||||
if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) < param->h_eps))
|
if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) <= param->h_eps))
|
||||||
{
|
{
|
||||||
actual_width = iter->first;
|
actual_width = iter->first;
|
||||||
return iter->second;
|
return iter->second;
|
||||||
@ -297,7 +297,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt
|
|||||||
long long HTMLRenderer::install_rise(double rise)
|
long long HTMLRenderer::install_rise(double rise)
|
||||||
{
|
{
|
||||||
auto iter = rise_map.lower_bound(rise - param->v_eps);
|
auto iter = rise_map.lower_bound(rise - param->v_eps);
|
||||||
if((iter != rise_map.end()) && (abs(iter->first - rise) < param->v_eps))
|
if((iter != rise_map.end()) && (abs(iter->first - rise) <= param->v_eps))
|
||||||
{
|
{
|
||||||
return iter->second;
|
return iter->second;
|
||||||
}
|
}
|
||||||
@ -311,7 +311,7 @@ long long HTMLRenderer::install_rise(double rise)
|
|||||||
long long HTMLRenderer::install_height(double height)
|
long long HTMLRenderer::install_height(double height)
|
||||||
{
|
{
|
||||||
auto iter = height_map.lower_bound(height - EPS);
|
auto iter = height_map.lower_bound(height - EPS);
|
||||||
if((iter != height_map.end()) && (abs(iter->first - height) < EPS))
|
if((iter != height_map.end()) && (abs(iter->first - height) <= EPS))
|
||||||
{
|
{
|
||||||
return iter->second;
|
return iter->second;
|
||||||
}
|
}
|
||||||
@ -324,7 +324,7 @@ long long HTMLRenderer::install_height(double height)
|
|||||||
long long HTMLRenderer::install_left(double left)
|
long long HTMLRenderer::install_left(double left)
|
||||||
{
|
{
|
||||||
auto iter = left_map.lower_bound(left - param->h_eps);
|
auto iter = left_map.lower_bound(left - param->h_eps);
|
||||||
if((iter != left_map.end()) && (abs(iter->first - left) < param->h_eps))
|
if((iter != left_map.end()) && (abs(iter->first - left) <= param->h_eps))
|
||||||
{
|
{
|
||||||
return iter->second;
|
return iter->second;
|
||||||
}
|
}
|
||||||
|
@ -29,9 +29,27 @@ using std::endl;
|
|||||||
/*
|
/*
|
||||||
* The detailed rectangle area of the link destination
|
* The detailed rectangle area of the link destination
|
||||||
* Will be parsed and performed by Javascript
|
* Will be parsed and performed by Javascript
|
||||||
|
* The string will be put into a HTML attribute, surrounded by single quotes
|
||||||
|
* So pay attention to the characters used here
|
||||||
*/
|
*/
|
||||||
static string get_dest_detail_str(int pageno, LinkDest * dest)
|
static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int & pageno)
|
||||||
{
|
{
|
||||||
|
pageno = 0;
|
||||||
|
if(dest->isPageRef())
|
||||||
|
{
|
||||||
|
auto pageref = dest->getPageRef();
|
||||||
|
pageno = catalog->findPage(pageref.num, pageref.gen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pageno = dest->getPageNum();
|
||||||
|
}
|
||||||
|
|
||||||
|
if(pageno <= 0)
|
||||||
|
{
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
ostringstream sout;
|
ostringstream sout;
|
||||||
// dec
|
// dec
|
||||||
sout << "[" << pageno;
|
sout << "[" << pageno;
|
||||||
@ -109,15 +127,10 @@ static string get_dest_detail_str(int pageno, LinkDest * dest)
|
|||||||
return sout.str();
|
return sout.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
string HTMLRenderer::get_linkaction_str(LinkAction * action, string & detail)
|
||||||
* Based on pdftohtml from poppler
|
|
||||||
* TODO: CSS for link rectangles
|
|
||||||
* TODO: share rectangle draw with css-draw
|
|
||||||
*/
|
|
||||||
void HTMLRenderer::processLink(AnnotLink * al)
|
|
||||||
{
|
{
|
||||||
std::string dest_str, dest_detail_str;
|
string dest_str;
|
||||||
auto action = al->getAction();
|
detail = "";
|
||||||
if(action)
|
if(action)
|
||||||
{
|
{
|
||||||
auto kind = action->getKind();
|
auto kind = action->getKind();
|
||||||
@ -125,34 +138,21 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
{
|
{
|
||||||
case actionGoTo:
|
case actionGoTo:
|
||||||
{
|
{
|
||||||
auto catalog = cur_doc->getCatalog();
|
|
||||||
auto * real_action = dynamic_cast<LinkGoTo*>(action);
|
auto * real_action = dynamic_cast<LinkGoTo*>(action);
|
||||||
LinkDest * dest = nullptr;
|
LinkDest * dest = nullptr;
|
||||||
if(auto _ = real_action->getDest())
|
if(auto _ = real_action->getDest())
|
||||||
dest = _->copy();
|
dest = _->copy();
|
||||||
else if (auto _ = real_action->getNamedDest())
|
else if (auto _ = real_action->getNamedDest())
|
||||||
dest = catalog->findDest(_);
|
dest = cur_catalog->findDest(_);
|
||||||
if(dest)
|
if(dest)
|
||||||
{
|
{
|
||||||
int pageno = 0;
|
int pageno = 0;
|
||||||
if(dest->isPageRef())
|
detail = get_linkdest_detail_str(dest, cur_catalog, pageno);
|
||||||
{
|
|
||||||
auto pageref = dest->getPageRef();
|
|
||||||
pageno = catalog->findPage(pageref.num, pageref.gen);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
pageno = dest->getPageNum();
|
|
||||||
}
|
|
||||||
|
|
||||||
if(pageno > 0)
|
if(pageno > 0)
|
||||||
{
|
{
|
||||||
dest_str = (char*)str_fmt("#p%x", pageno);
|
dest_str = (char*)str_fmt("#p%x", pageno);
|
||||||
dest_detail_str = get_dest_detail_str(pageno, dest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
delete dest;
|
delete dest;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -178,17 +178,30 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(dest_str != "")
|
return dest_str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Based on pdftohtml from poppler
|
||||||
|
* TODO: CSS for link rectangles
|
||||||
|
* TODO: share rectangle draw with css-draw
|
||||||
|
*/
|
||||||
|
void HTMLRenderer::processLink(AnnotLink * al)
|
||||||
|
{
|
||||||
|
string dest_detail_str;
|
||||||
|
string dest_str = get_linkaction_str(al->getAction(), dest_detail_str);
|
||||||
|
|
||||||
|
if(!dest_str.empty())
|
||||||
{
|
{
|
||||||
html_fout << "<a class=\"a\" href=\"" << dest_str << "\"";
|
f_pages.fs << "<a class=\"a\" href=\"" << dest_str << "\"";
|
||||||
|
|
||||||
if(dest_detail_str != "")
|
if(!dest_detail_str.empty())
|
||||||
html_fout << " data-dest-detail='" << dest_detail_str << "'";
|
f_pages.fs << " data-dest-detail='" << dest_detail_str << "'";
|
||||||
|
|
||||||
html_fout << ">";
|
f_pages.fs << ">";
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "<div class=\"Cd t"
|
f_pages.fs << "<div class=\"Cd t"
|
||||||
<< install_transform_matrix(default_ctm)
|
<< install_transform_matrix(default_ctm)
|
||||||
<< "\" style=\"";
|
<< "\" style=\"";
|
||||||
|
|
||||||
@ -215,31 +228,31 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
border_top_bottom_width, border_left_right_width);
|
border_top_bottom_width, border_left_right_width);
|
||||||
|
|
||||||
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
|
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
|
||||||
html_fout << "border-width:" << round(border_top_bottom_width) << "px;";
|
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px;";
|
||||||
else
|
else
|
||||||
html_fout << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
|
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
|
||||||
}
|
}
|
||||||
auto style = border->getStyle();
|
auto style = border->getStyle();
|
||||||
switch(style)
|
switch(style)
|
||||||
{
|
{
|
||||||
case AnnotBorder::borderSolid:
|
case AnnotBorder::borderSolid:
|
||||||
html_fout << "border-style:solid;";
|
f_pages.fs << "border-style:solid;";
|
||||||
break;
|
break;
|
||||||
case AnnotBorder::borderDashed:
|
case AnnotBorder::borderDashed:
|
||||||
html_fout << "border-style:dashed;";
|
f_pages.fs << "border-style:dashed;";
|
||||||
break;
|
break;
|
||||||
case AnnotBorder::borderBeveled:
|
case AnnotBorder::borderBeveled:
|
||||||
html_fout << "border-style:outset;";
|
f_pages.fs << "border-style:outset;";
|
||||||
break;
|
break;
|
||||||
case AnnotBorder::borderInset:
|
case AnnotBorder::borderInset:
|
||||||
html_fout << "border-style:inset;";
|
f_pages.fs << "border-style:inset;";
|
||||||
break;
|
break;
|
||||||
case AnnotBorder::borderUnderlined:
|
case AnnotBorder::borderUnderlined:
|
||||||
html_fout << "border-style:none;border-bottom-style:solid;";
|
f_pages.fs << "border-style:none;border-bottom-style:solid;";
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cerr << "Warning:Unknown annotation border style: " << style << endl;
|
cerr << "Warning:Unknown annotation border style: " << style << endl;
|
||||||
html_fout << "border-style:solid;";
|
f_pages.fs << "border-style:solid;";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -257,36 +270,36 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
|||||||
r = g = b = 0;
|
r = g = b = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "border-color:rgb("
|
f_pages.fs << "border-color:rgb("
|
||||||
<< dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
|
<< dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
|
||||||
<< ");";
|
<< ");";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
html_fout << "border-style:none;";
|
f_pages.fs << "border-style:none;";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
html_fout << "border-style:none;";
|
f_pages.fs << "border-style:none;";
|
||||||
}
|
}
|
||||||
|
|
||||||
tm_transform(default_ctm, x, y);
|
tm_transform(default_ctm, x, y);
|
||||||
|
|
||||||
html_fout << "position:absolute;"
|
f_pages.fs << "position:absolute;"
|
||||||
<< "left:" << round(x) << "px;"
|
<< "left:" << round(x) << "px;"
|
||||||
<< "bottom:" << round(y) << "px;"
|
<< "bottom:" << round(y) << "px;"
|
||||||
<< "width:" << round(w) << "px;"
|
<< "width:" << round(w) << "px;"
|
||||||
<< "height:" << round(h) << "px;";
|
<< "height:" << round(h) << "px;";
|
||||||
|
|
||||||
// fix for IE
|
// fix for IE
|
||||||
html_fout << "background-color:rgba(255,255,255,0.000001);";
|
f_pages.fs << "background-color:rgba(255,255,255,0.000001);";
|
||||||
|
|
||||||
html_fout << "\"></div>";
|
f_pages.fs << "\"></div>";
|
||||||
|
|
||||||
if(dest_str != "")
|
if(dest_str != "")
|
||||||
{
|
{
|
||||||
html_fout << "</a>";
|
f_pages.fs << "</a>";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
72
src/HTMLRenderer/outline.cc
Normal file
72
src/HTMLRenderer/outline.cc
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
/*
|
||||||
|
* outline.cc
|
||||||
|
*
|
||||||
|
* Handling Outline items
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2013.01.28
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#include <Outline.h>
|
||||||
|
#include <goo/GooList.h>
|
||||||
|
|
||||||
|
#include "HTMLRenderer.h"
|
||||||
|
#include "util/namespace.h"
|
||||||
|
#include "util/unicode.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
using std::ostream;
|
||||||
|
|
||||||
|
void HTMLRenderer::process_outline_items(GooList * items)
|
||||||
|
{
|
||||||
|
if((!items) || (items->getLength() == 0))
|
||||||
|
return;
|
||||||
|
|
||||||
|
f_outline.fs << "<ul>";
|
||||||
|
|
||||||
|
for(int i = 0; i < items->getLength(); ++i)
|
||||||
|
{
|
||||||
|
OutlineItem * item = (OutlineItem*)(items->get(i));
|
||||||
|
|
||||||
|
string detail;
|
||||||
|
string dest = get_linkaction_str(item->getAction(), detail);
|
||||||
|
|
||||||
|
// we don't care whether dest is empty or not.
|
||||||
|
f_outline.fs << "<li>"
|
||||||
|
<< "<a href=\"" << dest << "\"";
|
||||||
|
|
||||||
|
if(!detail.empty())
|
||||||
|
f_outline.fs << " data-dest-detail='" << detail << "'";
|
||||||
|
|
||||||
|
f_outline.fs << ">";
|
||||||
|
|
||||||
|
outputUnicodes(f_outline.fs, item->getTitle(), item->getTitleLength());
|
||||||
|
|
||||||
|
f_outline.fs << "</a>";
|
||||||
|
|
||||||
|
// check kids
|
||||||
|
item->open();
|
||||||
|
if(item->hasKids())
|
||||||
|
{
|
||||||
|
process_outline_items(item->getKids());
|
||||||
|
}
|
||||||
|
item->close();
|
||||||
|
f_outline.fs << "</li>";
|
||||||
|
}
|
||||||
|
|
||||||
|
f_outline.fs << "</ul>";
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLRenderer::process_outline()
|
||||||
|
{
|
||||||
|
Outline * outline = cur_doc->getOutline();
|
||||||
|
if(!outline)
|
||||||
|
return;
|
||||||
|
|
||||||
|
process_outline_items(outline->getItems());
|
||||||
|
}
|
||||||
|
|
||||||
|
}// namespace pdf2htmlEX
|
67
src/Param.h
67
src/Param.h
@ -15,63 +15,56 @@ namespace pdf2htmlEX {
|
|||||||
|
|
||||||
struct Param
|
struct Param
|
||||||
{
|
{
|
||||||
// PDF stuff
|
// pages
|
||||||
std::string owner_password, user_password;
|
|
||||||
std::string input_filename, output_filename;
|
|
||||||
int no_drm;
|
|
||||||
|
|
||||||
// path
|
|
||||||
std::string dest_dir, tmp_dir, data_dir;
|
|
||||||
|
|
||||||
// normal parameters
|
|
||||||
int first_page, last_page;
|
int first_page, last_page;
|
||||||
|
|
||||||
|
// dimensions
|
||||||
double zoom;
|
double zoom;
|
||||||
double fit_width, fit_height;
|
double fit_width, fit_height;
|
||||||
double h_dpi, v_dpi;
|
|
||||||
int use_cropbox;
|
int use_cropbox;
|
||||||
|
double h_dpi, v_dpi;
|
||||||
|
|
||||||
int process_nontext;
|
// output files
|
||||||
int single_html;
|
int single_html;
|
||||||
int split_pages;
|
int split_pages;
|
||||||
|
std::string dest_dir;
|
||||||
|
std::string css_filename;
|
||||||
|
std::string outline_filename;
|
||||||
|
|
||||||
|
// fonts
|
||||||
int embed_base_font;
|
int embed_base_font;
|
||||||
int embed_external_font;
|
int embed_external_font;
|
||||||
|
std::string font_suffix;
|
||||||
int decompose_ligature;
|
int decompose_ligature;
|
||||||
|
int remove_unused_glyph;
|
||||||
|
int auto_hint;
|
||||||
|
std::string external_hint_tool;
|
||||||
|
int stretch_narrow_glyph;
|
||||||
|
int squeeze_wide_glyph;
|
||||||
|
|
||||||
// Advanced tweak
|
// text
|
||||||
/*
|
|
||||||
* Position & Size
|
|
||||||
*/
|
|
||||||
double h_eps, v_eps;
|
double h_eps, v_eps;
|
||||||
double space_threshold;
|
double space_threshold;
|
||||||
double font_size_multiplier;
|
double font_size_multiplier;
|
||||||
|
|
||||||
/*
|
|
||||||
* Font
|
|
||||||
*/
|
|
||||||
int auto_hint;
|
|
||||||
int tounicode;
|
|
||||||
int space_as_offset;
|
int space_as_offset;
|
||||||
int stretch_narrow_glyph;
|
int tounicode;
|
||||||
int squeeze_wide_glyph;
|
|
||||||
int remove_unused_glyph;
|
|
||||||
|
|
||||||
std::string font_suffix, font_format;
|
// encryption
|
||||||
std::string external_hint_tool;
|
std::string owner_password, user_password;
|
||||||
|
int no_drm;
|
||||||
|
|
||||||
/*
|
// misc.
|
||||||
* Output
|
|
||||||
*/
|
|
||||||
std::string css_filename;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Debug
|
|
||||||
*/
|
|
||||||
int debug;
|
|
||||||
int clean_tmp;
|
int clean_tmp;
|
||||||
|
int process_nontext;
|
||||||
// experimental
|
std::string data_dir;
|
||||||
int css_draw;
|
int css_draw;
|
||||||
|
int debug;
|
||||||
|
|
||||||
|
// non-optional
|
||||||
|
std::string input_filename, output_filename;
|
||||||
|
|
||||||
|
// not a parameter
|
||||||
|
std::string tmp_dir;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -36,13 +36,8 @@ ArgParser argparser;
|
|||||||
|
|
||||||
void show_usage_and_exit(const char * dummy = nullptr)
|
void show_usage_and_exit(const char * dummy = nullptr)
|
||||||
{
|
{
|
||||||
cerr << "Usage: pdf2htmlEX [Options] <input.pdf> [<output.html>]" << endl;
|
cerr << "Usage: pdf2htmlEX [options] <input.pdf> [<output.html>]" << endl;
|
||||||
cerr << endl;
|
|
||||||
cerr << "Options:" << endl;
|
|
||||||
argparser.show_usage(cerr);
|
argparser.show_usage(cerr);
|
||||||
cerr << endl;
|
|
||||||
cerr << "Run 'man pdf2htmlEX' for detailed information" << endl;
|
|
||||||
cerr << endl;
|
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -53,60 +48,76 @@ void show_version_and_exit(const char * dummy = nullptr)
|
|||||||
cerr << "Libraries: ";
|
cerr << "Libraries: ";
|
||||||
cerr << "poppler " << POPPLER_VERSION << ", ";
|
cerr << "poppler " << POPPLER_VERSION << ", ";
|
||||||
cerr << "libfontforge " << ffw_get_version() << endl;
|
cerr << "libfontforge " << ffw_get_version() << endl;
|
||||||
|
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
|
||||||
exit(EXIT_SUCCESS);
|
exit(EXIT_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
void parse_options (int argc, char **argv)
|
void parse_options (int argc, char **argv)
|
||||||
{
|
{
|
||||||
argparser
|
string deprecated_string;
|
||||||
.add("help,h", "show all options", &show_usage_and_exit)
|
|
||||||
.add("version,v", "show copyright and version info", &show_version_and_exit)
|
|
||||||
|
|
||||||
|
argparser
|
||||||
|
// pages
|
||||||
|
.add("first-page,f", ¶m.first_page, 1, "first page to convert")
|
||||||
|
.add("last-page,l", ¶m.last_page, numeric_limits<int>::max(), "last page to convert")
|
||||||
|
|
||||||
|
// dimensions
|
||||||
|
.add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true)
|
||||||
|
.add("fit-width", ¶m.fit_width, 0, "fit width to <fp> pixels", nullptr, true)
|
||||||
|
.add("fit-height", ¶m.fit_height, 0, "fit height to <fp> pixels", nullptr, true)
|
||||||
|
.add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox")
|
||||||
|
.add("hdpi", ¶m.h_dpi, 144.0, "horizontal resolution for graphics in DPI")
|
||||||
|
.add("vdpi", ¶m.v_dpi, 144.0, "vertical resolution for graphics in DPI")
|
||||||
|
|
||||||
|
// output files
|
||||||
|
.add("single-html", ¶m.single_html, 1, "generate a single HTML file")
|
||||||
|
.add("split-pages", ¶m.split_pages, 0, "split pages into separate files")
|
||||||
|
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
||||||
|
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
|
||||||
|
.add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file")
|
||||||
|
|
||||||
|
// fonts
|
||||||
|
.add("embed-base-font", ¶m.embed_base_font, 0, "embed local match for standard 14 fonts")
|
||||||
|
.add("embed-external-font", ¶m.embed_external_font, 0, "embed local match for external fonts")
|
||||||
|
.add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)")
|
||||||
|
.add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi")
|
||||||
|
.add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts")
|
||||||
|
.add("auto-hint", ¶m.auto_hint, 0, "use fontforge autohint on fonts without hints")
|
||||||
|
.add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)")
|
||||||
|
.add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them")
|
||||||
|
.add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them")
|
||||||
|
|
||||||
|
// text
|
||||||
|
.add("heps", ¶m.h_eps, 1.0, "horizontal threshold for merging text, in pixels")
|
||||||
|
.add("veps", ¶m.v_eps, 1.0, "vertical threshold for merging text, in pixels")
|
||||||
|
.add("space-threshold", ¶m.space_threshold, (1.0/8), "word break threshold (threshold * em)")
|
||||||
|
.add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy")
|
||||||
|
.add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets")
|
||||||
|
.add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
|
||||||
|
|
||||||
|
// encryption
|
||||||
.add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true)
|
.add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true)
|
||||||
.add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true)
|
.add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true)
|
||||||
.add("no-drm", ¶m.no_drm, 0, "override document DRM settings")
|
.add("no-drm", ¶m.no_drm, 0, "override document DRM settings")
|
||||||
|
|
||||||
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
// misc.
|
||||||
|
.add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion")
|
||||||
|
.add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text")
|
||||||
.add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
|
.add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
|
||||||
|
.add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing")
|
||||||
|
.add("debug", ¶m.debug, 0, "print debugging information")
|
||||||
|
|
||||||
.add("first-page,f", ¶m.first_page, 1, "first page to process")
|
// meta
|
||||||
.add("last-page,l", ¶m.last_page, numeric_limits<int>::max(), "last page to process")
|
.add("version,v", "print copyright and version info", &show_version_and_exit)
|
||||||
|
.add("help,h", "print usage information", &show_usage_and_exit)
|
||||||
|
|
||||||
.add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true)
|
|
||||||
.add("fit-width", ¶m.fit_width, 0, "fit width to <arg> pixels", nullptr, true)
|
|
||||||
.add("fit-height", ¶m.fit_height, 0, "fit height to <arg> pixels", nullptr, true)
|
|
||||||
.add("hdpi", ¶m.h_dpi, 144.0, "horizontal DPI for non-text")
|
|
||||||
.add("vdpi", ¶m.v_dpi, 144.0, "vertical DPI for non-text")
|
|
||||||
.add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox")
|
|
||||||
|
|
||||||
.add("process-nontext", ¶m.process_nontext, 1, "process nontext objects")
|
|
||||||
.add("single-html", ¶m.single_html, 1, "combine everything into one single HTML file")
|
|
||||||
.add("split-pages", ¶m.split_pages, 0, "split pages into separated files")
|
|
||||||
.add("embed-base-font", ¶m.embed_base_font, 0, "embed local matched font for base 14 fonts in the PDF file")
|
|
||||||
.add("embed-external-font", ¶m.embed_external_font, 0, "embed local matched font for external fonts in the PDF file")
|
|
||||||
.add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, for example 'fi' -> 'f''i'")
|
|
||||||
|
|
||||||
.add("heps", ¶m.h_eps, 1.0, "max tolerated horizontal offset (in pixels)")
|
|
||||||
.add("veps", ¶m.v_eps, 1.0, "max tolerated vertical offset (in pixels)")
|
|
||||||
.add("space-threshold", ¶m.space_threshold, (1.0/8), "distance no thiner than (threshold * em) will be considered as a space character")
|
|
||||||
.add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "setting a value greater than 1 would increase the rendering accuracy")
|
|
||||||
.add("auto-hint", ¶m.auto_hint, 0, "Whether to generate hints for fonts")
|
|
||||||
.add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled")
|
|
||||||
.add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets")
|
|
||||||
.add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space")
|
|
||||||
.add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "squeeze wide glyphs instead of truncating")
|
|
||||||
.add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts")
|
|
||||||
|
|
||||||
.add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files")
|
|
||||||
.add("font-format", ¶m.font_format, "opentype", "format for extracted font files")
|
|
||||||
.add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hintting fonts.(overrides --auto-hint)")
|
|
||||||
.add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file")
|
|
||||||
|
|
||||||
.add("debug", ¶m.debug, 0, "output debug information")
|
|
||||||
.add("clean-tmp", ¶m.clean_tmp, 1, "clean temporary files after processing")
|
|
||||||
.add("css-draw", ¶m.css_draw, 0, "[Experimental and Unsupported] CSS Drawing")
|
|
||||||
.add("", ¶m.input_filename, "", "")
|
.add("", ¶m.input_filename, "", "")
|
||||||
.add("", ¶m.output_filename, "", "")
|
.add("", ¶m.output_filename, "", "")
|
||||||
|
|
||||||
|
// deprecated
|
||||||
|
.add("font-format", &deprecated_string, "", "", [] (const char*) {
|
||||||
|
cerr << "warning: --font-format is deprecated, @font-face format is inferred from --font-suffix" << endl;
|
||||||
|
})
|
||||||
;
|
;
|
||||||
|
|
||||||
try
|
try
|
||||||
@ -140,8 +151,7 @@ int main(int argc, char **argv)
|
|||||||
parse_options(argc, argv);
|
parse_options(argc, argv);
|
||||||
if (param.input_filename == "")
|
if (param.input_filename == "")
|
||||||
{
|
{
|
||||||
cerr << "Missing input filename" << endl;
|
show_usage_and_exit();
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//prepare the directories
|
//prepare the directories
|
||||||
@ -202,7 +212,7 @@ int main(int argc, char **argv)
|
|||||||
param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
|
param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
|
||||||
param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
|
param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
|
||||||
|
|
||||||
if(param.output_filename == "")
|
if(param.output_filename.empty())
|
||||||
{
|
{
|
||||||
const string s = get_filename(param.input_filename);
|
const string s = get_filename(param.input_filename);
|
||||||
|
|
||||||
@ -223,7 +233,7 @@ int main(int argc, char **argv)
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(param.css_filename == "")
|
if(param.css_filename.empty())
|
||||||
{
|
{
|
||||||
const string s = get_filename(param.input_filename);
|
const string s = get_filename(param.input_filename);
|
||||||
|
|
||||||
@ -237,6 +247,21 @@ int main(int argc, char **argv)
|
|||||||
param.css_filename = s + ".css";
|
param.css_filename = s + ".css";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if(param.outline_filename.empty())
|
||||||
|
{
|
||||||
|
const string s = get_filename(param.input_filename);
|
||||||
|
|
||||||
|
if(get_suffix(param.input_filename) == ".pdf")
|
||||||
|
{
|
||||||
|
param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(!param.split_pages)
|
||||||
|
param.outline_filename = s + ".outline";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
HTMLRenderer * htmlOut = new HTMLRenderer(¶m);
|
HTMLRenderer * htmlOut = new HTMLRenderer(¶m);
|
||||||
htmlOut->process(doc);
|
htmlOut->process(doc);
|
||||||
|
@ -76,7 +76,7 @@ void ArgParser::parse(int argc, char ** argv) const
|
|||||||
int v = p->shortname;
|
int v = p->shortname;
|
||||||
if(!(opt_map.insert(make_pair(v, p)).second))
|
if(!(opt_map.insert(make_pair(v, p)).second))
|
||||||
{
|
{
|
||||||
cerr << "Warning: duplicated shortname '" << v << "' used by -" << (char)(p->shortname) << " and -" << (char)(opt_map[p->shortname]->shortname) << endl;
|
cerr << "Warning: duplicated shortname: " << v << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -93,7 +93,7 @@ void ArgParser::parse(int argc, char ** argv) const
|
|||||||
}
|
}
|
||||||
if(!(opt_map.insert(make_pair(v, p)).second))
|
if(!(opt_map.insert(make_pair(v, p)).second))
|
||||||
{
|
{
|
||||||
cerr << "Warning: duplicated shortname '" << v << "' used by --" << (p->name) << " and --" << (opt_map[p->shortname]->name) << endl;
|
cerr << "Warning: duplicated long name: " << (p->name) << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -146,6 +146,10 @@ void ArgParser::show_usage(ostream & out) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<> const char * ArgParser::get_type_name<int> (void) { return "int"; }
|
||||||
|
template<> const char * ArgParser::get_type_name<double> (void) { return "fp"; }
|
||||||
|
template<> const char * ArgParser::get_type_name<string> (void) { return "string"; }
|
||||||
|
|
||||||
ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * description, bool need_arg)
|
ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * description, bool need_arg)
|
||||||
: shortname(0), name(name), description(description), need_arg(need_arg)
|
: shortname(0), name(name), description(description), need_arg(need_arg)
|
||||||
{
|
{
|
||||||
@ -159,11 +163,11 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cerr << "Warning: argument '" << this->name << "' may not be parsed correctly" << endl;
|
cerr << "Warning: argument '" << this->name << "' cannnot be parsed as a short option" << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int ArgParser::arg_col_width = 40;
|
const int ArgParser::arg_col_width = 31;
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -39,7 +39,6 @@ void dump_value(std::ostream & out, const T & v)
|
|||||||
|
|
||||||
extern void dump_value(std::ostream & out, const std::string & v);
|
extern void dump_value(std::ostream & out, const std::string & v);
|
||||||
|
|
||||||
|
|
||||||
class ArgParser
|
class ArgParser
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
@ -48,12 +47,14 @@ class ArgParser
|
|||||||
typedef void (*ArgParserCallBack) (const char * arg);
|
typedef void (*ArgParserCallBack) (const char * arg);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* optname: name of the argment, should be provided as --optname
|
* The 1st is for arg without arguments (i.e. flags), and the 2nd is for general args.
|
||||||
* description: if description is "", the argument won't be shown in show_usage()
|
* optname:
|
||||||
|
* - if not nullptr, it should be the name of the arg, should be in the format of "<long name>[,<short char>]", e.g. "help,h"
|
||||||
|
* - if nullptr, it denotes an optional arg, and description will be ignored
|
||||||
|
* description:
|
||||||
|
* - if description is nullptr or "", the argument won't be shown in show_usage()
|
||||||
*/
|
*/
|
||||||
|
|
||||||
ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr);
|
ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr);
|
||||||
|
|
||||||
template <class T, class Tv>
|
template <class T, class Tv>
|
||||||
ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false);
|
ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false);
|
||||||
|
|
||||||
@ -61,9 +62,14 @@ class ArgParser
|
|||||||
void show_usage(std::ostream & out) const;
|
void show_usage(std::ostream & out) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// type names helper
|
||||||
|
template<class>
|
||||||
|
static const char * get_type_name(void) { return "unknown"; }
|
||||||
|
|
||||||
class ArgEntryBase
|
class ArgEntryBase
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
/* name or description cannot be nullptr */
|
||||||
ArgEntryBase(const char * name, const char * description, bool need_arg);
|
ArgEntryBase(const char * name, const char * description, bool need_arg);
|
||||||
virtual ~ArgEntryBase() { }
|
virtual ~ArgEntryBase() { }
|
||||||
char shortname;
|
char shortname;
|
||||||
@ -101,15 +107,25 @@ class ArgParser
|
|||||||
template<class T, class Tv>
|
template<class T, class Tv>
|
||||||
ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback, bool dont_show_default)
|
ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback, bool dont_show_default)
|
||||||
{
|
{
|
||||||
// use "" in case nullptr is provided
|
// ArgEntry does not accept nullptr as optname nor description
|
||||||
if((!optname) || (!optname[0]))
|
if((!optname) || (!optname[0]))
|
||||||
|
{
|
||||||
|
// when optname is nullptr or "", it's optional, and description is dropped
|
||||||
optional_arg_entries.push_back(new ArgEntry<T, Tv>("", location, default_value, callback, "", dont_show_default));
|
optional_arg_entries.push_back(new ArgEntry<T, Tv>("", location, default_value, callback, "", dont_show_default));
|
||||||
|
}
|
||||||
else
|
else
|
||||||
arg_entries.push_back(new ArgEntry<T, Tv>(optname, location, default_value, callback, description, dont_show_default));
|
{
|
||||||
|
arg_entries.push_back(new ArgEntry<T, Tv>(optname, location, default_value, callback, (description ? description : ""), dont_show_default));
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Known types
|
||||||
|
template<> const char * ArgParser::get_type_name<int> (void);
|
||||||
|
template<> const char * ArgParser::get_type_name<double> (void);
|
||||||
|
template<> const char * ArgParser::get_type_name<std::string> (void);
|
||||||
|
|
||||||
template<class T, class Tv>
|
template<class T, class Tv>
|
||||||
ArgParser::ArgEntry<T, Tv>::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description, bool dont_show_default)
|
ArgParser::ArgEntry<T, Tv>::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description, bool dont_show_default)
|
||||||
: ArgEntryBase(name, description, (location != nullptr))
|
: ArgEntryBase(name, description, (location != nullptr))
|
||||||
@ -141,7 +157,7 @@ void ArgParser::ArgEntry<T, Tv>::parse(const char * arg) const
|
|||||||
template<class T, class Tv>
|
template<class T, class Tv>
|
||||||
void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
||||||
{
|
{
|
||||||
if(description == "")
|
if(description.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
std::ostringstream sout;
|
std::ostringstream sout;
|
||||||
@ -161,13 +177,7 @@ void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
|||||||
|
|
||||||
if(need_arg)
|
if(need_arg)
|
||||||
{
|
{
|
||||||
sout << " <arg>";
|
sout << " <" << get_type_name<T>() << ">";
|
||||||
if(!dont_show_default)
|
|
||||||
{
|
|
||||||
sout << " (=";
|
|
||||||
dump_value(sout, default_value);
|
|
||||||
sout << ")";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string s = sout.str();
|
std::string s = sout.str();
|
||||||
@ -176,7 +186,16 @@ void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
|||||||
for(int i = s.size(); i < arg_col_width; ++i)
|
for(int i = s.size(); i < arg_col_width; ++i)
|
||||||
out << ' ';
|
out << ' ';
|
||||||
|
|
||||||
out << " " << description << std::endl;
|
out << " " << description;
|
||||||
|
|
||||||
|
if(need_arg && !dont_show_default)
|
||||||
|
{
|
||||||
|
out << " (default: ";
|
||||||
|
dump_value(out, default_value);
|
||||||
|
out << ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
out << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ArgParser
|
} // namespace ArgParser
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
static inline double round(double x) { return (std::abs(x) > EPS) ? x : 0.0; }
|
static inline double round(double x) { return (std::abs(x) > EPS) ? x : 0.0; }
|
||||||
static inline bool equal(double x, double y) { return std::abs(x-y) < EPS; }
|
static inline bool equal(double x, double y) { return std::abs(x-y) <= EPS; }
|
||||||
static inline bool is_positive(double x) { return x > EPS; }
|
static inline bool is_positive(double x) { return x > EPS; }
|
||||||
static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6)
|
static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6)
|
||||||
{
|
{
|
||||||
|
@ -33,6 +33,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font);
|
|||||||
*/
|
*/
|
||||||
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font);
|
Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Escape necessary characters, and map Unicode to UTF-8
|
||||||
|
*/
|
||||||
void outputUnicodes(std::ostream & out, const Unicode * u, int uLen);
|
void outputUnicodes(std::ostream & out, const Unicode * u, int uLen);
|
||||||
|
|
||||||
|
|
||||||
|
@ -13,7 +13,7 @@ with open('out.html','w') as outf:
|
|||||||
if not f.lower().endswith('.pdf'):
|
if not f.lower().endswith('.pdf'):
|
||||||
continue
|
continue
|
||||||
print f
|
print f
|
||||||
if os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
if os.system('pdf2htmlEX -l 7 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
||||||
print "error on ", f
|
print "error on ", f
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user