mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
Merge branch 'master' of git://github.com/coolwanglu/pdf2htmlEX
Conflicts: src/HTMLRenderer/general.cc src/HTMLRenderer/state.cc
This commit is contained in:
commit
6973760a85
@ -1,5 +1,9 @@
|
||||
Latest v0.7
|
||||
|
||||
* Process outline
|
||||
* Fix build with poppler
|
||||
* Many code cleaning jobs [John Hewson]
|
||||
|
||||
v0.6
|
||||
2013.01.26
|
||||
|
||||
|
89
README.md
89
README.md
@ -5,39 +5,28 @@
|
||||
A beautiful demo is worth a thousand words:
|
||||
|
||||
- [**Typography**](http://coolwanglu.github.com/pdf2htmlEX/demo/geneve.html) [Original](https://github.com/raphink/geneve_1564/raw/master/geneve_1564.pdf)
|
||||
|
||||
- [**Full Circle Magazine(large)**](http://coolwanglu.github.com/pdf2htmlEX/demo/issue65_en.html) [Sample](http://coolwanglu.github.com/pdf2htmlEX/demo/issue65_en_sample.html) [Original](http://dl.fullcirclemagazine.org/issue65_en.pdf)
|
||||
|
||||
- [**Formulas**](http://coolwanglu.github.com/pdf2htmlEX/demo/cheat.html) [Original](http://www.tug.org/texshowcase/cheat.pdf)
|
||||
|
||||
- [**Scientific Paper**](http://coolwanglu.github.com/pdf2htmlEX/demo/demo.html) [Original](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.148.349&rep=rep1&type=pdf)
|
||||
|
||||
- [**Chinese**](http://coolwanglu.github.com/pdf2htmlEX/demo/chn.html) [Original](http://files.cnblogs.com/phphuaibei/git%E6%90%AD%E5%BB%BA.pdf)
|
||||
- Try your own files: [MediaFire](http://www.mediafire.com), a free online storage service, now uses pdf2htmlEX for its PDF preview feature.
|
||||
|
||||
## Introduction
|
||||
|
||||
pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies, aims to provide an accuracy rendering, while keeping optimized for Web display.
|
||||
pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies.
|
||||
It aims to provide accurate rendering, while staying optimized for Web display.
|
||||
|
||||
It is optimized for modern web browsers. On Linux/Mac, the generated HTML pages could be as beautiful as PDF files.
|
||||
pdf2htmlEX is best for text-based PDF files, for example scientific papers with complicated formulas and figures.
|
||||
Text, fonts and formats are natively preserved in HTML such that you can still search and copy.
|
||||
The generated HTML file is static, Javascript is not required.
|
||||
|
||||
This program is designed for scientific papers with complicated formulas and figures, therefore precise rendering is the #1 concern. But of course general PDF files are also supported.
|
||||
|
||||
### Why HTML ?
|
||||
|
||||
HTML, together with CSS and Javascript, is much more open and flexible than PDF. Almost everything can be customized.
|
||||
- Embedding documents to web pages with consistent theme and behavior
|
||||
- Cross references to other documents are much easier and intuitive
|
||||
- More functions to the document with Javascript, e.g. access control, animation, statistics
|
||||
|
||||
Readers can also be benefitted
|
||||
- Read while downloading
|
||||
- Plugin-free
|
||||
[More info](https://github.com/coolwanglu/pdf2htmlEX/wiki/Introduction)
|
||||
|
||||
## Features
|
||||
|
||||
* Optional single HTML file output
|
||||
* Precise rendering
|
||||
* Text perserved - you can select & copy & paste
|
||||
* Native text in HTML - you can select & copy & search
|
||||
* Proper styling
|
||||
- Font - extracted and reencoded
|
||||
- Color
|
||||
@ -54,32 +43,16 @@ Readers can also be benefitted
|
||||
|
||||
## Get started
|
||||
|
||||
### Ubuntu
|
||||
### Install
|
||||
|
||||
[PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex), which is not so up-to-date.
|
||||
|
||||
### ArchLinux
|
||||
|
||||
[AUR Package](https://aur.archlinux.org/packages.php?ID=62426), special thanks to Arthur Titeica <arthur.titeica@gmail.com>
|
||||
|
||||
### Gentoo
|
||||
|
||||
Install through Overlay gentoo-zh, mrueg or sunrise, thanks to the packagers.
|
||||
|
||||
### Mac
|
||||
|
||||
[Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb), special thanks to Jamie Ly <me@jamie.ly>
|
||||
|
||||
[Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport), special thanks to Deepak Thukral <iapain@iapa.in>
|
||||
|
||||
### Windows
|
||||
|
||||
The code may be built with Cygwin.
|
||||
|
||||
Or with MinGW with some modifications.
|
||||
|
||||
More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura
|
||||
Thanks to all packagers!
|
||||
|
||||
* [Ubuntu PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex) by Lu Wang <coolwanglu@gmail.com>, not always up-to-date.
|
||||
* [ArchLinux AUR](https://aur.archlinux.org/packages.php?ID=62426) by Arthur Titeica <arthur.titeica@gmail.com>
|
||||
* [Gentoo Overlay](http://gpo.zugaina.org/app-text/pdf2htmlex), gentoo-zh, mrueg or sunrise, by respective packagers.
|
||||
* [Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb) by Jamie Ly <me@jamie.ly>
|
||||
* [Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport) by Deepak Thukral <iapain@iapa.in>
|
||||
* Windows [N/A](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-install-windows)
|
||||
|
||||
### Build from source
|
||||
|
||||
@ -97,6 +70,10 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
||||
* git version is recommended to avoid annoying compilation issues
|
||||
* [Optional] **ttfautohint**
|
||||
* run pdf2htmlEX with **--external-hint-tool=ttfautohint** to enable it
|
||||
* [For Windows]
|
||||
* Cygwin
|
||||
* or MinGW, with some modifications to pdf2htmlEX. See [pdf2htmlEX on TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura
|
||||
|
||||
|
||||
#### Compiling
|
||||
|
||||
@ -107,18 +84,15 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u
|
||||
## Usage
|
||||
|
||||
pdf2htmlEX /path/to/foobar.pdf
|
||||
|
||||
pdf2htmlEX --help
|
||||
|
||||
man pdf2htmlEX
|
||||
|
||||
## FAQ
|
||||
|
||||
* [Troubleshooting compilation errors](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-compile)
|
||||
* [The demo pages are ugly](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-ugly)
|
||||
* [How can I help](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-help)
|
||||
* [I want more features](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission)
|
||||
* [More](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)
|
||||
* [More about pdf2htmlEX](https://github.com/coolwanglu/pdf2htmlEX/wiki/)
|
||||
|
||||
## LICENSE
|
||||
|
||||
@ -132,6 +106,15 @@ GPLv2 & GPLv3 Dual licensed
|
||||
|
||||
### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html)
|
||||
|
||||
## Contact
|
||||
|
||||
* Mailing list <pdf2htmlex@googlegroups.com>
|
||||
* You might want to try these useful resources first: `man pdf2htmlEX`, [wiki](https://github.com/coolwanglu/pdf2htmlEX/wiki) and [FAQ](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)
|
||||
|
||||
* Lu Wang <coolwanglu@gmail.com>
|
||||
* For personal enquiries only
|
||||
* Accepting messages in **Chinese**, **English** or **Japanese**.
|
||||
|
||||
## Acknowledge
|
||||
|
||||
pdf2htmlEX is made possible thanks to the following projects:
|
||||
@ -148,18 +131,6 @@ pdf2htmlEX is inspired by the following projects:
|
||||
* Crocodoc
|
||||
* Google Doc
|
||||
|
||||
|
||||
## Contact
|
||||
|
||||
* Mailing list <pdf2htmlex@googlegroups.com>
|
||||
* Please read [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored.
|
||||
* Please use the **latest master branch**.
|
||||
|
||||
* Lu Wang <coolwanglu@gmail.com>
|
||||
* Please use the mailing list above unless for personal enquiries.
|
||||
* Accepting messages in **Chinese**, **English** or **Japanese**.
|
||||
|
||||
|
||||
### Special Thanks
|
||||
|
||||
* Hongliang Tian <tatetian@gmail.com>
|
||||
|
6
debian/changelog
vendored
6
debian/changelog
vendored
@ -1,3 +1,9 @@
|
||||
pdf2htmlex (0.7-1~git201301292229r2595c-0ubuntu1) quantal; urgency=low
|
||||
|
||||
* Fixed a CSS issue
|
||||
|
||||
-- WANG Lu <coolwanglu@gmail.com> Tue, 29 Jan 2013 22:29:21 +0800
|
||||
|
||||
pdf2htmlex (0.7-1~git201301282229r2595c-0ubuntu1) quantal; urgency=low
|
||||
|
||||
* Process PDF Outline
|
||||
|
@ -23,9 +23,7 @@ Other objects are rendered as images and also embedded.
|
||||
|
||||
.SH OPTIONS
|
||||
|
||||
|
||||
.TP
|
||||
.B Pages
|
||||
.SS Pages
|
||||
|
||||
.TP
|
||||
.B -f, --first-page <num> (Default: 1)
|
||||
@ -35,9 +33,7 @@ Specify the first page to process
|
||||
.B -l, --last-page <num> (Default: last page)
|
||||
Specify the last page to process
|
||||
|
||||
|
||||
.TP
|
||||
.B Dimensions
|
||||
.SS Dimensions
|
||||
|
||||
.B --zoom <ratio>, --fit-width <width>, --fit-height <height>
|
||||
--zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels.
|
||||
@ -55,8 +51,7 @@ Use CropBox instead of MediaBox for output.
|
||||
Specify the horizontal and vertical DPI for images
|
||||
|
||||
|
||||
.TP
|
||||
.B Output Files
|
||||
.SS Output
|
||||
|
||||
.TP
|
||||
.B --single-html <0|1> (Default: 1)
|
||||
@ -92,9 +87,15 @@ Specify the filename of the generated outline file, if not embedded.
|
||||
|
||||
If it's empty, the file name will be determined automatically.
|
||||
|
||||
.TP
|
||||
.B --process-nontext <0|1> (Default: 1)
|
||||
Whether to process non-text objects (as images)
|
||||
|
||||
.TP
|
||||
.B Fonts
|
||||
.B --process-outline <0|1> (Default: 1)
|
||||
Whether to show outline in the generated HTML
|
||||
|
||||
.SS Fonts
|
||||
|
||||
.TP
|
||||
.B --embed-base-font <0|1> (Default: 1)
|
||||
@ -140,9 +141,7 @@ If set to 1, glyphs narrower than described in PDF will be stretched; otherwise
|
||||
.B --squeeze-wide-glyph <0|1> (Default: 1)
|
||||
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
||||
|
||||
|
||||
.TP
|
||||
.B Text
|
||||
.SS Text
|
||||
|
||||
.TP
|
||||
.B --heps <len>, --veps <len> (Default: 1)
|
||||
@ -178,9 +177,7 @@ If set to -1, a customized map is used such that rendering will be correct in HT
|
||||
|
||||
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
||||
|
||||
|
||||
.TP
|
||||
.B Encryption
|
||||
.SS PDF Protection
|
||||
|
||||
.TP
|
||||
.B -o, --owner-password <password>
|
||||
@ -194,21 +191,15 @@ Specify user password
|
||||
.B --no-drm <0|1> (Default: 0)
|
||||
Override document DRM settings
|
||||
|
||||
|
||||
.TP
|
||||
.B Misc.
|
||||
.SS Misc.
|
||||
|
||||
.TP
|
||||
.B --clean-tmp <0|1> (Default: 1)
|
||||
If switched off, intermediate files won't be cleaned in the end.
|
||||
|
||||
.TP
|
||||
.B --process-nontext <0|1> (Default: 1)
|
||||
Whether to process non-text objects (as images)
|
||||
|
||||
.TP
|
||||
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||
Specify the folder holding the manifest and other files
|
||||
Specify the folder holding the manifest and other files (see below for the manifest file)
|
||||
|
||||
.TP
|
||||
.B --css-draw <0|1> (Default: 0)
|
||||
@ -218,9 +209,7 @@ Experimental and unsupported CSS drawing
|
||||
.B --debug <0|1> (Default: 0)
|
||||
Print debug information.
|
||||
|
||||
|
||||
.TP
|
||||
.B Meta
|
||||
.SS Meta
|
||||
|
||||
.TP
|
||||
.B -v, --version
|
||||
@ -230,6 +219,14 @@ Print copyright and version info
|
||||
.B --help
|
||||
Print usage information
|
||||
|
||||
.SH MANIFEST and DATA-DIR
|
||||
When split-pages is 0, the manifest file describes how the final html page should be generated.
|
||||
|
||||
By default, pdf2htmlEX will use the manifest in the default data-dir (run `pdf2htmlEX -v` to check), which gives a simple demo of its syntax.
|
||||
|
||||
You can modify the default one, or you can create a new one and specify the correct data-dir in the command line.
|
||||
|
||||
When single-html is 1, all files referred to by the manifest must be located in the data-dir.
|
||||
|
||||
.SH EXAMPLE
|
||||
.TP
|
||||
|
@ -1,4 +1,4 @@
|
||||
# manifest
|
||||
# pdf2htmlEX manifest
|
||||
# by WangLu
|
||||
# 2012.09.12
|
||||
#
|
||||
@ -21,39 +21,53 @@
|
||||
<meta charset="utf-8">
|
||||
<meta name="generator" content="pdf2htmlEX"/>
|
||||
"""
|
||||
|
||||
# base CSS styles
|
||||
@base.css
|
||||
|
||||
# PDF specific CSS styles
|
||||
$css
|
||||
|
||||
# necessary Javascript codes
|
||||
@jquery.js
|
||||
@pdf2htmlEX.js
|
||||
|
||||
# entry point of pdf2htmlEX
|
||||
"""
|
||||
<script type="text/javascript">
|
||||
new pdf2htmlEX.Viewer('pdf-main', 'pdf-outline');
|
||||
</script>
|
||||
"""
|
||||
|
||||
"""
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="pdf-outline" class="opened">
|
||||
"""
|
||||
|
||||
# The container of outline
|
||||
# By default this is hidden, pdf2htmlEX.js will add the 'opened' class if it is not empty
|
||||
# You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js
|
||||
# e.g.
|
||||
# <div id="pdf-outline" class="opened">
|
||||
"""
|
||||
<div id="pdf-outline">
|
||||
"""
|
||||
$outline
|
||||
|
||||
"""
|
||||
</div>
|
||||
"""
|
||||
|
||||
# The container of PDF pages
|
||||
# check base.css for an example and requirements of its CSS styles
|
||||
"""
|
||||
<div id="pdf-main">
|
||||
"""
|
||||
|
||||
# PDF pages
|
||||
$pages
|
||||
|
||||
"""
|
||||
</div>
|
||||
"""
|
||||
|
||||
|
||||
"""
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
@ -118,8 +118,8 @@ var pdf2htmlEX = (function(){
|
||||
this.container = $('#'+this.container_id);
|
||||
|
||||
// need a better design
|
||||
if(this.outline.children().length == 0) {
|
||||
this.outline.toggleClass('opened');
|
||||
if(this.outline.children().length > 0) {
|
||||
this.outline.addClass('opened');
|
||||
}
|
||||
|
||||
var new_pages = new Array();
|
||||
|
@ -156,7 +156,7 @@ class HTMLRenderer : public OutputDev
|
||||
virtual void endPage();
|
||||
|
||||
/*
|
||||
* To optmize false alarms
|
||||
* To optimize false alarms
|
||||
* We just mark as changed, and recheck if they have been changed when we are about to output a new string
|
||||
*/
|
||||
|
||||
@ -273,11 +273,13 @@ class HTMLRenderer : public OutputDev
|
||||
////////////////////////////////////////////////////
|
||||
// state tracking
|
||||
////////////////////////////////////////////////////
|
||||
// reset all states
|
||||
void reset_state();
|
||||
// reset all ***_changed flags
|
||||
void reset_state_change();
|
||||
// check updated states, and determine new_line_stauts
|
||||
// make sure this function can be called several times consecutively without problem
|
||||
void check_state_change(GfxState * state);
|
||||
// reset all ***_changed flags
|
||||
void reset_state_change();
|
||||
// prepare the line context, (close old tags, open new tags)
|
||||
// make sure the current HTML style consistent with PDF
|
||||
void prepare_text_line(GfxState * state);
|
||||
@ -393,7 +395,7 @@ class HTMLRenderer : public OutputDev
|
||||
// we try to render the final font size directly
|
||||
// to reduce the effect of ctm as much as possible
|
||||
|
||||
// draw_ctm is cur_ctm scaled by 1/draw_text_scale,
|
||||
// draw_text_tm is cur_text_tm scaled by 1/draw_text_scale,
|
||||
// so everything rendered should be multiplied by draw_text_scale
|
||||
double draw_text_tm[6];
|
||||
double draw_font_size;
|
||||
|
@ -18,6 +18,9 @@
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
using std::cerr;
|
||||
using std::endl;
|
||||
|
||||
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font)
|
||||
{
|
||||
string mime_type, format;
|
||||
@ -46,6 +49,10 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
|
||||
format = "svg";
|
||||
mime_type = "image/svg+xml";
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Warning: unknown font suffix: " << suffix << endl;
|
||||
}
|
||||
|
||||
f_css.fs << "@font-face{"
|
||||
<< "font-family:f" << info.id << ";"
|
||||
|
@ -125,6 +125,7 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
|
||||
////////////////////////
|
||||
// Process Outline
|
||||
if(param->process_outline)
|
||||
process_outline();
|
||||
|
||||
post_process();
|
||||
@ -183,35 +184,8 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
}
|
||||
|
||||
f_pages.fs << "\">";
|
||||
draw_text_scale = 1.0;
|
||||
|
||||
cur_font_info = install_font(nullptr);
|
||||
cur_font_size = draw_font_size = 0;
|
||||
cur_fs_id = install_font_size(cur_font_size);
|
||||
|
||||
memcpy(cur_text_tm, ID_MATRIX, sizeof(cur_text_tm));
|
||||
memcpy(draw_text_tm, ID_MATRIX, sizeof(draw_text_tm));
|
||||
cur_ttm_id = install_transform_matrix(draw_text_tm);
|
||||
|
||||
cur_letter_space = cur_word_space = 0;
|
||||
cur_ls_id = install_letter_space(cur_letter_space);
|
||||
cur_ws_id = install_word_space(cur_word_space);
|
||||
|
||||
cur_fill_color.r = cur_fill_color.g = cur_fill_color.b = 0;
|
||||
cur_stroke_color.r = cur_stroke_color.g = cur_stroke_color.b = 0;
|
||||
cur_fill_color_id = install_fill_color(&cur_fill_color);
|
||||
cur_stroke_color_id = install_stroke_color(&cur_stroke_color);
|
||||
cur_has_stroke = false;
|
||||
cur_has_fill = true;
|
||||
|
||||
cur_rise = 0;
|
||||
cur_rise_id = install_rise(cur_rise);
|
||||
|
||||
cur_tx = cur_ty = 0;
|
||||
draw_tx = draw_ty = 0;
|
||||
|
||||
reset_state_change();
|
||||
all_changed = true;
|
||||
reset_state();
|
||||
}
|
||||
|
||||
void HTMLRenderer::endPage() {
|
||||
@ -396,7 +370,9 @@ void HTMLRenderer::post_process()
|
||||
continue;
|
||||
}
|
||||
|
||||
if(line.empty() || line[0] == '#')
|
||||
if(line.empty()
|
||||
|| (line.find_first_not_of(' ') == string::npos)
|
||||
|| line[0] == '#')
|
||||
continue;
|
||||
|
||||
|
||||
|
@ -304,7 +304,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt
|
||||
{
|
||||
// ws_width is already multiplied by draw_scale
|
||||
auto iter = whitespace_map.lower_bound(ws_width - param->h_eps);
|
||||
if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) < param->h_eps))
|
||||
if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) <= param->h_eps))
|
||||
{
|
||||
actual_width = iter->first;
|
||||
return iter->second;
|
||||
@ -320,7 +320,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt
|
||||
long long HTMLRenderer::install_rise(double rise)
|
||||
{
|
||||
auto iter = rise_map.lower_bound(rise - param->v_eps);
|
||||
if((iter != rise_map.end()) && (abs(iter->first - rise) < param->v_eps))
|
||||
if((iter != rise_map.end()) && (abs(iter->first - rise) <= param->v_eps))
|
||||
{
|
||||
return iter->second;
|
||||
}
|
||||
@ -334,7 +334,7 @@ long long HTMLRenderer::install_rise(double rise)
|
||||
long long HTMLRenderer::install_height(double height)
|
||||
{
|
||||
auto iter = height_map.lower_bound(height - EPS);
|
||||
if((iter != height_map.end()) && (abs(iter->first - height) < EPS))
|
||||
if((iter != height_map.end()) && (abs(iter->first - height) <= EPS))
|
||||
{
|
||||
return iter->second;
|
||||
}
|
||||
@ -347,7 +347,7 @@ long long HTMLRenderer::install_height(double height)
|
||||
long long HTMLRenderer::install_left(double left)
|
||||
{
|
||||
auto iter = left_map.lower_bound(left - param->h_eps);
|
||||
if((iter != left_map.end()) && (abs(iter->first - left) < param->h_eps))
|
||||
if((iter != left_map.end()) && (abs(iter->first - left) <= param->h_eps))
|
||||
{
|
||||
return iter->second;
|
||||
}
|
||||
|
@ -93,6 +93,56 @@ void HTMLRenderer::updateStrokeColor(GfxState * state)
|
||||
{
|
||||
stroke_color_changed = true;
|
||||
}
|
||||
void HTMLRenderer::reset_state()
|
||||
{
|
||||
draw_text_scale = 1.0;
|
||||
|
||||
cur_font_info = install_font(nullptr);
|
||||
cur_font_size = draw_font_size = 0;
|
||||
cur_fs_id = install_font_size(cur_font_size);
|
||||
|
||||
memcpy(cur_text_tm, ID_MATRIX, sizeof(cur_text_tm));
|
||||
memcpy(draw_text_tm, ID_MATRIX, sizeof(draw_text_tm));
|
||||
cur_ttm_id = install_transform_matrix(draw_text_tm);
|
||||
|
||||
cur_letter_space = cur_word_space = 0;
|
||||
cur_ls_id = install_letter_space(cur_letter_space);
|
||||
cur_ws_id = install_word_space(cur_word_space);
|
||||
|
||||
cur_fill_color.r = cur_fill_color.g = cur_fill_color.b = 0;
|
||||
cur_stroke_color.r = cur_stroke_color.g = cur_stroke_color.b = 0;
|
||||
cur_fill_color_id = install_fill_color(&cur_fill_color);
|
||||
cur_stroke_color_id = install_stroke_color(&cur_stroke_color);
|
||||
cur_has_stroke = false;
|
||||
cur_has_fill = true;
|
||||
|
||||
cur_rise = 0;
|
||||
cur_rise_id = install_rise(cur_rise);
|
||||
|
||||
cur_tx = cur_ty = 0;
|
||||
draw_tx = draw_ty = 0;
|
||||
|
||||
reset_state_change();
|
||||
all_changed = true;
|
||||
}
|
||||
void HTMLRenderer::reset_state_change()
|
||||
{
|
||||
all_changed = false;
|
||||
|
||||
rise_changed = false;
|
||||
text_pos_changed = false;
|
||||
|
||||
font_changed = false;
|
||||
ctm_changed = false;
|
||||
text_mat_changed = false;
|
||||
hori_scale_changed = false;
|
||||
|
||||
letter_space_changed = false;
|
||||
word_space_changed = false;
|
||||
|
||||
fill_color_changed = false;
|
||||
stroke_color_changed = false;
|
||||
}
|
||||
void HTMLRenderer::check_state_change(GfxState * state)
|
||||
{
|
||||
// DEPENDENCY WARNING
|
||||
@ -383,24 +433,6 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
reset_state_change();
|
||||
}
|
||||
|
||||
void HTMLRenderer::reset_state_change()
|
||||
{
|
||||
all_changed = false;
|
||||
|
||||
rise_changed = false;
|
||||
text_pos_changed = false;
|
||||
|
||||
font_changed = false;
|
||||
ctm_changed = false;
|
||||
text_mat_changed = false;
|
||||
hori_scale_changed = false;
|
||||
|
||||
letter_space_changed = false;
|
||||
word_space_changed = false;
|
||||
|
||||
fill_color_changed = false;
|
||||
stroke_color_changed = false;
|
||||
}
|
||||
void HTMLRenderer::prepare_text_line(GfxState * state)
|
||||
{
|
||||
if(!line_opened)
|
||||
|
@ -533,7 +533,6 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
|
||||
double dx = 0;
|
||||
double dy = 0;
|
||||
double dxerr = 0;
|
||||
double dx1,dy1;
|
||||
double ox, oy;
|
||||
|
||||
@ -605,7 +604,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
cur_tx += dx;
|
||||
cur_ty += dy;
|
||||
|
||||
draw_tx += dx + dxerr * cur_font_size * hs;
|
||||
draw_tx += dx;
|
||||
draw_ty += dy;
|
||||
}
|
||||
|
||||
|
@ -24,12 +24,14 @@ struct Param
|
||||
int use_cropbox;
|
||||
double h_dpi, v_dpi;
|
||||
|
||||
// output files
|
||||
// output
|
||||
int single_html;
|
||||
int split_pages;
|
||||
std::string dest_dir;
|
||||
std::string css_filename;
|
||||
std::string outline_filename;
|
||||
int process_nontext;
|
||||
int process_outline;
|
||||
|
||||
// fonts
|
||||
int embed_base_font;
|
||||
@ -55,7 +57,6 @@ struct Param
|
||||
|
||||
// misc.
|
||||
int clean_tmp;
|
||||
int process_nontext;
|
||||
std::string data_dir;
|
||||
int css_draw;
|
||||
int debug;
|
||||
|
@ -48,6 +48,7 @@ void show_version_and_exit(const char * dummy = nullptr)
|
||||
cerr << "Libraries: ";
|
||||
cerr << "poppler " << POPPLER_VERSION << ", ";
|
||||
cerr << "libfontforge " << ffw_get_version() << endl;
|
||||
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
@ -74,6 +75,8 @@ void parse_options (int argc, char **argv)
|
||||
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
||||
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
|
||||
.add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file")
|
||||
.add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text")
|
||||
.add("process-outline", ¶m.process_outline, 1, "show outline in HTML")
|
||||
|
||||
// fonts
|
||||
.add("embed-base-font", ¶m.embed_base_font, 0, "embed local match for standard 14 fonts")
|
||||
@ -101,7 +104,6 @@ void parse_options (int argc, char **argv)
|
||||
|
||||
// misc.
|
||||
.add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion")
|
||||
.add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text")
|
||||
.add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
|
||||
.add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing")
|
||||
.add("debug", ¶m.debug, 0, "print debugging information")
|
||||
|
@ -76,7 +76,7 @@ void ArgParser::parse(int argc, char ** argv) const
|
||||
int v = p->shortname;
|
||||
if(!(opt_map.insert(make_pair(v, p)).second))
|
||||
{
|
||||
cerr << "Warning: duplicated shortname '" << v << "' used by -" << (char)(p->shortname) << " and -" << (char)(opt_map[p->shortname]->shortname) << endl;
|
||||
cerr << "Warning: duplicated shortname: " << v << endl;
|
||||
}
|
||||
}
|
||||
|
||||
@ -93,7 +93,7 @@ void ArgParser::parse(int argc, char ** argv) const
|
||||
}
|
||||
if(!(opt_map.insert(make_pair(v, p)).second))
|
||||
{
|
||||
cerr << "Warning: duplicated shortname '" << v << "' used by --" << (p->name) << " and --" << (opt_map[p->shortname]->name) << endl;
|
||||
cerr << "Warning: duplicated long name: " << (p->name) << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -146,6 +146,10 @@ void ArgParser::show_usage(ostream & out) const
|
||||
}
|
||||
}
|
||||
|
||||
template<> const char * ArgParser::get_type_name<int> (void) { return "int"; }
|
||||
template<> const char * ArgParser::get_type_name<double> (void) { return "fp"; }
|
||||
template<> const char * ArgParser::get_type_name<string> (void) { return "string"; }
|
||||
|
||||
ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * description, bool need_arg)
|
||||
: shortname(0), name(name), description(description), need_arg(need_arg)
|
||||
{
|
||||
@ -159,7 +163,7 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Warning: argument '" << this->name << "' may not be parsed correctly" << endl;
|
||||
cerr << "Warning: argument '" << this->name << "' cannnot be parsed as a short option" << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -39,24 +39,6 @@ void dump_value(std::ostream & out, const T & v)
|
||||
|
||||
extern void dump_value(std::ostream & out, const std::string & v);
|
||||
|
||||
// type names helper
|
||||
template<typename>
|
||||
struct type_name {
|
||||
static char const* value() { return "unknown"; }
|
||||
};
|
||||
|
||||
template<> struct type_name<int> {
|
||||
static char const* value() { return "int"; }
|
||||
};
|
||||
|
||||
template<> struct type_name<double> {
|
||||
static char const* value() { return "fp"; }
|
||||
};
|
||||
|
||||
template<> struct type_name<std::string> {
|
||||
static char const* value() { return "string"; }
|
||||
};
|
||||
|
||||
class ArgParser
|
||||
{
|
||||
public:
|
||||
@ -65,12 +47,14 @@ class ArgParser
|
||||
typedef void (*ArgParserCallBack) (const char * arg);
|
||||
|
||||
/*
|
||||
* optname: name of the argment, should be provided as --optname
|
||||
* description: if description is "", the argument won't be shown in show_usage()
|
||||
* The 1st is for arg without arguments (i.e. flags), and the 2nd is for general args.
|
||||
* optname:
|
||||
* - if not nullptr, it should be the name of the arg, should be in the format of "<long name>[,<short char>]", e.g. "help,h"
|
||||
* - if nullptr, it denotes an optional arg, and description will be ignored
|
||||
* description:
|
||||
* - if description is nullptr or "", the argument won't be shown in show_usage()
|
||||
*/
|
||||
|
||||
ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr);
|
||||
|
||||
template <class T, class Tv>
|
||||
ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false);
|
||||
|
||||
@ -78,9 +62,14 @@ class ArgParser
|
||||
void show_usage(std::ostream & out) const;
|
||||
|
||||
private:
|
||||
// type names helper
|
||||
template<class>
|
||||
static const char * get_type_name(void) { return "unknown"; }
|
||||
|
||||
class ArgEntryBase
|
||||
{
|
||||
public:
|
||||
/* name or description cannot be nullptr */
|
||||
ArgEntryBase(const char * name, const char * description, bool need_arg);
|
||||
virtual ~ArgEntryBase() { }
|
||||
char shortname;
|
||||
@ -118,15 +107,25 @@ class ArgParser
|
||||
template<class T, class Tv>
|
||||
ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback, bool dont_show_default)
|
||||
{
|
||||
// use "" in case nullptr is provided
|
||||
// ArgEntry does not accept nullptr as optname nor description
|
||||
if((!optname) || (!optname[0]))
|
||||
{
|
||||
// when optname is nullptr or "", it's optional, and description is dropped
|
||||
optional_arg_entries.push_back(new ArgEntry<T, Tv>("", location, default_value, callback, "", dont_show_default));
|
||||
}
|
||||
else
|
||||
arg_entries.push_back(new ArgEntry<T, Tv>(optname, location, default_value, callback, description, dont_show_default));
|
||||
{
|
||||
arg_entries.push_back(new ArgEntry<T, Tv>(optname, location, default_value, callback, (description ? description : ""), dont_show_default));
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Known types
|
||||
template<> const char * ArgParser::get_type_name<int> (void);
|
||||
template<> const char * ArgParser::get_type_name<double> (void);
|
||||
template<> const char * ArgParser::get_type_name<std::string> (void);
|
||||
|
||||
template<class T, class Tv>
|
||||
ArgParser::ArgEntry<T, Tv>::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description, bool dont_show_default)
|
||||
: ArgEntryBase(name, description, (location != nullptr))
|
||||
@ -158,7 +157,7 @@ void ArgParser::ArgEntry<T, Tv>::parse(const char * arg) const
|
||||
template<class T, class Tv>
|
||||
void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
||||
{
|
||||
if(description == "")
|
||||
if(description.empty())
|
||||
return;
|
||||
|
||||
std::ostringstream sout;
|
||||
@ -178,7 +177,7 @@ void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const
|
||||
|
||||
if(need_arg)
|
||||
{
|
||||
sout << " <" << type_name<T>::value() << ">";
|
||||
sout << " <" << get_type_name<T>() << ">";
|
||||
}
|
||||
|
||||
std::string s = sout.str();
|
||||
|
@ -15,7 +15,7 @@
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
static inline double round(double x) { return (std::abs(x) > EPS) ? x : 0.0; }
|
||||
static inline bool equal(double x, double y) { return std::abs(x-y) < EPS; }
|
||||
static inline bool equal(double x, double y) { return std::abs(x-y) <= EPS; }
|
||||
static inline bool is_positive(double x) { return x > EPS; }
|
||||
static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user