mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-21 12:40:08 +00:00
lots of improvements from josch
This commit is contained in:
parent
d286233b6a
commit
123b37ce22
1
AUTHORS
1
AUTHORS
@ -10,6 +10,7 @@ filodej <philode@gmail.com>
|
||||
hasufell <julian.ospald@googlemail.com>
|
||||
Herbert Jones <herbert@mediafire.com>
|
||||
Hongliang Tian <tatetian@gmail.com>
|
||||
Johannes Schauer <j.schauer@email.de>
|
||||
John Hewson <john@jahewson.com>
|
||||
Marc Sanfacon <marc.sanfacon@gmail.com>
|
||||
Michele Redolfi <michele@tecnicaict.com>
|
||||
|
@ -39,7 +39,7 @@ Issues may be closed due to the following reasons:
|
||||
* Fixed
|
||||
* Duplicate of other issues
|
||||
* Invalid / Won't fix / Off topic
|
||||
* Inactiviy (for unconfirmed issues)
|
||||
* Inactivity (for unconfirmed issues)
|
||||
* Insufficient info (for unconfirmed issues)
|
||||
|
||||
In the last two cases, you can reopen the issue when you can provide more information.
|
||||
|
@ -75,7 +75,7 @@ v0.8
|
||||
* New UI style
|
||||
* New options:
|
||||
--optimize-text : HTML optimization, see above
|
||||
--fallback : the most accurate way, but costy (larger file sizes)
|
||||
--fallback : the most accurate way, but costly (larger file sizes)
|
||||
--printing : enable or disable CSS for printing
|
||||
--page-file: specify page filenames when split-pages is on
|
||||
* Deprecated options:
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/sh -ex
|
||||
|
||||
# convert raw SVG into png of different sizes
|
||||
convert -background none -resize 64x64^ pdf2htmlEX.svg pdf2htmlEX-64x64.png
|
||||
|
148
pdf2htmlEX.1.in
148
pdf2htmlEX.1.in
@ -26,174 +26,174 @@ Other objects are rendered as images and also embedded.
|
||||
.SS Pages
|
||||
|
||||
.TP
|
||||
.B -f, --first-page <num> (Default: 1)
|
||||
.B \-f, \-\-first\-page <num> (Default: 1)
|
||||
Specify the first page to process
|
||||
|
||||
.TP
|
||||
.B -l, --last-page <num> (Default: last page)
|
||||
.B \-l, \-\-last\-page <num> (Default: last page)
|
||||
Specify the last page to process
|
||||
|
||||
.SS Dimensions
|
||||
|
||||
.TP
|
||||
.B --zoom <ratio>, --fit-width <width>, --fit-height <height>
|
||||
--zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels.
|
||||
.B \-\-zoom <ratio>, \-\-fit\-width <width>, \-\-fit\-height <height>
|
||||
\-\-zoom specifies the zoom factor directly; \-\-fit\-width/height specifies the maximum width/height of a page, the values are in pixels.
|
||||
|
||||
If multiple values are specified, the minimum one will be used.
|
||||
|
||||
If none is specified, pages will be rendered at 72 DPI.
|
||||
|
||||
.TP
|
||||
.B --use-cropbox <0|1> (Default: 1)
|
||||
.B \-\-use\-cropbox <0|1> (Default: 1)
|
||||
Use CropBox instead of MediaBox for output.
|
||||
|
||||
.TP
|
||||
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144)
|
||||
.B \-\-hdpi <dpi>, \-\-vdpi <dpi> (Default: 144)
|
||||
Specify the horizontal and vertical DPI for images
|
||||
|
||||
|
||||
.SS Output
|
||||
|
||||
.B --embed <string>
|
||||
.B \-\-embed <string>
|
||||
.br
|
||||
.B --embed-css <0|1> (Default: 1)
|
||||
.B \-\-embed\-css <0|1> (Default: 1)
|
||||
.br
|
||||
.B --embed-font <0|1> (Default: 1)
|
||||
.B \-\-embed\-font <0|1> (Default: 1)
|
||||
.br
|
||||
.B --embed-image <0|1> (Default: 1)
|
||||
.B \-\-embed\-image <0|1> (Default: 1)
|
||||
.br
|
||||
.B --embed-javascript <0|1> (Default: 1)
|
||||
.B \-\-embed\-javascript <0|1> (Default: 1)
|
||||
.br
|
||||
.B --embed-outline <0|1> (Default: 1)
|
||||
.B \-\-embed\-outline <0|1> (Default: 1)
|
||||
.RS
|
||||
Specify which elements should be embedded into the output HTML file.
|
||||
|
||||
If switched off, separate files will be generated along with the HTML file for the corresponding elements.
|
||||
|
||||
--embed accepts a string as argument. Each letter of the string must be one of `cCfFiIjJoO`, which corresponds
|
||||
to one of the --embed-*** switches. Lower case letters for 0 and upper case letters for 1. For example,
|
||||
`--embed cFIJo` means to embed everything but CSS files and outlines.
|
||||
\-\-embed accepts a string as argument. Each letter of the string must be one of `cCfFiIjJoO`, which corresponds
|
||||
to one of the \-\-embed\-*** switches. Lower case letters for 0 and upper case letters for 1. For example,
|
||||
`\-\-embed cFIJo` means to embed everything but CSS files and outlines.
|
||||
.RE
|
||||
.TP
|
||||
.B --split-pages <0|1> (Default: 0)
|
||||
.B \-\-split\-pages <0|1> (Default: 0)
|
||||
If turned on, the content of each page is stored in a separate file.
|
||||
|
||||
This switch is useful if you want pages to be loaded separately & dynamically -- a supporting server might be necessary.
|
||||
This switch is useful if you want pages to be loaded separately & dynamically \-\- a supporting server might be necessary.
|
||||
|
||||
Also see --page-filename.
|
||||
Also see \-\-page\-filename.
|
||||
|
||||
.TP
|
||||
.B --dest-dir <dir> (Default: .)
|
||||
.B \-\-dest\-dir <dir> (Default: .)
|
||||
Specify destination folder.
|
||||
|
||||
.TP
|
||||
.B --css-filename <filename> (Default: <none>)
|
||||
.B \-\-css\-filename <filename> (Default: <none>)
|
||||
Specify the filename of the generated css file, if not embedded.
|
||||
|
||||
If it's empty, the file name will be determined automatically.
|
||||
|
||||
.TP
|
||||
.B --page-filename <filename> (Default: <none>)
|
||||
Specify the filename template for pages when --split-pages is 1
|
||||
.B \-\-page\-filename <filename> (Default: <none>)
|
||||
Specify the filename template for pages when \-\-split\-pages is 1
|
||||
|
||||
A %d placeholder may be included in `filename` to indicate where the page number should be placed. The placeholder supports a limited subset of normal numerical placeholders, including specified width and zero padding.
|
||||
|
||||
If `filename` does not contain a placeholder for the page number, the page number will be inserted directly before the file extension. If the filename does not have an extension, the page number will be placed at the end of the file name.
|
||||
|
||||
If --page-filename is not specified, <input-filename> will be used for the output filename, replacing the extension with .page and adding the page number directly before the extension.
|
||||
If \-\-page\-filename is not specified, <input\-filename> will be used for the output filename, replacing the extension with .page and adding the page number directly before the extension.
|
||||
|
||||
.B Examples
|
||||
|
||||
.B pdf2htmlEX --split-pages 1 foo.pdf
|
||||
.B pdf2htmlEX \-\-split\-pages 1 foo.pdf
|
||||
|
||||
Yields page files foo1.page, foo2.page, etc.
|
||||
|
||||
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename bar.baz
|
||||
.B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename bar.baz
|
||||
|
||||
Yields page files bar1.baz, bar2.baz, etc.
|
||||
|
||||
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename page%dbar.baz
|
||||
.B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename page%dbar.baz
|
||||
|
||||
Yields page files page1bar.baz, page2bar.baz, etc.
|
||||
|
||||
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename bar%03d.baz
|
||||
.B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename bar%03d.baz
|
||||
|
||||
Yields page files bar001.baz, bar002.baz, etc.
|
||||
|
||||
.TP
|
||||
.B --outline-filename <filename> (Default: <none>)
|
||||
.B \-\-outline\-filename <filename> (Default: <none>)
|
||||
Specify the filename of the generated outline file, if not embedded.
|
||||
|
||||
If it's empty, the file name will be determined automatically.
|
||||
|
||||
.TP
|
||||
.B --process-nontext <0|1> (Default: 1)
|
||||
Whether to process non-text objects (as images)
|
||||
.B \-\-process\-nontext <0|1> (Default: 1)
|
||||
Whether to process non\-text objects (as images)
|
||||
|
||||
.TP
|
||||
.B --process-outline <0|1> (Default: 1)
|
||||
.B \-\-process\-outline <0|1> (Default: 1)
|
||||
Whether to show outline in the generated HTML
|
||||
|
||||
.TP
|
||||
.B --printing <0|1> (Default: 1)
|
||||
.B \-\-printing <0|1> (Default: 1)
|
||||
Enable printing support. Disabling this option may reduce the size of CSS.
|
||||
|
||||
.TP
|
||||
.B --fallback <0|1> (Deafult: 0)
|
||||
.B \-\-fallback <0|1> (Default: 0)
|
||||
Output in fallback mode, for better accuracy and browser compatibility, but the size becomes larger.
|
||||
|
||||
.TP
|
||||
.B --tmp-file-size-limit <limit> (Default: -1)
|
||||
.B \-\-tmp\-file\-size\-limit <limit> (Default: \-1)
|
||||
This limits the total size (in KB) of the temporary files which will also limit the total size of the output file.
|
||||
This is an estimate and it will stop after a page, once the total temporary files size is greater than this number.
|
||||
|
||||
-1 means no limit and is the default.
|
||||
\-1 means no limit and is the default.
|
||||
|
||||
|
||||
.SS Fonts
|
||||
|
||||
.TP
|
||||
.B --embed-external-font <0|1> (Default: 1)
|
||||
.B \-\-embed\-external\-font <0|1> (Default: 1)
|
||||
Specify whether the local matched fonts, for fonts not embedded in PDF, should be embedded into HTML.
|
||||
|
||||
If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics.
|
||||
|
||||
.TP
|
||||
.B --font-format <format> (Default: woff)
|
||||
.B \-\-font\-format <format> (Default: woff)
|
||||
Specify the format of fonts extracted from the PDF file.
|
||||
|
||||
.TP
|
||||
.B --decompose-ligature <0|1> (Default: 0)
|
||||
Decompose ligatures. For example 'fi' -> 'f''i'.
|
||||
.B \-\-decompose\-ligature <0|1> (Default: 0)
|
||||
Decompose ligatures. For example 'fi' \-> 'f''i'.
|
||||
|
||||
.TP
|
||||
.B --auto-hint <0|1> (Default: 0)
|
||||
.B \-\-auto\-hint <0|1> (Default: 0)
|
||||
If set to 1, hints will be generated for the fonts using fontforge.
|
||||
|
||||
This may be preceded by --external-hint-tool.
|
||||
This may be preceded by \-\-external\-hint\-tool.
|
||||
|
||||
.TP
|
||||
.B --external-hint-tool <tool> (Default: <none>)
|
||||
If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint.
|
||||
.B \-\-external\-hint\-tool <tool> (Default: <none>)
|
||||
If specified, the tool will be called in order to enhance hinting for fonts; this will precede \-\-auto\-hint.
|
||||
|
||||
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for --font-format.
|
||||
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for \-\-font\-format.
|
||||
|
||||
.TP
|
||||
.B --stretch-narrow-glyph <0|1> (Default: 0)
|
||||
.B \-\-stretch\-narrow\-glyph <0|1> (Default: 0)
|
||||
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs
|
||||
|
||||
.TP
|
||||
.B --squeeze-wide-glyph <0|1> (Default: 1)
|
||||
.B \-\-squeeze\-wide\-glyph <0|1> (Default: 1)
|
||||
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
|
||||
|
||||
.TP
|
||||
.B --override-fstype <0|1> (Default: 0)
|
||||
.B \-\-override\-fstype <0|1> (Default: 0)
|
||||
Clear the fstype bits in TTF/OTF fonts.
|
||||
|
||||
Turn this on if Internet Explorer complains about 'Permission must be Installable' AND you have permission to do so.
|
||||
|
||||
.TP
|
||||
.B --process-type3 <0|1> (Default: 0)
|
||||
.B \-\-process\-type3 <0|1> (Default: 0)
|
||||
If turned on, pdf2htmlEX will try to convert Type 3 fonts such that text can be rendered natively in HTML.
|
||||
Otherwise all text with Type 3 fonts will be rendered as image.
|
||||
|
||||
@ -202,17 +202,17 @@ This feature is highly experimental.
|
||||
.SS Text
|
||||
|
||||
.TP
|
||||
.B --heps <len>, --veps <len> (Default: 1)
|
||||
.B \-\-heps <len>, \-\-veps <len> (Default: 1)
|
||||
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||
|
||||
pdf2htmlEX would try to optimize the generated HTML file by moving text within this distance.
|
||||
|
||||
.TP
|
||||
.B --space-threshold <ratio> (Default: 0.125)
|
||||
.B \-\-space\-threshold <ratio> (Default: 0.125)
|
||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
||||
|
||||
.TP
|
||||
.B --font-size-multiplier <ratio> (Default: 4.0)
|
||||
.B \-\-font\-size\-multiplier <ratio> (Default: 4.0)
|
||||
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
||||
|
||||
Specifying a ratio greater than 1 would resolve this issue; however, it might freeze some browsers.
|
||||
@ -220,42 +220,42 @@ Specify a ratio greater than 1 would resolve this issue, however it might freeze
|
||||
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
|
||||
|
||||
.TP
|
||||
.B --space-as-offset <0|1> (Default: 0)
|
||||
.B \-\-space\-as\-offset <0|1> (Default: 0)
|
||||
If set to 1, space characters will be treated as offsets, which allows a better optimization.
|
||||
|
||||
For PDF files with bad encodings, turning on this option may cause losing characters.
|
||||
|
||||
.TP
|
||||
.B --tounicode <-1|0|1> (Default: 0)
|
||||
.B \-\-tounicode <\-1|0|1> (Default: 0)
|
||||
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
|
||||
If this value is set to 1, the ToUnicode Map is always applied, if provided in PDF, and characters may not render correctly in HTML if there are collisions.
|
||||
|
||||
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
||||
If set to \-1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
||||
|
||||
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
||||
|
||||
.TP
|
||||
.B --optimize-text <0|1> (Deafult: 0)
|
||||
.B \-\-optimize\-text <0|1> (Default: 0)
|
||||
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
|
||||
|
||||
.SS Background Image
|
||||
|
||||
.TP
|
||||
.B --bg-format <format> (Default: png)
|
||||
Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats.
|
||||
.B \-\-bg\-format <format> (Default: png)
|
||||
Specify the background image format. Run `pdf2htmlEX \-v` to check all supported formats.
|
||||
|
||||
.SS PDF Protection
|
||||
|
||||
.TP
|
||||
.B -o, --owner-password <password>
|
||||
.B \-o, \-\-owner\-password <password>
|
||||
Specify owner password
|
||||
|
||||
.TP
|
||||
.B -u, --user-password <password>
|
||||
.B \-u, \-\-user\-password <password>
|
||||
Specify user password
|
||||
|
||||
.TP
|
||||
.B --no-drm <0|1> (Default: 0)
|
||||
.B \-\-no\-drm <0|1> (Default: 0)
|
||||
Override document DRM settings
|
||||
|
||||
Turn this on only when you have permission.
|
||||
@ -263,53 +263,53 @@ Turn this on only when you have permission.
|
||||
.SS Misc.
|
||||
|
||||
.TP
|
||||
.B --clean-tmp <0|1> (Default: 1)
|
||||
.B \-\-clean\-tmp <0|1> (Default: 1)
|
||||
If switched off, intermediate files won't be cleaned in the end.
|
||||
|
||||
.TP
|
||||
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||
.B \-\-data\-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||
Specify the folder holding the manifest and other files (see below for the manifest file)
|
||||
|
||||
.TP
|
||||
.B --tmp-dir <dir> (Default: /tmp)
|
||||
.B \-\-tmp\-dir <dir> (Default: /tmp or $TMPDIR if set)
|
||||
Specify the temporary folder to use for temporary files
|
||||
|
||||
.TP
|
||||
.B --css-draw <0|1> (Default: 0)
|
||||
.B \-\-css\-draw <0|1> (Default: 0)
|
||||
Experimental and unsupported CSS drawing
|
||||
|
||||
.TP
|
||||
.B --debug <0|1> (Default: 0)
|
||||
.B \-\-debug <0|1> (Default: 0)
|
||||
Print debug information.
|
||||
|
||||
.SS Meta
|
||||
|
||||
.TP
|
||||
.B -v, --version
|
||||
.B \-v, \-\-version
|
||||
Print copyright and version info
|
||||
|
||||
.TP
|
||||
.B --help
|
||||
.B \-\-help
|
||||
Print usage information
|
||||
|
||||
.SH MANIFEST and DATA-DIR
|
||||
When split-pages is 0, the manifest file describes how the final html page should be generated.
|
||||
.SH MANIFEST and DATA\-DIR
|
||||
When split\-pages is 0, the manifest file describes how the final html page should be generated.
|
||||
|
||||
By default, pdf2htmlEX will use the manifest in the default data-dir (run `pdf2htmlEX -v` to check), which gives a simple demo of its syntax.
|
||||
By default, pdf2htmlEX will use the manifest in the default data\-dir (run `pdf2htmlEX \-v` to check), which gives a simple demo of its syntax.
|
||||
|
||||
You can modify the default one, or you can create a new one and specify the correct data-dir in the command line.
|
||||
You can modify the default one, or you can create a new one and specify the correct data\-dir in the command line.
|
||||
|
||||
All files referred by the manifest must be located in the data-dir.
|
||||
All files referred to by the manifest must be located in the data\-dir.
|
||||
|
||||
.SH EXAMPLE
|
||||
.TP
|
||||
.B pdf2htmlEX /path/to/file.pdf
|
||||
Convert file.pdf into file.html
|
||||
.TP
|
||||
.B pdf2htmlEX --clean-tmp 0 --debug 1 /path/to/file.pdf
|
||||
.B pdf2htmlEX \-\-clean\-tmp 0 \-\-debug 1 /path/to/file.pdf
|
||||
Convert file.pdf and leave all intermediate files.
|
||||
.TP
|
||||
.B pdf2htmlEX --dest-dir out --embed fi /path/to/file.pdf
|
||||
.B pdf2htmlEX \-\-dest\-dir out \-\-embed fi /path/to/file.pdf
|
||||
Convert file.pdf into out/file.html and leave font/image files separated.
|
||||
|
||||
.SH COPYRIGHT
|
||||
|
@ -16,7 +16,7 @@
|
||||
overflow:auto;
|
||||
}
|
||||
#page-container { /* PDF container */
|
||||
position:absolute; /* required for calulating relative positions of pages in pdf2htmlEX.js */
|
||||
position:absolute; /* required for calculating relative positions of pages in pdf2htmlEX.js */
|
||||
top:0;
|
||||
left:0px;
|
||||
margin:0;
|
||||
@ -154,7 +154,7 @@
|
||||
transform-origin:0% 100%;
|
||||
-ms-transform-origin:0% 100%;
|
||||
-webkit-transform-origin:0% 100%;
|
||||
unicode-bidi:bidi-override;/* For rtl lanauges, e.g. Hebrew, we don't want the default Unicode behaviour */
|
||||
unicode-bidi:bidi-override;/* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
|
||||
-moz-font-feature-settings:"liga" 0;/* We don't want Firefox to recognize ligatures */
|
||||
}
|
||||
.@CSS_LINK_CN span { /* text blocks within a line */
|
||||
@ -162,7 +162,7 @@
|
||||
vertical-align: baseline;
|
||||
/* _<id> for spaces may need display:inline, which will override this */
|
||||
display:inline-block;
|
||||
unicode-bidi:bidi-override; /* For rtl lanauges, e.g. Hebrew, we don't want the default Unicode behaviour */
|
||||
unicode-bidi:bidi-override; /* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
|
||||
}
|
||||
.@CSS_WHITESPACE_CN@ { /* text shift */
|
||||
color:transparent;
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/sh -ex
|
||||
# Compile and optimize CSS code
|
||||
# Copyright 2013 Lu Wang <coolwanglu@gmail.com>
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
#!/bin/sh
|
||||
#!/bin/sh -ex
|
||||
# Compile and optimize JS code
|
||||
# Copyright 2013 Lu Wang <coolwanglu@gmail.com>
|
||||
|
||||
|
@ -166,7 +166,7 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Warning: argument '" << this->name << "' cannnot be parsed as a short option" << endl;
|
||||
cerr << "Warning: argument '" << this->name << "' cannot be parsed as a short option" << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ protected:
|
||||
void reset_state();
|
||||
// reset all ***_changed flags
|
||||
void reset_state_change();
|
||||
// check updated states, and determine new_line_stauts
|
||||
// check updated states, and determine new_line_status
|
||||
// make sure this function can be called several times consecutively without problem
|
||||
void check_state_change(GfxState * state);
|
||||
// prepare the line context, (close old tags, open new tags)
|
||||
|
@ -168,7 +168,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
|
||||
}
|
||||
catch(int)
|
||||
{
|
||||
cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl;
|
||||
cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl;
|
||||
}
|
||||
|
||||
obj2.free();
|
||||
@ -240,7 +240,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
|
||||
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
|
||||
cairo_t * cr = cairo_create(surface);
|
||||
|
||||
// track the positio of the origin
|
||||
// track the position of the origin
|
||||
double ox, oy;
|
||||
ox = oy = 0.0;
|
||||
|
||||
@ -792,7 +792,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
||||
|
||||
/*
|
||||
* Step 5
|
||||
* Generate the font, load the metrics and set the embeddig bits (fstype)
|
||||
* Generate the font, load the metrics and set the embedding bits (fstype)
|
||||
*
|
||||
* Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them)
|
||||
* We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved.
|
||||
|
@ -67,7 +67,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
|
||||
all_manager.whitespace .set_eps(param.h_eps);
|
||||
all_manager.left .set_eps(param.h_eps);
|
||||
/*
|
||||
* For othere states, we need accurate values
|
||||
* For other states, we need accurate values
|
||||
* optimization will be done separately
|
||||
*/
|
||||
all_manager.font_size .set_eps(EPS);
|
||||
|
@ -264,7 +264,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
* Rescale the font
|
||||
* If the font-size is 1, and the matrix is [10,0,0,10,0,0], we would like to change it to
|
||||
* font-size == 10 and matrix == [1,0,0,1,0,0],
|
||||
* such that it will be easy and natrual for web browsers
|
||||
* such that it will be easy and natural for web browsers
|
||||
*/
|
||||
double new_draw_text_tm[6];
|
||||
memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
|
||||
@ -357,7 +357,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
dy = inverted[1] * lhs1 + inverted[3] * lhs2;
|
||||
if(equal(dy, 0))
|
||||
{
|
||||
// text on a same horizontal line, we can insert positive or negaive x-offsets
|
||||
// text on a same horizontal line, we can insert positive or negative x-offsets
|
||||
merged = true;
|
||||
}
|
||||
else if(param.optimize_text)
|
||||
|
@ -291,7 +291,7 @@ void HTMLTextLine::optimize(std::vector<HTMLTextLine*> & lines)
|
||||
*/
|
||||
void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
|
||||
{
|
||||
// remove unuseful states in the end
|
||||
// remove useless states in the end
|
||||
while((!states.empty()) && (states.back().start_idx >= text.size()))
|
||||
states.pop_back();
|
||||
|
||||
@ -416,9 +416,9 @@ void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
|
||||
|
||||
// Optimize word space
|
||||
|
||||
// In some PDF files all spaces are converted into positionig shift
|
||||
// In some PDF files all spaces are converted into positioning shift
|
||||
// We may try to change (some of) them to ' ' by adjusting word_space
|
||||
// for now, we cosider only the no-space scenario
|
||||
// for now, we consider only the no-space scenario
|
||||
// which also includes the case when param.space_as_offset is set
|
||||
|
||||
// get the text segment covered by current state (*state_iter1)
|
||||
@ -554,7 +554,7 @@ void HTMLTextLine::State::begin (ostream & out, const State * prev_state)
|
||||
else
|
||||
out << ids[i];
|
||||
}
|
||||
// veritcal align
|
||||
// vertical align
|
||||
if(!equal(vertical_align, 0))
|
||||
{
|
||||
// so we have to dump it
|
||||
|
@ -57,7 +57,7 @@ public:
|
||||
long long ids[ID_COUNT];
|
||||
|
||||
size_t start_idx; // index of the first Text using this state
|
||||
// for optimzation
|
||||
// for optimization
|
||||
long long hash_value;
|
||||
long long hash_umask; // some states may not be actually used
|
||||
bool need_close;
|
||||
|
@ -84,7 +84,7 @@ protected:
|
||||
|
||||
// Be careful about the mixed usage of Matrix and const double *
|
||||
// the input is usually double *, which might be changed, so we have to copy the content out
|
||||
// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructign
|
||||
// in the map we use Matrix instead of double * such that the array is automatically released on destruction
|
||||
template <class Imp>
|
||||
class StateManager<Matrix, Imp>
|
||||
{
|
||||
@ -302,7 +302,7 @@ public:
|
||||
void dump_value(std::ostream & out, const Matrix & matrix) {
|
||||
// always ignore tm[4] and tm[5] because
|
||||
// we have already shifted the origin
|
||||
// TODO: recognize common matices
|
||||
// TODO: recognize common matrices
|
||||
const auto & m = matrix.m;
|
||||
auto prefixes = {"", "-ms-", "-webkit-"};
|
||||
if(tm_equal(m, ID_MATRIX, 4))
|
||||
|
@ -26,7 +26,7 @@ set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
|
||||
|
||||
set(CSS_LETTER_SPACE_CN "ls") # Letter Space
|
||||
set(CSS_WORD_SPACE_CN "ws") # Word Space
|
||||
set(CSS_VERTICAL_ALIGN_CN "v") # Vertial align
|
||||
set(CSS_VERTICAL_ALIGN_CN "v") # Vertical align
|
||||
set(CSS_WHITESPACE_CN "_") # whitespace
|
||||
set(CSS_LEFT_CN "x") # X
|
||||
set(CSS_HEIGHT_CN "h") # Height
|
||||
|
@ -56,7 +56,7 @@ void show_usage_and_exit(const char * dummy = nullptr)
|
||||
void show_version_and_exit(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl;
|
||||
cerr << "Copyright 2012-2014 Lu Wang <coolwanglu@gmail.com> and other contributers" << endl;
|
||||
cerr << "Copyright 2012-2014 Lu Wang <coolwanglu@gmail.com> and other contributors" << endl;
|
||||
cerr << "Libraries: " << endl;
|
||||
cerr << " poppler " << POPPLER_VERSION << endl;
|
||||
cerr << " libfontforge " << ffw_get_version() << endl;
|
||||
@ -159,7 +159,7 @@ void parse_options (int argc, char **argv)
|
||||
.add("split-pages", ¶m.split_pages, 0, "split pages into separate files")
|
||||
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
||||
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
|
||||
.add("page-filename", ¶m.page_filename, "", "filename template for splitted pages ")
|
||||
.add("page-filename", ¶m.page_filename, "", "filename template for split pages ")
|
||||
.add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file")
|
||||
.add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text")
|
||||
.add("process-outline", ¶m.process_outline, 1, "show outline in HTML")
|
||||
@ -197,7 +197,7 @@ void parse_options (int argc, char **argv)
|
||||
|
||||
// misc.
|
||||
.add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion")
|
||||
.add("tmp-dir", ¶m.tmp_dir, param.tmp_dir, "specify the location of tempory directory.")
|
||||
.add("tmp-dir", ¶m.tmp_dir, param.tmp_dir, "specify the location of temporary directory.")
|
||||
.add("data-dir", ¶m.data_dir, param.data_dir, "specify data directory")
|
||||
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
|
||||
// .add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing")
|
||||
@ -351,7 +351,18 @@ int main(int argc, char **argv)
|
||||
param.data_dir = get_exec_dir(argv[0]);
|
||||
param.tmp_dir = get_tmp_dir();
|
||||
#else
|
||||
param.tmp_dir = "/tmp";
|
||||
char const* tmp = getenv("TMPDIR");
|
||||
#ifdef P_tmpdir
|
||||
if (!tmp)
|
||||
tmp = P_tmpdir;
|
||||
#endif
|
||||
#ifdef _PATH_TMP
|
||||
if (!tmp)
|
||||
tmp = _PATH_TMP;
|
||||
#endif
|
||||
if (!tmp)
|
||||
tmp = "/tmp";
|
||||
param.tmp_dir = string(tmp);
|
||||
param.data_dir = PDF2HTMLEX_DATA_PATH;
|
||||
#endif
|
||||
|
||||
|
@ -49,7 +49,7 @@ void ffw_add_empty_char(int32_t unicode, int width);
|
||||
// metrics
|
||||
int ffw_get_em_size(void);
|
||||
// manipulate ascent and descent
|
||||
// asscent is between 0 and 1
|
||||
// ascent is between 0 and 1
|
||||
// descent is between -1 and 0
|
||||
void ffw_fix_metric();
|
||||
// get ascent/descent based on the shape
|
||||
|
@ -37,7 +37,7 @@ bool isLegalUnicode(Unicode u)
|
||||
/*
|
||||
* 9, 10 and 13 are interpreted as white-spaces in HTML
|
||||
* `word-spacing` may be applied on them
|
||||
* and the browser may not use the actualy glyphs in the font
|
||||
* and the browser may not use the actual glyphs in the font
|
||||
* So mark them as illegal
|
||||
*
|
||||
* The problem is that the correct value can not be copied out in this way
|
||||
|
@ -259,5 +259,4 @@ if __name__=="__main__":
|
||||
print >> sys.stderr, "Cannot locate pdf2htmlEX executable. Make sure source was built before running this test."
|
||||
exit(1)
|
||||
|
||||
suite = unittest.loader.TestLoader().loadTestsFromTestCase(OutputNamingTests)
|
||||
unittest.TextTestRunner(verbosity=2).run(suite)
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user