1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-21 12:40:08 +00:00

merge fix

This commit is contained in:
Lu Wang 2014-07-13 17:02:15 -07:00
commit 2081f9eb88
20 changed files with 121 additions and 110 deletions

View File

@ -11,6 +11,7 @@ filodej <philode@gmail.com>
hasufell <julian.ospald@googlemail.com> hasufell <julian.ospald@googlemail.com>
Herbert Jones <herbert@mediafire.com> Herbert Jones <herbert@mediafire.com>
Hongliang Tian <tatetian@gmail.com> Hongliang Tian <tatetian@gmail.com>
Johannes Schauer <j.schauer@email.de>
John Hewson <john@jahewson.com> John Hewson <john@jahewson.com>
Marc Sanfacon <marc.sanfacon@gmail.com> Marc Sanfacon <marc.sanfacon@gmail.com>
Michele Redolfi <michele@tecnicaict.com> Michele Redolfi <michele@tecnicaict.com>

View File

@ -80,7 +80,7 @@ v0.8
* New UI style * New UI style
* New options: * New options:
--optimize-text : HTML optimization, see above --optimize-text : HTML optimization, see above
--fallback : the most accurate way, but costy (larger file sizes) --fallback : the most accurate way, but costly (larger file sizes)
--printing : enable or disable CSS for printing --printing : enable or disable CSS for printing
--page-file: specify page filenames when split-pages is on --page-file: specify page filenames when split-pages is on
* Deprecated options: * Deprecated options:

View File

@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh -ex
# convert raw SVG into png of different sizes # convert raw SVG into png of different sizes
convert -background none -resize 64x64^ pdf2htmlEX.svg pdf2htmlEX-64x64.png convert -background none -resize 64x64^ pdf2htmlEX.svg pdf2htmlEX-64x64.png

View File

@ -26,178 +26,178 @@ Other objects are rendered as images and also embedded.
.SS Pages .SS Pages
.TP .TP
.B -f, --first-page <num> (Default: 1) .B \-f, \-\-first\-page <num> (Default: 1)
Specify the first page to process Specify the first page to process
.TP .TP
.B -l, --last-page <num> (Default: last page) .B \-l, \-\-last\-page <num> (Default: last page)
Specify the last page to process Specify the last page to process
.SS Dimensions .SS Dimensions
.TP .TP
.B --zoom <ratio>, --fit-width <width>, --fit-height <height> .B \-\-zoom <ratio>, \-\-fit\-width <width>, \-\-fit\-height <height>
--zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels. \-\-zoom specifies the zoom factor directly; \-\-fit\-width/height specifies the maximum width/height of a page, the values are in pixels.
If multiple values are specified, the minimum one will be used. If multiple values are specified, the minimum one will be used.
If none is specified, pages will be rendered as 72DPI. If none is specified, pages will be rendered as 72DPI.
.TP .TP
.B --use-cropbox <0|1> (Default: 1) .B \-\-use\-cropbox <0|1> (Default: 1)
Use CropBox instead of MediaBox for output. Use CropBox instead of MediaBox for output.
.TP .TP
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144) .B \-\-hdpi <dpi>, \-\-vdpi <dpi> (Default: 144)
Specify the horizontal and vertical DPI for images Specify the horizontal and vertical DPI for images
.SS Output .SS Output
.B --embed <string> .B \-\-embed <string>
.br .br
.B --embed-css <0|1> (Default: 1) .B \-\-embed\-css <0|1> (Default: 1)
.br .br
.B --embed-font <0|1> (Default: 1) .B \-\-embed\-font <0|1> (Default: 1)
.br .br
.B --embed-image <0|1> (Default: 1) .B \-\-embed\-image <0|1> (Default: 1)
.br .br
.B --embed-javascript <0|1> (Default: 1) .B \-\-embed\-javascript <0|1> (Default: 1)
.br .br
.B --embed-outline <0|1> (Default: 1) .B \-\-embed\-outline <0|1> (Default: 1)
.RS .RS
Specify which elements should be embedded into the output HTML file. Specify which elements should be embedded into the output HTML file.
If switched off, separated files will be generated along with the HTML file for the corresponding elements. If switched off, separated files will be generated along with the HTML file for the corresponding elements.
--embed accepts a string as argument. Each letter of the string must be one of `cCfFiIjJoO`, which corresponds \-\-embed accepts a string as argument. Each letter of the string must be one of `cCfFiIjJoO`, which corresponds
to one of the --embed-*** switches. Lower case letters for 0 and upper case letters for 1. For example, to one of the \-\-embed\-*** switches. Lower case letters for 0 and upper case letters for 1. For example,
`--embed cFIJo` means to embed everything but CSS files and outlines. `\-\-embed cFIJo` means to embed everything but CSS files and outlines.
.RE .RE
.TP .TP
.B --split-pages <0|1> (Default: 0) .B \-\-split\-pages <0|1> (Default: 0)
If turned on, the content of each page is stored in a separated file. If turned on, the content of each page is stored in a separated file.
This switch is useful if you want pages to be loaded separately & dynamically -- a supporting server might be necessary. This switch is useful if you want pages to be loaded separately & dynamically \-\- a supporting server might be necessary.
Also see --page-filename. Also see \-\-page\-filename.
.TP .TP
.B --dest-dir <dir> (Default: .) .B \-\-dest\-dir <dir> (Default: .)
Specify destination folder. Specify destination folder.
.TP .TP
.B --css-filename <filename> (Default: <none>) .B \-\-css\-filename <filename> (Default: <none>)
Specify the filename of the generated css file, if not embedded. Specify the filename of the generated css file, if not embedded.
If it's empty, the file name will be determined automatically. If it's empty, the file name will be determined automatically.
.TP .TP
.B --page-filename <filename> (Default: <none>) .B \-\-page\-filename <filename> (Default: <none>)
Specify the filename template for pages when --split-pages is 1 Specify the filename template for pages when \-\-split\-pages is 1
A %d placeholder may be included in `filename` to indicate where the page number should be placed. The placeholder supports a limited subset of normal numerical placeholders, including specified width and zero padding. A %d placeholder may be included in `filename` to indicate where the page number should be placed. The placeholder supports a limited subset of normal numerical placeholders, including specified width and zero padding.
If `filename` does not contain a placeholder for the page number, the page number will be inserted directly before the file extension. If the filename does not have an extension, the page number will be placed at the end of the file name. If `filename` does not contain a placeholder for the page number, the page number will be inserted directly before the file extension. If the filename does not have an extension, the page number will be placed at the end of the file name.
If --page-filename is not specified, <input-filename> will be used for the output filename, replacing the extension with .page and adding the page number directly before the extension. If \-\-page\-filename is not specified, <input\-filename> will be used for the output filename, replacing the extension with .page and adding the page number directly before the extension.
.B Examples .B Examples
.B pdf2htmlEX --split-pages 1 foo.pdf .B pdf2htmlEX \-\-split\-pages 1 foo.pdf
Yields page files foo1.page, foo2.page, etc. Yields page files foo1.page, foo2.page, etc.
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename bar.baz .B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename bar.baz
Yields page files bar1.baz, bar2.baz, etc. Yields page files bar1.baz, bar2.baz, etc.
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename page%dbar.baz .B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename page%dbar.baz
Yields page files page1bar.baz, page2bar.baz, etc. Yields page files page1bar.baz, page2bar.baz, etc.
.B pdf2htmlEX --split-pages 1 foo.pdf --page-filename bar%03d.baz .B pdf2htmlEX \-\-split\-pages 1 foo.pdf \-\-page\-filename bar%03d.baz
Yields page files bar001.baz, bar002.baz, etc. Yields page files bar001.baz, bar002.baz, etc.
.TP .TP
.B --outline-filename <filename> (Default: <none>) .B \-\-outline\-filename <filename> (Default: <none>)
Specify the filename of the generated outline file, if not embedded. Specify the filename of the generated outline file, if not embedded.
If it's empty, the file name will be determined automatically. If it's empty, the file name will be determined automatically.
.TP .TP
.B --process-nontext <0|1> (Default: 1) .B \-\-process\-nontext <0|1> (Default: 1)
Whether to process non-text objects (as images) Whether to process non\-text objects (as images)
.TP .TP
.B --process-outline <0|1> (Default: 1) .B \-\-process\-outline <0|1> (Default: 1)
Whether to show outline in the generated HTML Whether to show outline in the generated HTML
.TP .TP
.B --process-annotation <0|1> (Default: 0) .B \-\-process\-annotation <0|1> (Default: 0)
Whether to show annotation in the generated HTML Whether to show annotation in the generated HTML
.TP .TP
.B --printing <0|1> (Default: 1) .B \-\-printing <0|1> (Default: 1)
Enable printing support. Disabling this option may reduce the size of CSS. Enable printing support. Disabling this option may reduce the size of CSS.
.TP .TP
.B --fallback <0|1> (Default: 0) .B \-\-fallback <0|1> (Default: 0)
Output in fallback mode, for better accuracy and browser compatibility, but the size becomes larger. Output in fallback mode, for better accuracy and browser compatibility, but the size becomes larger.
.TP .TP
.B --tmp-file-size-limit <limit> (Default: -1) .B \-\-tmp\-file\-size\-limit <limit> (Default: \-1)
This limits the total size (in KB) of the temporary files which will also limit the total size of the output file. This limits the total size (in KB) of the temporary files which will also limit the total size of the output file.
This is an estimate and it will stop after a page, once the total temporary files size is greater than this number. This is an estimate and it will stop after a page, once the total temporary files size is greater than this number.
-1 means no limit and is the default. \-1 means no limit and is the default.
.SS Fonts .SS Fonts
.TP .TP
.B --embed-external-font <0|1> (Default: 1) .B \-\-embed\-external\-font <0|1> (Default: 1)
Specify whether the local matched fonts, for fonts not embedded in PDF, should be embedded into HTML. Specify whether the local matched fonts, for fonts not embedded in PDF, should be embedded into HTML.
If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics. If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics.
.TP .TP
.B --font-format <format> (Default: woff) .B \-\-font\-format <format> (Default: woff)
Specify the format of fonts extracted from the PDF file. Specify the format of fonts extracted from the PDF file.
.TP .TP
.B --decompose-ligature <0|1> (Default: 0) .B \-\-decompose\-ligature <0|1> (Default: 0)
Decompose ligatures. For example 'fi' -> 'f''i'. Decompose ligatures. For example 'fi' \-> 'f''i'.
.TP .TP
.B --auto-hint <0|1> (Default: 0) .B \-\-auto\-hint <0|1> (Default: 0)
If set to 1, hints will be generated for the fonts using FontForge. If set to 1, hints will be generated for the fonts using FontForge.
This may be preceded by --external-hint-tool. This may be preceded by \-\-external\-hint\-tool.
.TP .TP
.B --external-hint-tool <tool> (Default: <none>) .B \-\-external\-hint\-tool <tool> (Default: <none>)
If specified, the tool will be called in order to enhance hinting for fonts; this will precede --auto-hint. If specified, the tool will be called in order to enhance hinting for fonts; this will precede \-\-auto\-hint.
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for --font-format. The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for \-\-font\-format.
.TP .TP
.B --stretch-narrow-glyph <0|1> (Default: 0) .B \-\-stretch\-narrow\-glyph <0|1> (Default: 0)
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs
.TP .TP
.B --squeeze-wide-glyph <0|1> (Default: 1) .B \-\-squeeze\-wide\-glyph <0|1> (Default: 1)
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated.
.TP .TP
.B --override-fstype <0|1> (Default: 0) .B \-\-override\-fstype <0|1> (Default: 0)
Clear the fstype bits in TTF/OTF fonts. Clear the fstype bits in TTF/OTF fonts.
Turn this on if Internet Explorer complains about 'Permission must be Installable' AND you have permission to do so. Turn this on if Internet Explorer complains about 'Permission must be Installable' AND you have permission to do so.
.TP .TP
.B --process-type3 <0|1> (Default: 0) .B \-\-process\-type3 <0|1> (Default: 0)
If turned on, pdf2htmlEX will try to convert Type 3 fonts such that text can be rendered natively in HTML. If turned on, pdf2htmlEX will try to convert Type 3 fonts such that text can be rendered natively in HTML.
Otherwise all text with Type 3 fonts will be rendered as image. Otherwise all text with Type 3 fonts will be rendered as image.
@ -206,17 +206,17 @@ This feature is highly experimental.
.SS Text .SS Text
.TP .TP
.B --heps <len>, --veps <len> (Default: 1) .B \-\-heps <len>, \-\-veps <len> (Default: 1)
Specify the maximum tolerable horizontal/vertical offset (in pixels). Specify the maximum tolerable horizontal/vertical offset (in pixels).
pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance. pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance.
.TP .TP
.B --space-threshold <ratio> (Default: 0.125) .B \-\-space\-threshold <ratio> (Default: 0.125)
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size. pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
.TP .TP
.B --font-size-multiplier <ratio> (Default: 4.0) .B \-\-font\-size\-multiplier <ratio> (Default: 4.0)
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering. Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers. Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers.
@ -224,52 +224,52 @@ Specify a ratio greater than 1 would resolve this issue, however it might freeze
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here. For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
.TP .TP
.B --space-as-offset <0|1> (Default: 0) .B \-\-space\-as\-offset <0|1> (Default: 0)
If set to 1, space characters will be treated as offsets, which allows a better optimization. If set to 1, space characters will be treated as offsets, which allows a better optimization.
For PDF files with bad encodings, turning on this option may cause losing characters. For PDF files with bad encodings, turning on this option may cause losing characters.
.TP .TP
.B --tounicode <-1|0|1> (Default: 0) .B \-\-tounicode <\-1|0|1> (Default: 0)
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong. A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
If this value is set to 1, the ToUnicode Map is always applied, if provided in PDF, and characters may not render correctly in HTML if there are collisions. If this value is set to 1, the ToUnicode Map is always applied, if provided in PDF, and characters may not render correctly in HTML if there are collisions.
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste. If set to \-1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
If set to 0, pdf2htmlEX would try its best to balance the two methods above. If set to 0, pdf2htmlEX would try its best to balance the two methods above.
.TP .TP
.B --optimize-text <0|1> (Default: 0) .B \-\-optimize\-text <0|1> (Default: 0)
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong. If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
.SS Background Image .SS Background Image
.TP .TP
.B --bg-format <format> (Default: png) .B \-\-bg\-format <format> (Default: png)
Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats. Specify the background image format. Run `pdf2htmlEX \-v` to check all supported formats.
.TP .TP
.B --svg-node-count-limit <limit> (Default: -1) .B \-\-svg\-node\-count\-limit <limit> (Default: \-1)
If node count in a svg background image exceeds this limit, fall back this page to bitmap background; negative value means no limit. If node count in a svg background image exceeds this limit, fall back this page to bitmap background; negative value means no limit.
This option is only useful when '--bg-format svg' is specified. Note that node count in svg is just calculated approximately. This option is only useful when '\-\-bg\-format svg' is specified. Note that node count in svg is just calculated approximately.
.TP .TP
.B --svg-embed-bitmap <0|1> (Default: 1) .B \-\-svg\-embed\-bitmap <0|1> (Default: 1)
Whether to embed bitmaps in svg background image. 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible. Whether to embed bitmaps in svg background image. 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.
JPEG images in a PDF are most possibly dumped. This option is only useful when '--bg-format svg' is specified. JPEG images in a PDF are most possibly dumped. This option is only useful when '\-\-bg\-format svg' is specified.
.SS PDF Protection .SS PDF Protection
.TP .TP
.B -o, --owner-password <password> .B \-o, \-\-owner\-password <password>
Specify owner password Specify owner password
.TP .TP
.B -u, --user-password <password> .B \-u, \-\-user\-password <password>
Specify user password Specify user password
.TP .TP
.B --no-drm <0|1> (Default: 0) .B \-\-no\-drm <0|1> (Default: 0)
Override document DRM settings Override document DRM settings
Turn this on only when you have permission. Turn this on only when you have permission.
@ -277,27 +277,27 @@ Turn this on only when you have permission.
.SS Misc. .SS Misc.
.TP .TP
.B --clean-tmp <0|1> (Default: 1) .B \-\-clean\-tmp <0|1> (Default: 1)
If switched off, intermediate files won't be cleaned in the end. If switched off, intermediate files won't be cleaned in the end.
.TP .TP
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) .B \-\-data\-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
Specify the folder holding the manifest and other files (see below for the manifest file) Specify the folder holding the manifest and other files (see below for the manifest file)
.TP .TP
.B --tmp-dir <dir> (Default: /tmp) .B \-\-tmp\-dir <dir> (Default: /tmp or $TMPDIR if set)
Specify the temporary folder to use for temporary files Specify the temporary folder to use for temporary files
.TP .TP
.B --css-draw <0|1> (Default: 0) .B \-\-css\-draw <0|1> (Default: 0)
Experimental and unsupported CSS drawing Experimental and unsupported CSS drawing
.TP .TP
.B --debug <0|1> (Default: 0) .B \-\-debug <0|1> (Default: 0)
Print debug information. Print debug information.
.TP .TP
.B --proof <0|1|2> (Default: 0) .B \-\-proof <0|1|2> (Default: 0)
Output a proof version. If a positive value is specified, texts are drawn on both text layer and background image for comparison. Output a proof version. If a positive value is specified, texts are drawn on both text layer and background image for comparison.
If 2 is specified, texts on background are in different colors. If png/jpg background format is used, If 2 is specified, texts on background are in different colors. If png/jpg background format is used,
a higher hdpi/vdpi (e.g. 288) is recommended for legibility. a higher hdpi/vdpi (e.g. 288) is recommended for legibility.
@ -305,31 +305,31 @@ a higher hdpi/vdpi (e.g. 288) is recommended for legibility.
.SS Meta .SS Meta
.TP .TP
.B -v, --version .B \-v, \-\-version
Print copyright and version info Print copyright and version info
.TP .TP
.B --help .B \-\-help
Print usage information Print usage information
.SH MANIFEST and DATA-DIR .SH MANIFEST and DATA\-DIR
When split-pages is 0, the manifest file describes how the final html page should be generated. When split\-pages is 0, the manifest file describes how the final html page should be generated.
By default, pdf2htmlEX will use the manifest in the default data-dir (run `pdf2htmlEX -v` to check), which gives a simple demo of its syntax. By default, pdf2htmlEX will use the manifest in the default data\-dir (run `pdf2htmlEX \-v` to check), which gives a simple demo of its syntax.
You can modify the default one, or you can create a new one and specify the correct data-dir in the command line. You can modify the default one, or you can create a new one and specify the correct data\-dir in the command line.
All files referred by the manifest must be located in the data-dir. All files referred by the manifest must be located in the data\-dir.
.SH EXAMPLE .SH EXAMPLE
.TP .TP
.B pdf2htmlEX /path/to/file.pdf .B pdf2htmlEX /path/to/file.pdf
Convert file.pdf into file.html Convert file.pdf into file.html
.TP .TP
.B pdf2htmlEX --clean-tmp 0 --debug 1 /path/to/file.pdf .B pdf2htmlEX \-\-clean\-tmp 0 \-\-debug 1 /path/to/file.pdf
Convert file.pdf and leave all intermediate files. Convert file.pdf and leave all intermediate files.
.TP .TP
.B pdf2htmlEX --dest-dir out --embed fi /path/to/file.pdf .B pdf2htmlEX \-\-dest\-dir out \-\-embed fi /path/to/file.pdf
Convert file.pdf into out/file.html and leave font/image files separated. Convert file.pdf into out/file.html and leave font/image files separated.
.SH COPYRIGHT .SH COPYRIGHT

View File

@ -16,7 +16,7 @@
overflow:auto; overflow:auto;
} }
#page-container { /* PDF container */ #page-container { /* PDF container */
position:absolute; /* required for calulating relative positions of pages in pdf2htmlEX.js */ position:absolute; /* required for calculating relative positions of pages in pdf2htmlEX.js */
top:0; top:0;
left:0px; left:0px;
margin:0; margin:0;
@ -154,7 +154,7 @@
transform-origin:0% 100%; transform-origin:0% 100%;
-ms-transform-origin:0% 100%; -ms-transform-origin:0% 100%;
-webkit-transform-origin:0% 100%; -webkit-transform-origin:0% 100%;
unicode-bidi:bidi-override;/* For rtl lanauges, e.g. Hebrew, we don't want the default Unicode behaviour */ unicode-bidi:bidi-override;/* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
-moz-font-feature-settings:"liga" 0;/* We don't want Firefox to recognize ligatures */ -moz-font-feature-settings:"liga" 0;/* We don't want Firefox to recognize ligatures */
} }
.@CSS_LINE_CN@:after { .@CSS_LINE_CN@:after {
@ -165,7 +165,7 @@
position:relative; position:relative;
/* _<id> for spaces may need display:inline, which will override this */ /* _<id> for spaces may need display:inline, which will override this */
display:inline-block; display:inline-block;
unicode-bidi:bidi-override; /* For rtl lanauges, e.g. Hebrew, we don't want the default Unicode behaviour */ unicode-bidi:bidi-override; /* For rtl languages, e.g. Hebrew, we don't want the default Unicode behaviour */
} }
.@CSS_WHITESPACE_CN@ { /* text shift */ .@CSS_WHITESPACE_CN@ { /* text shift */
color:transparent; color:transparent;

View File

@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh -ex
# Compile and optimize CSS code # Compile and optimize CSS code
# Copyright 2013 Lu Wang <coolwanglu@gmail.com> # Copyright 2013 Lu Wang <coolwanglu@gmail.com>

View File

@ -1,4 +1,4 @@
#!/bin/sh #!/bin/sh -ex
# Compile and optimize JS code # Compile and optimize JS code
# Copyright 2013 Lu Wang <coolwanglu@gmail.com> # Copyright 2013 Lu Wang <coolwanglu@gmail.com>

View File

@ -166,7 +166,7 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio
} }
else else
{ {
cerr << "Warning: argument '" << this->name << "' cannnot be parsed as a short option" << endl; cerr << "Warning: argument '" << this->name << "' cannot be parsed as a short option" << endl;
} }
} }
} }

View File

@ -188,7 +188,7 @@ protected:
void reset_state(); void reset_state();
// reset all ***_changed flags // reset all ***_changed flags
void reset_state_change(); void reset_state_change();
// check updated states, and determine new_line_stauts // check updated states, and determine new_line_status
// make sure this function can be called several times consecutively without problem // make sure this function can be called several times consecutively without problem
void check_state_change(GfxState * state); void check_state_change(GfxState * state);
// prepare the line context, (close old tags, open new tags) // prepare the line context, (close old tags, open new tags)

View File

@ -168,7 +168,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
} }
catch(int) catch(int)
{ {
cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl;
} }
obj2.free(); obj2.free();
@ -240,7 +240,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
cairo_t * cr = cairo_create(surface); cairo_t * cr = cairo_create(surface);
// track the positio of the origin // track the position of the origin
double ox, oy; double ox, oy;
ox = oy = 0.0; ox = oy = 0.0;
@ -792,7 +792,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
/* /*
* Step 5 * Step 5
* Generate the font, load the metrics and set the embeddig bits (fstype) * Generate the font, load the metrics and set the embedding bits (fstype)
* *
* Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them) * Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them)
* We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved. * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved.

View File

@ -67,7 +67,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
all_manager.whitespace .set_eps(param.h_eps); all_manager.whitespace .set_eps(param.h_eps);
all_manager.left .set_eps(param.h_eps); all_manager.left .set_eps(param.h_eps);
/* /*
* For othere states, we need accurate values * For other states, we need accurate values
* optimization will be done separately * optimization will be done separately
*/ */
all_manager.font_size .set_eps(EPS); all_manager.font_size .set_eps(EPS);

View File

@ -264,7 +264,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
* Rescale the font * Rescale the font
* If the font-size is 1, and the matrix is [10,0,0,10,0,0], we would like to change it to * If the font-size is 1, and the matrix is [10,0,0,10,0,0], we would like to change it to
* font-size == 10 and matrix == [1,0,0,1,0,0], * font-size == 10 and matrix == [1,0,0,1,0,0],
* such that it will be easy and natrual for web browsers * such that it will be easy and natural for web browsers
*/ */
double new_draw_text_tm[6]; double new_draw_text_tm[6];
memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm));
@ -357,7 +357,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
dy = inverted[1] * lhs1 + inverted[3] * lhs2; dy = inverted[1] * lhs1 + inverted[3] * lhs2;
if(equal(dy, 0)) if(equal(dy, 0))
{ {
// text on a same horizontal line, we can insert positive or negaive x-offsets // text on a same horizontal line, we can insert positive or negative x-offsets
merged = true; merged = true;
} }
else if(param.optimize_text) else if(param.optimize_text)

View File

@ -291,7 +291,7 @@ void HTMLTextLine::optimize(std::vector<HTMLTextLine*> & lines)
*/ */
void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines) void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
{ {
// remove unuseful states in the end // remove useless states in the end
while((!states.empty()) && (states.back().start_idx >= text.size())) while((!states.empty()) && (states.back().start_idx >= text.size()))
states.pop_back(); states.pop_back();
@ -416,9 +416,9 @@ void HTMLTextLine::optimize_normal(std::vector<HTMLTextLine*> & lines)
// Optimize word space // Optimize word space
// In some PDF files all spaces are converted into positionig shift // In some PDF files all spaces are converted into positioning shift
// We may try to change (some of) them to ' ' by adjusting word_space // We may try to change (some of) them to ' ' by adjusting word_space
// for now, we cosider only the no-space scenario // for now, we consider only the no-space scenario
// which also includes the case when param.space_as_offset is set // which also includes the case when param.space_as_offset is set
// get the text segment covered by current state (*state_iter1) // get the text segment covered by current state (*state_iter1)
@ -551,7 +551,7 @@ void HTMLTextLine::State::begin (ostream & out, const State * prev_state)
else else
out << ids[i]; out << ids[i];
} }
// veritcal align // vertical align
if(!equal(vertical_align, 0)) if(!equal(vertical_align, 0))
{ {
// so we have to dump it // so we have to dump it

View File

@ -57,7 +57,7 @@ public:
long long ids[ID_COUNT]; long long ids[ID_COUNT];
size_t start_idx; // index of the first Text using this state size_t start_idx; // index of the first Text using this state
// for optimzation // for optimization
long long hash_value; long long hash_value;
long long hash_umask; // some states may not be actually used long long hash_umask; // some states may not be actually used
bool need_close; bool need_close;

View File

@ -84,7 +84,7 @@ protected:
// Be careful about the mixed usage of Matrix and const double * // Be careful about the mixed usage of Matrix and const double *
// the input is usually double *, which might be changed, so we have to copy the content out // the input is usually double *, which might be changed, so we have to copy the content out
// in the map we use Matrix instead of double * such that the array may be automatically release when deconstructign // in the map we use Matrix instead of double * such that the array may be automatically release when deconstructing
template <class Imp> template <class Imp>
class StateManager<Matrix, Imp> class StateManager<Matrix, Imp>
{ {
@ -302,7 +302,7 @@ public:
void dump_value(std::ostream & out, const Matrix & matrix) { void dump_value(std::ostream & out, const Matrix & matrix) {
// always ignore tm[4] and tm[5] because // always ignore tm[4] and tm[5] because
// we have already shifted the origin // we have already shifted the origin
// TODO: recognize common matices // TODO: recognize common matrices
const auto & m = matrix.m; const auto & m = matrix.m;
auto prefixes = {"", "-ms-", "-webkit-"}; auto prefixes = {"", "-ms-", "-webkit-"};
if(tm_equal(m, ID_MATRIX, 4)) if(tm_equal(m, ID_MATRIX, 4))

View File

@ -26,7 +26,7 @@ set(CSS_STROKE_COLOR_CN "sc") # Stroke Color
set(CSS_LETTER_SPACE_CN "ls") # Letter Space set(CSS_LETTER_SPACE_CN "ls") # Letter Space
set(CSS_WORD_SPACE_CN "ws") # Word Space set(CSS_WORD_SPACE_CN "ws") # Word Space
set(CSS_VERTICAL_ALIGN_CN "v") # Vertial align set(CSS_VERTICAL_ALIGN_CN "v") # Vertical align
set(CSS_WHITESPACE_CN "_") # whitespace set(CSS_WHITESPACE_CN "_") # whitespace
set(CSS_LEFT_CN "x") # X set(CSS_LEFT_CN "x") # X
set(CSS_HEIGHT_CN "h") # Height set(CSS_HEIGHT_CN "h") # Height

View File

@ -159,7 +159,7 @@ void parse_options (int argc, char **argv)
.add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("split-pages", &param.split_pages, 0, "split pages into separate files")
.add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("dest-dir", &param.dest_dir, ".", "specify destination directory")
.add("css-filename", &param.css_filename, "", "filename of the generated css file") .add("css-filename", &param.css_filename, "", "filename of the generated css file")
.add("page-filename", &param.page_filename, "", "filename template for splitted pages ") .add("page-filename", &param.page_filename, "", "filename template for split pages ")
.add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file")
.add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text") .add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text")
.add("process-outline", &param.process_outline, 1, "show outline in HTML") .add("process-outline", &param.process_outline, 1, "show outline in HTML")
@ -201,7 +201,7 @@ void parse_options (int argc, char **argv)
// misc. // misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of tempory directory.") .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory.")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
// .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing")
@ -356,7 +356,18 @@ int main(int argc, char **argv)
param.data_dir = get_exec_dir(argv[0]); param.data_dir = get_exec_dir(argv[0]);
param.tmp_dir = get_tmp_dir(); param.tmp_dir = get_tmp_dir();
#else #else
param.tmp_dir = "/tmp"; char const* tmp = getenv("TMPDIR");
#ifdef P_tmpdir
if (!tmp)
tmp = P_tmpdir;
#endif
#ifdef _PATH_TMP
if (!tmp)
tmp = _PATH_TMP;
#endif
if (!tmp)
tmp = "/tmp";
param.tmp_dir = string(tmp);
param.data_dir = PDF2HTMLEX_DATA_PATH; param.data_dir = PDF2HTMLEX_DATA_PATH;
#endif #endif

View File

@ -49,7 +49,7 @@ void ffw_add_empty_char(int32_t unicode, int width);
// metrics // metrics
int ffw_get_em_size(void); int ffw_get_em_size(void);
// manipulate ascent and descent // manipulate ascent and descent
// asscent is between 0 and 1 // ascent is between 0 and 1
// descent is between -1 and 0 // descent is between -1 and 0
void ffw_fix_metric(); void ffw_fix_metric();
// get ascent/descent based on the shape // get ascent/descent based on the shape

View File

@ -38,7 +38,7 @@ bool isLegalUnicode(Unicode u)
/* /*
* 9, 10 and 13 are interpreted as white-spaces in HTML * 9, 10 and 13 are interpreted as white-spaces in HTML
* `word-spacing` may be applied on them * `word-spacing` may be applied on them
* and the browser may not use the actualy glyphs in the font * and the browser may not use the actual glyphs in the font
* So mark them as illegal * So mark them as illegal
* *
* The problem is that the correct value can not be copied out in this way * The problem is that the correct value can not be copied out in this way

View File

@ -259,5 +259,4 @@ if __name__=="__main__":
print >> sys.stderr, "Cannot locate pdf2htmlEX executable. Make sure source was built before running this test." print >> sys.stderr, "Cannot locate pdf2htmlEX executable. Make sure source was built before running this test."
exit(1) exit(1)
suite = unittest.loader.TestLoader().loadTestsFromTestCase(OutputNamingTests) unittest.main()
unittest.TextTestRunner(verbosity=2).run(suite)