From 952a3409f3843aedef558b549cd21b884a9b2c38 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:16:38 +0000 Subject: [PATCH 01/10] cleaned up usage beahviour --- src/pdf2htmlEX.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index b337871..94d41db 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -36,9 +36,7 @@ ArgParser argparser; void show_usage_and_exit(const char * dummy = nullptr) { - cerr << "Usage: pdf2htmlEX [Options] []" << endl; - cerr << endl; - cerr << "Options:" << endl; + cerr << "Usage: pdf2htmlEX [options] []" << endl; argparser.show_usage(cerr); cerr << endl; cerr << "Run 'man pdf2htmlEX' for detailed information" << endl; @@ -141,8 +139,7 @@ int main(int argc, char **argv) parse_options(argc, argv); if (param.input_filename == "") { - cerr << "Missing input filename" << endl; - exit(EXIT_FAILURE); + show_usage_and_exit(); } //prepare the directories From 40e9f8983320b5949e845490e16b8f53ba27443d Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:17:57 +0000 Subject: [PATCH 02/10] don't need every program telling people how to use unix --- src/pdf2htmlEX.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 94d41db..18a1d6c 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -38,9 +38,6 @@ void show_usage_and_exit(const char * dummy = nullptr) { cerr << "Usage: pdf2htmlEX [options] []" << endl; argparser.show_usage(cerr); - cerr << endl; - cerr << "Run 'man pdf2htmlEX' for detailed information" << endl; - cerr << endl; exit(EXIT_FAILURE); } From 35d9668e90e60a09d1a46cf77165d9aa301abe7e Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:26:07 +0000 Subject: [PATCH 03/10] pdftohtml/pdftocairo/etc style usage defaults --- src/util/ArgParser.cc | 2 +- src/util/ArgParser.h | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/util/ArgParser.cc b/src/util/ArgParser.cc index 04381e6..de3b270 100644 --- a/src/util/ArgParser.cc +++ b/src/util/ArgParser.cc @@ -164,6 +164,6 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio } } -const int ArgParser::arg_col_width = 40; +const int ArgParser::arg_col_width = 31; } // namespace pdf2htmlEX diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index 432ec59..24a7db9 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -162,12 +162,6 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg) { sout << " "; - if(!dont_show_default) - { - sout << " (="; - dump_value(sout, default_value); - sout << ")"; - } } std::string s = sout.str(); @@ -175,8 +169,17 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const for(int i = s.size(); i < arg_col_width; ++i) out << ' '; - - out << " " << description << std::endl; + + out << " " << description; + + if(need_arg && !dont_show_default) + { + out << " (default is "; + dump_value(out, default_value); + out << ")"; + } + + out << std::endl; } } // namespace ArgParser From 81ee37e4ae0274a1e0ec2a18d3802e4659453461 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:30:04 +0000 Subject: [PATCH 04/10] show argument types, like pdftohtml/pdftocairo/etc --- src/util/ArgParser.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index 24a7db9..d89dc35 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -39,6 +39,23 @@ void dump_value(std::ostream & out, const T & v) extern void dump_value(std::ostream & out, const std::string & v); +// type names helper +template +struct type_name { + static char const* value() { return "unknown"; } +}; + +template<> struct type_name { + static char const* value() { return "int"; } +}; + +template<> struct type_name { + static char const* value() { return "fp"; } +}; + +template<> struct type_name { + static char const* value() { return "string"; } +}; class ArgParser { @@ -161,7 +178,7 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg) { - sout << " "; + sout << " <" << type_name::value() << ">"; } std::string s = sout.str(); From d250ccf829a429f49bc6a70d31d2374011903d2c Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:45:12 +0000 Subject: [PATCH 05/10] cleaned-up usage descriptions --- src/pdf2htmlEX.cc | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 18a1d6c..602a164 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -54,8 +54,8 @@ void show_version_and_exit(const char * dummy = nullptr) void parse_options (int argc, char **argv) { argparser - .add("help,h", "show all options", &show_usage_and_exit) - .add("version,v", "show copyright and version info", &show_version_and_exit) + .add("help,h", "print usage information", &show_usage_and_exit) + .add("version,v", "print copyright and version info", &show_version_and_exit) .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true) .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true) @@ -64,43 +64,43 @@ void parse_options (int argc, char **argv) .add("dest-dir", ¶m.dest_dir, ".", "specify destination directory") .add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") - .add("first-page,f", ¶m.first_page, 1, "first page to process") - .add("last-page,l", ¶m.last_page, numeric_limits::max(), "last page to process") + .add("first-page,f", ¶m.first_page, 1, "first page to convert") + .add("last-page,l", ¶m.last_page, numeric_limits::max(), "last page to convert") .add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true) - .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", nullptr, true) - .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", nullptr, true) - .add("hdpi", ¶m.h_dpi, 144.0, "horizontal DPI for non-text") - .add("vdpi", ¶m.v_dpi, 144.0, "vertical DPI for non-text") + .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", nullptr, true) + .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", nullptr, true) + .add("hdpi", ¶m.h_dpi, 144.0, "horizontal resolution for graphics in DPI") + .add("vdpi", ¶m.v_dpi, 144.0, "vertical resolution for graphics in DPI") .add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox") - .add("process-nontext", ¶m.process_nontext, 1, "process nontext objects") - .add("single-html", ¶m.single_html, 1, "combine everything into one single HTML file") - .add("split-pages", ¶m.split_pages, 0, "split pages into separated files") - .add("embed-base-font", ¶m.embed_base_font, 0, "embed local matched font for base 14 fonts in the PDF file") - .add("embed-external-font", ¶m.embed_external_font, 0, "embed local matched font for external fonts in the PDF file") - .add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, for example 'fi' -> 'f''i'") + .add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text") + .add("single-html", ¶m.single_html, 1, "generate a single HTML file") + .add("split-pages", ¶m.split_pages, 0, "split pages into separate files") + .add("embed-base-font", ¶m.embed_base_font, 0, "embed local match for standard 14 fonts") + .add("embed-external-font", ¶m.embed_external_font, 0, "embed local match for external fonts") + .add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") - .add("heps", ¶m.h_eps, 1.0, "max tolerated horizontal offset (in pixels)") - .add("veps", ¶m.v_eps, 1.0, "max tolerated vertical offset (in pixels)") - .add("space-threshold", ¶m.space_threshold, (1.0/8), "distance no thiner than (threshold * em) will be considered as a space character") - .add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "setting a value greater than 1 would increase the rendering accuracy") - .add("auto-hint", ¶m.auto_hint, 0, "Whether to generate hints for fonts") - .add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") + .add("heps", ¶m.h_eps, 1.0, "horizontal threshold for merging text, in pixels") + .add("veps", ¶m.v_eps, 1.0, "vertical threshold for merging text, in pixels") + .add("space-threshold", ¶m.space_threshold, (1.0/8), "word break threshold (threshold * em)") + .add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy") + .add("auto-hint", ¶m.auto_hint, 0, "use fontforge autohint on fonts without hints") + .add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") - .add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") - .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "squeeze wide glyphs instead of truncating") + .add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") + .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") .add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") - .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files") - .add("font-format", ¶m.font_format, "opentype", "format for extracted font files") - .add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hintting fonts.(overrides --auto-hint)") - .add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file") - .add("outline-filename", ¶m.outline_filename, "", "Specify the file name of the generated outline file") + .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") + .add("font-format", ¶m.font_format, "opentype", "CSS @font-face format for embedded fonts") + .add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") + .add("css-filename", ¶m.css_filename, "", "filename of the generated css file") + .add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file") - .add("debug", ¶m.debug, 0, "output debug information") - .add("clean-tmp", ¶m.clean_tmp, 1, "clean temporary files after processing") - .add("css-draw", ¶m.css_draw, 0, "[Experimental and Unsupported] CSS Drawing") + .add("debug", ¶m.debug, 0, "print debugging information") + .add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion") + .add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing") .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") ; From c80a732ea50e068443134495b99026d3865adb06 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:54:42 +0000 Subject: [PATCH 06/10] re-ordered usage descriptions to be more like pdftocairo,etc --- src/pdf2htmlEX.cc | 64 ++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 602a164..a5be7af 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -54,53 +54,61 @@ void show_version_and_exit(const char * dummy = nullptr) void parse_options (int argc, char **argv) { argparser - .add("help,h", "print usage information", &show_usage_and_exit) - .add("version,v", "print copyright and version info", &show_version_and_exit) - - .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true) - .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true) - .add("no-drm", ¶m.no_drm, 0, "override document DRM settings") - - .add("dest-dir", ¶m.dest_dir, ".", "specify destination directory") - .add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") - + // pages .add("first-page,f", ¶m.first_page, 1, "first page to convert") .add("last-page,l", ¶m.last_page, numeric_limits::max(), "last page to convert") - + + // dimensions .add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true) .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", nullptr, true) .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", nullptr, true) + .add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox") .add("hdpi", ¶m.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("vdpi", ¶m.v_dpi, 144.0, "vertical resolution for graphics in DPI") - .add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox") - - .add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text") + + // output files .add("single-html", ¶m.single_html, 1, "generate a single HTML file") .add("split-pages", ¶m.split_pages, 0, "split pages into separate files") + .add("dest-dir", ¶m.dest_dir, ".", "specify destination directory") + .add("css-filename", ¶m.css_filename, "", "filename of the generated css file") + .add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file") + + // embedded fonts .add("embed-base-font", ¶m.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-external-font", ¶m.embed_external_font, 0, "embed local match for external fonts") + .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") + .add("font-format", ¶m.font_format, "opentype", "CSS @font-face format for embedded fonts") .add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") - + .add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") + .add("auto-hint", ¶m.auto_hint, 0, "use fontforge autohint on fonts without hints") + .add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") + .add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") + .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") + + // text .add("heps", ¶m.h_eps, 1.0, "horizontal threshold for merging text, in pixels") .add("veps", ¶m.v_eps, 1.0, "vertical threshold for merging text, in pixels") .add("space-threshold", ¶m.space_threshold, (1.0/8), "word break threshold (threshold * em)") .add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy") - .add("auto-hint", ¶m.auto_hint, 0, "use fontforge autohint on fonts without hints") - .add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") - .add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") - .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") - .add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") - - .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") - .add("font-format", ¶m.font_format, "opentype", "CSS @font-face format for embedded fonts") - .add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") - .add("css-filename", ¶m.css_filename, "", "filename of the generated css file") - .add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file") - - .add("debug", ¶m.debug, 0, "print debugging information") + .add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") + + // encryption + .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true) + .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true) + .add("no-drm", ¶m.no_drm, 0, "override document DRM settings") + + // misc. .add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion") + .add("process-nontext", ¶m.process_nontext, 1, "render graphics in addition to text") + .add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") .add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing") + .add("debug", ¶m.debug, 0, "print debugging information") + + // meta + .add("version,v", "print copyright and version info", &show_version_and_exit) + .add("help,h", "print usage information", &show_usage_and_exit) + .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") ; From 4cddb4dbb3ed6403e0fd6febec0658e05d8c1344 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 23:11:29 +0000 Subject: [PATCH 07/10] re-ordered Param.h to match new usage order --- src/Param.h | 78 ++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/src/Param.h b/src/Param.h index 2a49ddd..f883a67 100644 --- a/src/Param.h +++ b/src/Param.h @@ -15,64 +15,56 @@ namespace pdf2htmlEX { struct Param { - // PDF stuff - std::string owner_password, user_password; - std::string input_filename, output_filename; - int no_drm; - - // path - std::string dest_dir, tmp_dir, data_dir; - - // normal parameters + // pages int first_page, last_page; - + + // dimensions double zoom; double fit_width, fit_height; - double h_dpi, v_dpi; int use_cropbox; - - int process_nontext; + double h_dpi, v_dpi; + + // output files int single_html; int split_pages; + std::string dest_dir; + std::string css_filename; + std::string outline_filename; + + // embedded fonts int embed_base_font; int embed_external_font; + std::string font_suffix, font_format; int decompose_ligature; - - // Advanced tweak - /* - * Position & Size - */ + int remove_unused_glyph; + int auto_hint; + std::string external_hint_tool; + int stretch_narrow_glyph; + int squeeze_wide_glyph; + + // text double h_eps, v_eps; double space_threshold; double font_size_multiplier; - - /* - * Font - */ - int auto_hint; - int tounicode; int space_as_offset; - int stretch_narrow_glyph; - int squeeze_wide_glyph; - int remove_unused_glyph; - - std::string font_suffix, font_format; - std::string external_hint_tool; - - /* - * Output - */ - std::string css_filename; - std::string outline_filename; - - /* - * Debug - */ - int debug; + int tounicode; + + // encryption + std::string owner_password, user_password; + int no_drm; + + // misc. int clean_tmp; - - // experimental + int process_nontext; + std::string data_dir; int css_draw; + int debug; + + // non-optional + std::string input_filename, output_filename; + + // not a paramater + std::string tmp_dir; }; } // namespace pdf2htmlEX From 6c16aedb634a8358371143db94e1f8a2c30a2bf2 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 00:04:32 +0000 Subject: [PATCH 08/10] re-ordered manpage to match new usage order and grouping --- pdf2htmlEX.1.in | 182 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 128 insertions(+), 54 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index baff339..38af18e 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -22,49 +22,42 @@ Fonts are extracted form PDF and then embedded into HTML (Type 3 fonts are not s Other objects are rendered as images and also embedded. .SH OPTIONS + + .TP -.B --help -Show all options -.TP -.B -v, --version -Show copyright and version -.TP -.B -o, --owner-password -Specify owner password -.TP -.B -u, --user-password -Specify user password -.TP -.B --no-drm <0|1> (Default: 0) -Override document DRM settings -.TP -.B --dest-dir (Default: .) -Specify destination folder -.TP -.B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) -Specify the folder holding the manifest and other files +.B Pages + .TP .B -f, --first-page (Default: 1) Specify the first page to process + .TP .B -l, --last-page (Default: last page) Specify the last page to process + + .TP +.B Dimensions + .B --zoom , --fit-width , --fit-height --zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels. If multiple values are specified, the minimum one will be used. If none is specified, pages will be rendered as 72DPI. -.TP -.B --hdpi , --vdpi (Default: 144) -Specify the horizontal and vertical DPI for images + .TP .B --use-cropbox <0|1> (Default: 0) Use CropBox instead of MediaBox for output. + .TP -.B --process-nontext <0|1> (Default: 1) -Whether to process non-text objects (as images) +.B --hdpi , --vdpi (Default: 144) +Specify the horizontal and vertical DPI for images + + +.TP +.B Output Files + .TP .B --single-html <0|1> (Default: 1) Whether to embed everything into one HTML file. @@ -74,6 +67,7 @@ If switched off, there will be several files generated along with the HTML file Note that the outline will always be embedded into the main HTML file no matter if this switch is on or not. And only when this switch is off will there be a separate .outline file contains the outline. You need to modify the manifest if you do not want outline embedded. + .TP .B --split-pages <0|1> (Default: 0) If turned on, pages will be stored into separated files named as 0.page, 1.page, ... @@ -81,6 +75,27 @@ If turned on, pages will be stored into separated files named as .html generated. This switch is useful if you want pages to be loaded separately & dynamically -- in which case you need to compose the page yourself, and a supporting backend might be necessary. + +.TP +.B --dest-dir (Default: .) +Specify destination folder + +.TP +.B --css-filename (Default: ) +Specify the filename of the generated css file, if not embedded. + +If it's empty, the file name will be determined automatically. + +.TP +.B --outline-filename (Default: ) +Specify the filename of the generated outline file, if not embedded. + +If it's empty, the file name will be determined automatically. + + +.TP +.B Embedded Fonts + .TP .B --embed-base-font <0|1> (Default: 1) Whether to embed base 14 fonts. @@ -88,20 +103,57 @@ Whether to embed base 14 fonts. There are several base font defined in PDF standards, which are supposed to be provided by the PDF reader. If this switch is on, local matched font will be used and embedded; otherwise only font names are exported such that web browsers may try to find proper fonts themselves. + .TP .B --embed-external-font <0|1> (Default: 0) Similar as above but for non-base fonts. + +.TP +.B --font-suffix (Default: .ttf), --font-format (Default: truetype) +Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. + .TP .B --decompose-ligature <0|1> (Default: 0) Decompose ligatures. For example 'fi' -> 'f''i'. + +.TP +.B --remove-unused-glyph <0|1> (Default: 1) +If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size. + +.TP +.B --auto-hint <0|1> (Default: 0) +If set to 1, hints will be generated for the fonts using fontforge. + +This may be preceded by --external-hint-tool. + +.TP +.B --external-hint-tool (Default: ) +If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint. + +The tool will be called as ' ', where suffix will be the same as specified for --font-suffix. + +.TP +.B --stretch-narrow-glyph <0|1> (Default: 0) +If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs + +.TP +.B --squeeze-wide-glyph <0|1> (Default: 1) +If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. + + +.TP +.B Text + .TP .B --heps , --veps (Default: 1) Specify the maximum tolerable horizontal/vertical offset (in pixels). pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance. + .TP .B --space-threshold (Default: 1.0/6) pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size. + .TP .B --font-size-multiplier (Default: 4.0) Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering. @@ -109,11 +161,13 @@ Many web browsers limit the minimum font size, and many would round the given fo Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers. For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here. -.TP -.B --auto-hint <0|1> (Default: 0) -If set to 1, hints will be generated for the fonts using fontforge. -This may be preceded by --external-hint-tool. +.TP +.B --space-as-offset <0|1> (Default: 0) +Treat space characters as offsets, which may increase the size of the output. + +Turn it on if space characters are not displayed correctly, or you want to remove positional spaces. + .TP .B --tounicode <-1|0|1> (Default: 0) A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong. @@ -123,40 +177,60 @@ If this value is set to 1, the ToUnicode Map is always applied, if provided in P If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste. If set to 0, pdf2htmlEX would try its best to balance the two methods above. -.TP -.B --space-as-offset <0|1> (Default: 0) -Treat space characters as offsets, which may increase the size of the output. -Turn it on if space characters are not displayed correctly, or you want to remove positional spaces. -.TP -.B --stretch-narrow-glyph <0|1> (Default: 0) -If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs -.TP -.B --squeeze-wide-glyph <0|1> (Default: 1) -If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. -.TP -.B --remove-unused-glyph <0|1> (Default: 1) -If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size. -.TP -.B --font-suffix (Default: .ttf), --font-format (Default: truetype) -Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. -.TP -.B --external-hint-tool (Default: ) -If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint. -The tool will be called as ' ', where suffix will be the same as specified for --font-suffix. .TP -.B --css-filename (Default: ) -Specify the filename of the generated css file, if not embedded. +.B Encryption -If it's empty, the file name will be determined automatically. .TP -.B --debug <0|1> (Default: 0) -Show debug information. +.B -o, --owner-password +Specify owner password + +.TP +.B -u, --user-password +Specify user password + +.TP +.B --no-drm <0|1> (Default: 0) +Override document DRM settings + + +.TP +.B Misc. + .TP .B --clean-tmp <0|1> (Default: 1) If switched off, intermediate files won't be cleaned in the end. +.TP +.B --process-nontext <0|1> (Default: 1) +Whether to process non-text objects (as images) + +.TP +.B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) +Specify the folder holding the manifest and other files + +.TP +.B --css-draw <0|1> (Default: 0) +Experimental and unsupported CSS drawing + +.TP +.B --debug <0|1> (Default: 0) +Print debug information. + + +.TP +.B Meta + +.TP +.B -v, --version +Print copyright and version info + +.TP +.B --help +Print usage information + + .SH EXAMPLE .TP .B pdf2htmlEX /path/to/file.pdf From e79c2884bbee8cbcd793c7992c7a1a65797b3971 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 10:38:39 +0000 Subject: [PATCH 09/10] usage: embedded fonts -> fonts --- pdf2htmlEX.1.in | 2 +- src/Param.h | 2 +- src/pdf2htmlEX.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 38af18e..630ddb6 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -94,7 +94,7 @@ If it's empty, the file name will be determined automatically. .TP -.B Embedded Fonts +.B Fonts .TP .B --embed-base-font <0|1> (Default: 1) diff --git a/src/Param.h b/src/Param.h index f883a67..fdabc8c 100644 --- a/src/Param.h +++ b/src/Param.h @@ -31,7 +31,7 @@ struct Param std::string css_filename; std::string outline_filename; - // embedded fonts + // fonts int embed_base_font; int embed_external_font; std::string font_suffix, font_format; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index a5be7af..ba3eaec 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -73,7 +73,7 @@ void parse_options (int argc, char **argv) .add("css-filename", ¶m.css_filename, "", "filename of the generated css file") .add("outline-filename", ¶m.outline_filename, "", "filename of the generated outline file") - // embedded fonts + // fonts .add("embed-base-font", ¶m.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-external-font", ¶m.embed_external_font, 0, "embed local match for external fonts") .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") From efbd9c8c1effb3f7d71d6b79a548642c9be8f247 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 10:39:25 +0000 Subject: [PATCH 10/10] usage: default is -> default: --- src/util/ArgParser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index d89dc35..2f68991 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -191,7 +191,7 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg && !dont_show_default) { - out << " (default is "; + out << " (default: "; dump_value(out, default_value); out << ")"; }