diff --git a/build_for_ppa.py b/build_for_ppa.py old mode 100755 new mode 100644 diff --git a/debian/rules b/debian/rules old mode 100755 new mode 100644 diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 96e8deb..e696683 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -35,6 +35,7 @@ Specify the last page to process .SS Dimensions +.TP .B --zoom , --fit-width , --fit-height --zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels. @@ -53,16 +54,26 @@ Specify the horizontal and vertical DPI for images .SS Output -.TP -.B --single-html <0|1> (Default: 1) -Whether to embed everything into one HTML file. +.B --embed +.br +.B --embed-css <0|1> (Default: 1) +.br +.B --embed-font <0|1> (Default: 1) +.br +.B --embed-image <0|1> (Default: 1) +.br +.B --embed-javascript <0|1> (Default: 1) +.br +.B --embed-outline <0|1> (Default: 1) +.RS +Specify which elements should be embedded into the output HTML file. -If switched off, there will be several files generated along with the HTML file including files for fonts, css, images. - -Note that the outline will always be embedded into the main HTML file no matter if this switch is on or not. -And only when this switch is off will there be a separate .outline file contains the outline. -You need to modify the manifest if you do not want outline embedded. +If switched off, separate files will be generated along with the HTML file for the corresponding elements. +--embed accepts a string as its argument. Each letter of the string must be one of `cCfFiIjJoO`, each of which corresponds +to one of the --embed-*** switches. A lower case letter stands for 0 and an upper case letter for 1. For example, +`--embed cFIJo` means to embed everything but CSS files and outlines. +.RE .TP .B --split-pages <0|1> (Default: 0) If turned on, the content of each page is stored in a separated file. 
diff --git a/src/ArgParser.cc b/src/ArgParser.cc index 2a7903e..d5b6667 100644 --- a/src/ArgParser.cc +++ b/src/ArgParser.cc @@ -41,9 +41,20 @@ void dump_value(std::ostream & out, const std::string & v) out << '"' << v << '"'; } -ArgParser & ArgParser::add(const char * optname, const char * description, ArgParserCallBack callback) +ArgParser & ArgParser::add(const char * optname, const char * description, ArgParserCallBack callback, bool need_arg) { - return add(optname, nullptr, 0, description, callback, true); + // ArgEntry does not accept nullptr as optname nor description + if((!optname) || (!optname[0])) + { + // when optname is nullptr or "", it's optional, and description is dropped + optional_arg_entries.emplace_back(new ArgEntry("", "", callback, need_arg)); + } + else + { + arg_entries.emplace_back(new ArgEntry(optname, (description ? description : ""), callback, need_arg)); + } + + return *this; } void ArgParser::parse(int argc, char ** argv) const diff --git a/src/ArgParser.h b/src/ArgParser.h index e560bc3..c0f8cde 100644 --- a/src/ArgParser.h +++ b/src/ArgParser.h @@ -42,79 +42,86 @@ extern void dump_value(std::ostream & out, const std::string & v); class ArgParser { - public: - typedef void (*ArgParserCallBack) (const char * arg); +public: + typedef void (*ArgParserCallBack) (const char * arg); - /* - * The 1st is for arg without arguments (i.e. flags), and the 2nd is for general args. - * optname: - * - if not nullptr, it should be the name of the arg, should be in the format of "[,]", e.g. 
"help,h" - * - if nullptr, it denotes an optional arg, and description will be ignored - * description: - * - if description is nullptr or "", the argument won't be shown in show_usage() - */ - ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr); - template - ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false); + /* + * The 1st is for arguments with callbacks(i.e. flags) + * The 2nd is for arguments linked to variables + * + * optname: + * - if not nullptr, it should be the name of the arg, should be in the format of "[,]", e.g. "help,h" + * - if nullptr, it denotes an optional arg, and description will be ignored + * description: + * - if description is nullptr or "", the argument won't be shown in show_usage() + * + * location: + * - if not nullptr, the argument for this arg is stored there + * - if nullptr, this arg does not need arguments + */ + ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback, bool need_arg = false); + template + ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, bool dont_show_default = false); - void parse(int argc, char ** argv) const; - void show_usage(std::ostream & out) const; + void parse(int argc, char ** argv) const; + void show_usage(std::ostream & out) const; + +private: + // type names helper + template + static const char * get_type_name(void) { return "unknown"; } + + struct ArgEntryBase + { + /* name or description cannot be nullptr */ + ArgEntryBase(const char * name, const char * description, bool need_arg); + virtual ~ArgEntryBase() { } + char shortname; + std::string name; + std::string description; + bool need_arg; + virtual void parse (const char * arg) const = 0; + virtual void show_usage (std::ostream & out) const = 0; + }; + + template + struct ArgEntry : 
public ArgEntryBase + { + ArgEntry(const char * name, + const char * description, + ArgParserCallBack callback, + bool need_arg); + + ArgEntry(const char * name, + T * location, const Tv & default_value, + const char * description, bool dont_show_default); + + virtual void parse (const char * arg) const; + virtual void show_usage (std::ostream & out) const; private: - // type names helper - template - static const char * get_type_name(void) { return "unknown"; } + T * location; + T default_value; + ArgParserCallBack callback; + bool dont_show_default; + }; - class ArgEntryBase - { - public: - /* name or description cannot be nullptr */ - ArgEntryBase(const char * name, const char * description, bool need_arg); - virtual ~ArgEntryBase() { } - char shortname; - std::string name; - std::string description; - bool need_arg; - virtual void parse (const char * arg) const = 0; - virtual void show_usage (std::ostream & out) const = 0; - }; - - template - class ArgEntry : public ArgEntryBase - { - public: - ArgEntry(const char * name, - T * location, const Tv & deafult_value, - ArgParserCallBack callback, - const char * description, bool dont_show_default); - - - virtual void parse (const char * arg) const; - virtual void show_usage (std::ostream & out) const; - - private: - T * location; - T default_value; - ArgParserCallBack callback; - bool dont_show_default; - }; - - std::vector> arg_entries, optional_arg_entries; - static const int arg_col_width; + std::vector> arg_entries, optional_arg_entries; + static const int arg_col_width; }; template -ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback, bool dont_show_default) +ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, bool dont_show_default) { // ArgEntry does not accept nullptr as optname nor description if((!optname) || (!optname[0])) { // when optname is nullptr or 
"", it's optional, and description is dropped - optional_arg_entries.emplace_back(new ArgEntry("", location, default_value, callback, "", dont_show_default)); + optional_arg_entries.emplace_back(new ArgEntry("", location, default_value, "", dont_show_default)); } else { - arg_entries.emplace_back(new ArgEntry(optname, location, default_value, callback, (description ? description : ""), dont_show_default)); + arg_entries.emplace_back(new ArgEntry(optname, location, default_value, (description ? description : ""), dont_show_default)); } return *this; @@ -126,12 +133,22 @@ template<> const char * ArgParser::get_type_name (void); template<> const char * ArgParser::get_type_name (void); template -ArgParser::ArgEntry::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description, bool dont_show_default) +ArgParser::ArgEntry::ArgEntry(const char * name, const char * description, ArgParserCallBack callback, bool need_arg) + : ArgEntryBase(name, description, need_arg) + , location(nullptr) + , default_value() + , callback(callback) + , dont_show_default(true) +{ +} + +template +ArgParser::ArgEntry::ArgEntry(const char * name, T * location, const Tv & default_value, const char * description, bool dont_show_default) : ArgEntryBase(name, description, (location != nullptr)) - , location(location) - , default_value(default_value) - , callback(callback) - , dont_show_default(dont_show_default) + , location(location) + , default_value(default_value) + , callback(nullptr) + , dont_show_default(dont_show_default) { if(need_arg) *location = T(default_value); @@ -145,7 +162,7 @@ void ArgParser::ArgEntry::parse(const char * arg) const if(!arg) throw std::string("Missing argument of option: --") + name; - if(!read_value(arg, location)) + if((location != nullptr) && (!read_value(arg, location))) throw std::string("Invalid argument: ") + arg; } diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 7665748..161bca0 
100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -562,10 +562,10 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved. */ string fn = (char*)str_fmt("%s/f%llx%s", - (param.single_html ? param.tmp_dir : param.dest_dir).c_str(), + (param.embed_font ? param.tmp_dir : param.dest_dir).c_str(), info.id, param.font_suffix.c_str()); - if(param.single_html) + if(param.embed_font) tmp_files.add(fn); ffw_load_font(cur_tmp_fn.c_str()); @@ -763,7 +763,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff { auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); - if(param.single_html) + if(param.embed_font) { auto path = param.tmp_dir + "/" + (char*)fn; ifstream fin(path, ifstream::binary); diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index d78bfaa..d94e732 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -122,8 +122,8 @@ void HTMLRenderer::process(PDFDoc *doc) if(param.process_nontext) { - auto fn = str_fmt("%s/bg%x.png", (param.single_html ? param.tmp_dir : param.dest_dir).c_str(), i); - if(param.single_html) + auto fn = str_fmt("%s/bg%x.png", (param.embed_image ? 
param.tmp_dir : param.dest_dir).c_str(), i); + if(param.embed_image) tmp_files.add((char*)fn); bg_renderer->render_page(doc, i, (char*)fn); @@ -196,7 +196,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) { f_pages.fs << "\"\"second; - if(param.single_html) + if(param.*(entry.embed_flag)) { ifstream fin(path, ifstream::binary); if(!fin) throw string("Cannot open file ") + path + " for embedding"; - out << iter->second.first << endl + out << entry.prefix_embed << endl << fin.rdbuf(); out.clear(); // out will set fail big if fin is empty - out << iter->second.second << endl; + out << entry.suffix_embed << endl; } else { - out << iter->second.first; + out << entry.prefix_external; outputURL(out, fn); - out << iter->second.second << endl; + out << entry.suffix_external << endl; if(copy) { diff --git a/src/Param.h b/src/Param.h index c50db14..1b3f1d3 100644 --- a/src/Param.h +++ b/src/Param.h @@ -25,7 +25,11 @@ struct Param double h_dpi, v_dpi; // output - int single_html; + int embed_css; + int embed_font; + int embed_image; + int embed_javascript; + int embed_outline; int split_pages; std::string dest_dir; std::string css_filename; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index c2875cc..f9f9ff6 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -36,9 +36,9 @@ using namespace pdf2htmlEX; Param param; ArgParser argparser; -void deprecated_embed_base_font(const char * dummy = nullptr) +void deprecated_single_html(const char * dummy = nullptr) { - cerr << "--embed-base-font is deprecated. Use --embed-external-font instead." << endl; + cerr << "--single_html is deprecated. Use `--embed CFIJO` instead." 
<< endl; exit(EXIT_FAILURE); } @@ -60,6 +60,31 @@ void show_version_and_exit(const char * dummy = nullptr) exit(EXIT_SUCCESS); } +void embed_parser (const char * str) +{ + while(true) + { + switch(*str) + { + case '\0': return; break; + case 'c': param.embed_css = 0; break; + case 'C': param.embed_css = 1; break; + case 'f': param.embed_font = 0; break; + case 'F': param.embed_font = 1; break; + case 'i': param.embed_image = 0; break; + case 'I': param.embed_image = 1; break; + case 'j': param.embed_javascript = 0; break; + case 'J': param.embed_javascript = 1; break; + case 'o': param.embed_outline = 0; break; + case 'O': param.embed_outline = 1; break; + default: + cerr << "Unknown character `" << (*str) << "` for --embed" << endl; + break; + } + ++ str; + } +} + void parse_options (int argc, char **argv) { argparser @@ -68,15 +93,20 @@ void parse_options (int argc, char **argv) .add("last-page,l", ¶m.last_page, numeric_limits::max(), "last page to convert") // dimensions - .add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true) - .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", nullptr, true) - .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", nullptr, true) + .add("zoom", ¶m.zoom, 0, "zoom ratio", true) + .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", true) + .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", true) .add("use-cropbox", ¶m.use_cropbox, 1, "use CropBox instead of MediaBox") .add("hdpi", ¶m.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("vdpi", ¶m.v_dpi, 144.0, "vertical resolution for graphics in DPI") // output files - .add("single-html", ¶m.single_html, 1, "generate a single HTML file") + .add("embed", "specify which elements should be embedded into output", embed_parser, true) + .add("embed-css", ¶m.embed_css, 1, "embed CSS files into output") + .add("embed-font", ¶m.embed_font, 1, "embed font files into output") + .add("embed-image", ¶m.embed_image, 1, "embed image files into output") + 
.add("embed-javascript", ¶m.embed_javascript, 1, "embed JavaScript files into output") + .add("embed-outline", ¶m.embed_outline, 1, "embed outlines into output") .add("split-pages", ¶m.split_pages, 0, "split pages into separate files") .add("dest-dir", ¶m.dest_dir, ".", "specify destination directory") .add("css-filename", ¶m.css_filename, "", "filename of the generated css file") @@ -107,8 +137,8 @@ void parse_options (int argc, char **argv) .add("optimize-text", ¶m.optimize_text, 0, "try to reduce the number of HTML elements used for text") // encryption - .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true) - .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true) + .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", true) + .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", true) .add("no-drm", ¶m.no_drm, 0, "override document DRM settings") // misc. 
@@ -123,7 +153,7 @@ void parse_options (int argc, char **argv) .add("help,h", "print usage information", &show_usage_and_exit) // deprecated - .add("embed-base-font", "", &deprecated_embed_base_font) + .add("single-html", "", &deprecated_single_html) .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") diff --git a/src/util/const.cc b/src/util/const.cc index d914a1b..1303495 100644 --- a/src/util/const.cc +++ b/src/util/const.cc @@ -22,10 +22,16 @@ const map GB_ENCODED_FONT_NAME_MAP({ {"\xC1\xA5\xCA\xE9", "SimLi"}, }); -const std::map, std::pair > EMBED_STRING_MAP({ - {{".css", 0}, {""}}, - {{".css", 1}, {""}}, - {{".js", 0}, {""}}, - {{".js", 1}, {""}} +const std::map EMBED_STRING_MAP({ + {".css", {&Param::embed_css, + "", + "" }}, + {".js", {&Param::embed_javascript, + "", + "" }} }); } //namespace pdf2htmlEX diff --git a/src/util/const.h b/src/util/const.h index eee8fcd..e6cabdc 100644 --- a/src/util/const.h +++ b/src/util/const.h @@ -11,6 +11,8 @@ #include #include +#include "Param.h" + namespace pdf2htmlEX { #ifndef nullptr @@ -24,9 +26,17 @@ extern const double ID_MATRIX[6]; // For GB encoded font names extern const std::map GB_ENCODED_FONT_NAME_MAP; // map to embed files into html -// key: (suffix, if_embed_content) -// value: (prefix string, suffix string) -extern const std::map, std::pair > EMBED_STRING_MAP; +struct EmbedStringEntry +{ + int Param::*embed_flag; + // used when *embed_flag == true + std::string prefix_embed; + std::string suffix_embed; + // used when *embed_flag == false + std::string prefix_external; + std::string suffix_external; +}; +extern const std::map EMBED_STRING_MAP; } // namespace pdf2htmlEX diff --git a/test/test.py b/test/test.py old mode 100755 new mode 100644