From 2cc7da786ad02e1ad010d922e25e4522acdc4c6c Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 10 Sep 2012 17:01:15 +0800 Subject: [PATCH] working on getopt --- CMakeLists.txt | 2 + src/ArgParser.cc | 132 ++++++++++++++++++++ src/include/ArgParser.h | 139 +++++++++++++++++++++ src/include/pdf2htmlEX-config.h | 19 +++ src/pdf2htmlEX.cc | 215 +++++++++++++++----------------- 5 files changed, 394 insertions(+), 113 deletions(-) create mode 100644 src/ArgParser.cc create mode 100644 src/include/ArgParser.h create mode 100644 src/include/pdf2htmlEX-config.h diff --git a/CMakeLists.txt b/CMakeLists.txt index dd6d288..edb04eb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,8 @@ add_executable(pdf2htmlEX src/FontPreprocessor.cc src/include/util.h src/util.cc + src/include/ArgParser.h + src/ArgParser.cc src/include/pdf2htmlEX-config.h ) diff --git a/src/ArgParser.cc b/src/ArgParser.cc new file mode 100644 index 0000000..673a4a3 --- /dev/null +++ b/src/ArgParser.cc @@ -0,0 +1,132 @@ +/* + * A wrapper of getopt + * + * by WangLu + * 2012.09.10 + */ + +#include +#include +#include +#include + +#include "ArgParser.h" + +using std::ostream; +using std::cerr; +using std::endl; +using std::string; +using std::vector; +using std::unordered_map; +using std::make_pair; +using std::ostringstream; + +ArgParser::~ArgParser(void) +{ + for(auto iter = arg_entries.begin(); iter != arg_entries.end(); ++iter) + delete (*iter); +} + +ArgParser & ArgParser::add(const char * optname, const char * description, ArgParserCallBack callback) +{ + return add(optname, nullptr, 0, description, callback); +} + +void ArgParser::parse(int argc, char ** argv) const +{ + //prepare optstring and longopts + vector<char> optstring; + optstring.reserve(arg_entries.size() + 1); + vector<struct option> longopts; + longopts.reserve(arg_entries.size() + 1); + + unordered_map<int, const ArgEntryBase *> opt_map; + + for(auto iter = arg_entries.begin(); iter != arg_entries.end(); ++iter) + { + const ArgEntryBase * p = *iter; + if(p->shortname != 0) +
{ + optstring.push_back(p->shortname); + if(p->need_arg) + optstring.push_back(':'); + + int v = p->shortname; + if(!(opt_map.insert(make_pair(v, p)).second)) + { + cerr << "Warning: duplicated shortname '" << v << "' used by -" << (char)(p->shortname) << " and -" << (char)(opt_map[p->shortname]->shortname) << endl; + } + } + + if(p->name != "") + { + int v = (256 + (iter - arg_entries.begin())); + longopts.push_back({p->name.c_str(), ((p->need_arg) ? required_argument : no_argument), nullptr, v}); + if(!(opt_map.insert(make_pair(v, p)).second)) + { + cerr << "Warning: duplicated shortname '" << v << "' used by --" << (p->name) << " and --" << (opt_map[p->shortname]->name) << endl; + } + } + } + + optstring.push_back(0); + longopts.push_back({0,0,0,0}); + + { + int r; + int idx; + opterr = 0; + while(true) + { + r = getopt_long(argc, argv, &optstring.front(), &longopts.front(), &idx); + if(r == -1) + return; + if(r == ':') + { + ostringstream sout; + sout << "Missing argument for option "; + if(r < 256) + sout << "-" << (char)(opt_map[optopt]->shortname); + else + sout << "--" << opt_map[optopt]->name; + sout << endl; + throw sout.str(); + } + cerr << r << ' ' << idx << ' ' << (optarg ? 
optarg : "") << endl; + } + } +} + +void ArgParser::show_usage(ostream & out) const +{ + for(auto iter = arg_entries.begin(); iter != arg_entries.end(); ++iter) + { + (*iter)->show_usage(out); + } +} + +ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * description, bool need_arg) + : shortname(0), name(name), description(description), need_arg(need_arg) +{ + size_t idx = this->name.rfind(','); + if(idx != string::npos) + { + if(idx+2 == this->name.size()) + { + shortname = this->name[this->name.size()-1]; + this->name = this->name.substr(0, idx); + } + else + { + cerr << "Warning: argument '" << this->name << "' may not be parsed correctly" << endl; + } + } +} + + +void dump_default_value(std::ostream & out, const std::string & v) +{ + out << '"' << v << '"'; +} + +const int ArgParser::arg_col_width = 40; diff --git a/src/include/ArgParser.h b/src/include/ArgParser.h new file mode 100644 index 0000000..b4fcd6e --- /dev/null +++ b/src/include/ArgParser.h @@ -0,0 +1,139 @@ +/* + * A wrapper of getopt + * + * by WangLu + * 2012.09.10 + */ + + +#ifndef ARGPARSER_H__ +#define ARGPARSER_H__ + +#include +#include +#include +#include + +class ArgParser +{ +public: + ~ArgParser(void); + + typedef void (*ArgParserCallBack) (void); + + /* + * optname: name of the argment, should be provided as --optname + * description: if description is "", the argument won't be shown in show_usage() + */ + + ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr); + + template <class T, class Tv> + ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr); + + void parse(int argc, char ** argv) const; + void show_usage(std::ostream & out) const; + +private: + class ArgEntryBase + { + public: + ArgEntryBase(const char * name, const char * description, bool need_arg); + virtual ~ArgEntryBase() { } + char shortname; + std::string name; + std::string description; +
bool need_arg; + virtual void parse (void) const = 0; + virtual void show_usage (std::ostream & out) const = 0; + }; + + template <class T, class Tv> + class ArgEntry : public ArgEntryBase + { + public: + ArgEntry(const char * name, T * location, const Tv & deafult_value, ArgParserCallBack callback, const char * description); + + virtual void parse (void) const; + virtual void show_usage (std::ostream & out) const; + + private: + T * location; + T default_value; + ArgParserCallBack callback; + }; + + std::vector<ArgEntryBase *> arg_entries; + static const int arg_col_width; +}; + +template <class T, class Tv> +ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback) +{ + arg_entries.push_back(new ArgEntry<T, Tv>(optname, location, default_value, callback, description)); + + return *this; +} + +template <class T, class Tv> +ArgParser::ArgEntry<T, Tv>::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description) + : ArgEntryBase(name, description, (location != nullptr)) + , location(location) + , default_value(default_value) + , callback(callback) +{ + if(need_arg) + *location = T(default_value); +} + +template <class T, class Tv> +void ArgParser::ArgEntry<T, Tv>::parse(void) const +{ } + +// helper +template <class T> +void dump_default_value(std::ostream & out, const T & v) +{ + out << v; +} + +extern void dump_default_value(std::ostream & out, const std::string & v); + +template <class T, class Tv> +void ArgParser::ArgEntry<T, Tv>::show_usage(std::ostream & out) const +{ + if(description == "") + return; + + std::ostringstream sout; + sout << " "; + + if(shortname != 0) + { + sout << "-" << shortname; + } + + if(name != "") + { + if(shortname != 0) + sout << ","; + sout << "--" << name; + } + + if(need_arg) + { + sout << " (="; + dump_default_value(sout, default_value); + sout << ")"; + } + + std::string s = sout.str(); + out << s; + + for(int i = s.size(); i < arg_col_width; ++i) + out << ' '; + + out << " " << description << std::endl; +} + +#endif //ARGPARSER_H__ diff
--git a/src/include/pdf2htmlEX-config.h b/src/include/pdf2htmlEX-config.h new file mode 100644 index 0000000..5503307 --- /dev/null +++ b/src/include/pdf2htmlEX-config.h @@ -0,0 +1,19 @@ +/* + * config.h + * Compile time constants + * + * by WangLu + */ + + +#ifndef PDF2HTMLEX_CONFIG_H__ +#define PDF2HTMLEX_CONFIG_H__ + +#include + +static const std::string PDF2HTMLEX_VERSION = "0.3"; +static const std::string PDF2HTMLEX_PREFIX = "/usr/local"; +static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX"; + + +#endif //PDF2HTMLEX_CONFIG_H__ diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 8b24e71..e07a3d0 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -24,113 +24,96 @@ #include "HTMLRenderer.h" #include "Param.h" #include "pdf2htmlEX-config.h" +#include "ArgParser.h" -namespace po = boost::program_options; using namespace std; Param param; +ArgParser argparser; -// variables -PDFDoc *doc = nullptr; -GooString *fileName = nullptr; -GooString *ownerPW, *userPW; - -HTMLRenderer *htmlOut = nullptr; - -bool finished = false; - -po::options_description opt_visible("Options"), opt_hidden, opt_all; -po::positional_options_description opt_positional; - -void show_usage(void) +void show_usage_and_exit(void) { cerr << "pdftohtmlEX version " << PDF2HTMLEX_VERSION << endl; cerr << endl; cerr << "Copyright 2012 Lu Wang (coolwanglugmail.com)" << endl; cerr << endl; - cerr << "Usage: pdf2htmlEX [Options] " << endl; + cerr << "Usage: pdf2htmlEX [Options] []" << endl; cerr << endl; - cerr << opt_visible << endl; + cerr << "Options:" << endl; + argparser.show_usage(cerr); + cerr << endl; + exit(EXIT_FAILURE); } -po::variables_map parse_options (int argc, char **argv) +void parse_options (int argc, char **argv) { - opt_visible.add_options() - ("help", "show all options") - ("version,v", "show copyright and version info") + argparser + .add("help,h", "show all options", &show_usage_and_exit) + .add("version,v", "show copyright and version info", 
&show_usage_and_exit) - ("owner-password,o", po::value(&param.owner_password)->default_value(""), "owner password (for encrypted files)") - ("user-password,u", po::value(&param.user_password)->default_value(""), "user password (for encrypted files)") + .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)") + .add("user-password,u", &param.user_password, "", "user password (for encrypted files)") - ("dest-dir", po::value(&param.dest_dir)->default_value("."), "destination directory") + .add("dest-dir", &param.dest_dir, ".", "destination directory") - ("first-page,f", po::value(&param.first_page)->default_value(1), "first page to process") - ("last-page,l", po::value(&param.last_page)->default_value(numeric_limits<int>::max()), "last page to process") + .add("first-page,f", &param.first_page, 1, "first page to process") + .add("last-page,l", &param.last_page, numeric_limits<int>::max(), "last page to process") - ("zoom", po::value(&param.zoom)->default_value(1.0), "zoom ratio") - ("hdpi", po::value(&param.h_dpi)->default_value(144.0), "horizontal DPI for non-text") - ("vdpi", po::value(&param.v_dpi)->default_value(144.0), "vertical DPI for non-text") + .add("zoom", &param.zoom, 1.0, "zoom ratio") + .add("hdpi", &param.h_dpi, 144.0, "horizontal DPI for non-text") + .add("vdpi", &param.v_dpi, 144.0, "vertical DPI for non-text") - ("process-nontext", po::value(&param.process_nontext)->default_value(1), "process nontext objects") - ("single-html", po::value(&param.single_html)->default_value(1), "combine everything into one single HTML file") - ("embed-base-font", po::value(&param.embed_base_font)->default_value(0), "embed local matched font for base 14 fonts in the PDF file") - ("embed-external-font", po::value(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file") - ("decompose-ligature", po::value(&param.decompose_ligature)->default_value(0), "decompose ligatures, for example 'fi' -> 'f''i'") + .add("process-nontext", &param.process_nontext, 1, "process nontext objects") +
.add("single-html", &param.single_html, 1, "combine everything into one single HTML file") + .add("embed-base-font", &param.embed_base_font, 0, "embed local matched font for base 14 fonts in the PDF file") + .add("embed-external-font", &param.embed_external_font, 0, "embed local matched font for external fonts in the PDF file") + .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, for example 'fi' -> 'f''i'") - ("heps", po::value(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)") - ("veps", po::value(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)") - ("space-threshold", po::value(&param.space_threshold)->default_value(1.0/8), "distance no thiner than (threshold * em) will be considered as a space character") - ("font-size-multiplier", po::value(&param.font_size_multiplier)->default_value(10.0), "setting a value greater than 1 would increase the rendering accuracy") - ("tounicode", po::value(&param.tounicode)->default_value(0), "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") - ("space-as-offset", po::value(&param.space_as_offset)->default_value(0), "treat space characters as offsets") + .add("heps", &param.h_eps, 1.0, "max tolerated horizontal offset (in pixels)") + .add("veps", &param.v_eps, 1.0, "max tolerated vertical offset (in pixels)") + .add("space-threshold", &param.space_threshold, (1.0/8), "distance no thiner than (threshold * em) will be considered as a space character") + .add("font-size-multiplier", &param.font_size_multiplier, 10.0, "setting a value greater than 1 would increase the rendering accuracy") + .add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") + .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") - ("font-suffix", po::value(&param.font_suffix)->default_value(".ttf"), "suffix for extracted font files") - ("font-format", po::value(&param.font_format)->default_value("truetype"),
"format for extracted font files") + .add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files") + .add("font-format", &param.font_format, "truetype", "format for extracted font files") - ("debug", po::value(&param.debug)->default_value(0), "output debug information") - ("clean-tmp", po::value(&param.clean_tmp)->default_value(1), "clean temporary files after processing") + .add("debug", &param.debug, 0, "output debug information") + .add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing") ; - opt_hidden.add_options() - ("inputfilename", po::value(&param.input_filename)->default_value(""), "") - ("outputfilename", po::value(&param.output_filename)->default_value(""), "") - ; - - opt_positional.add("inputfilename", 1).add("outputfilename",1); - - opt_all.add(opt_visible).add(opt_hidden); - - try { - po::variables_map opt_vm; - po::store(po::command_line_parser(argc, argv).options(opt_all).positional(opt_positional).run() - , opt_vm); - po::notify(opt_vm); - return opt_vm; + try + { + argparser.parse(argc, argv); } - catch(...)
{ - show_usage(); - abort(); + catch(const std::string & s) + { + cerr << "Error when parsing the arguments:" << endl; + cerr << s << endl; + exit(EXIT_FAILURE); } } int main(int argc, char **argv) { - auto opt_map = parse_options(argc, argv); - if (opt_map.count("version") || opt_map.count("help") || (param.input_filename == "")) + parse_options(argc, argv); + if (param.input_filename == "") { - show_usage(); - abort(); + show_usage_and_exit(); } //prepare the directories - char buf[] = "/tmp/pdf2htmlEX-XXXXXX"; - auto p = mkdtemp(buf); - if(p == nullptr) { - cerr << "Cannot create temp directory" << endl; - abort(); + char buf[] = "/tmp/pdf2htmlEX-XXXXXX"; + auto p = mkdtemp(buf); + if(p == nullptr) + { + cerr << "Cannot create temp directory" << endl; + exit(EXIT_FAILURE); + } + param.tmp_dir = buf; } - param.tmp_dir = buf; if(param.debug) cerr << "temporary dir: " << (param.tmp_dir) << endl; @@ -142,67 +125,73 @@ int main(int argc, char **argv) catch (const string & s) { cerr << s << endl; - abort(); + exit(EXIT_FAILURE); } + bool finished = false; // read config file globalParams = new GlobalParams(); - // open PDF file - ownerPW = (param.owner_password == "") ? (nullptr) : (new GooString(param.owner_password.c_str())); - userPW = (param.user_password == "") ? 
(nullptr) : (new GooString(param.user_password.c_str())); - fileName = new GooString(param.input_filename.c_str()); - - doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); - - delete userPW; - delete ownerPW; - - if (!doc->isOk()) { - goto error; - } - - // check for copy permission - if (!doc->okToCopy()) { - error(errNotAllowed, -1, "Copying of text from this document is not allowed."); - goto error; - } - - param.first_page = min(max(param.first_page, 1), doc->getNumPages()); - param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); - - if(param.output_filename == "") + PDFDoc *doc = nullptr; + try { - const string s = get_filename(param.input_filename); + { + GooString * ownerPW = (param.owner_password == "") ? (nullptr) : (new GooString(param.owner_password.c_str())); + GooString * userPW = (param.user_password == "") ? (nullptr) : (new GooString(param.user_password.c_str())); + GooString fileName(param.input_filename.c_str()); - if(get_suffix(param.input_filename) == ".pdf") - { - param.output_filename = s.substr(0, s.size() - 4) + ".html"; + doc = PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW); + + delete userPW; + delete ownerPW; } - else - { - param.output_filename = s + ".html"; + + if (!doc->isOk()) { + throw "Cannot read the file"; } + + // check for copy permission + if (!doc->okToCopy()) { + throw "Copying of text from this document is not allowed."; + } + + param.first_page = min(max(param.first_page, 1), doc->getNumPages()); + param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); + + if(param.output_filename == "") + { + const string s = get_filename(param.input_filename); + + if(get_suffix(param.input_filename) == ".pdf") + { + param.output_filename = s.substr(0, s.size() - 4) + ".html"; + } + else + { + param.output_filename = s + ".html"; + } + } + + HTMLRenderer * htmlOut = new HTMLRenderer(&param); + htmlOut->process(doc); + delete htmlOut; + + finished = true; + } + catch
(const string & s) + { + cerr << "Error: " << s << endl; } - - htmlOut = new HTMLRenderer(&param); - htmlOut->process(doc); - delete htmlOut; - - finished = true; // clean up -error: if(doc) delete doc; - delete fileName; if(globalParams) delete globalParams; // check for memory leaks Object::memCheck(stderr); gMemReport(stderr); - if(!finished) - abort(); + exit(finished ? (EXIT_SUCCESS) : (EXIT_FAILURE)); return 0; }