// pdftohtmlEX.cc // // Copyright (C) 2012,2013 Lu Wang #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "Param.h" #include "pdf2htmlEX-config.h" #include "HTMLRenderer/HTMLRenderer.h" #include "util/ArgParser.h" #include "util/path.h" #include "util/ffw.h" using namespace std; using namespace pdf2htmlEX; Param param; ArgParser argparser; void show_usage_and_exit(const char * dummy = nullptr) { cerr << "Usage: pdf2htmlEX [Options] []" << endl; cerr << endl; cerr << "Options:" << endl; argparser.show_usage(cerr); cerr << endl; cerr << "Run 'man pdf2htmlEX' for detailed information" << endl; cerr << endl; exit(EXIT_FAILURE); } void show_version_and_exit(const char * dummy = nullptr) { cerr << "pdftohtmlEX version " << PDF2HTMLEX_VERSION << endl; cerr << "Copyright 2012,2013 Lu Wang " << endl; cerr << "Libraries: "; cerr << "poppler " << POPPLER_VERSION << ", "; cerr << "libfontforge " << ffw_get_version() << endl; exit(EXIT_FAILURE); } void parse_options (int argc, char **argv) { argparser .add("help,h", "show all options", &show_usage_and_exit) .add("version,v", "show copyright and version info", &show_version_and_exit) .add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", nullptr, true) .add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", nullptr, true) .add("no-drm", ¶m.no_drm, 0, "override document DRM settings") .add("dest-dir", ¶m.dest_dir, ".", "specify destination directory") .add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") .add("first-page,f", ¶m.first_page, 1, "first page to process") .add("last-page,l", ¶m.last_page, numeric_limits::max(), "last page to process") .add("zoom", ¶m.zoom, 0, "zoom ratio", nullptr, true) .add("fit-width", ¶m.fit_width, 0, "fit width to pixels", nullptr, true) .add("fit-height", ¶m.fit_height, 0, "fit height to pixels", nullptr, true) .add("hdpi", ¶m.h_dpi, 144.0, "horizontal DPI for non-text") .add("vdpi", ¶m.v_dpi, 144.0, "vertical DPI for non-text") .add("use-cropbox", ¶m.use_cropbox, 0, "use CropBox instead of MediaBox") .add("process-nontext", ¶m.process_nontext, 1, "process nontext objects") .add("single-html", ¶m.single_html, 1, "combine everything into one single HTML file") .add("split-pages", ¶m.split_pages, 0, "split pages into separated files") .add("embed-base-font", ¶m.embed_base_font, 0, "embed local matched font for base 14 fonts in the PDF file") .add("embed-external-font", ¶m.embed_external_font, 0, "embed local matched font for external fonts in the PDF file") .add("decompose-ligature", ¶m.decompose_ligature, 0, "decompose ligatures, for example 'fi' -> 'f''i'") .add("heps", ¶m.h_eps, 1.0, "max tolerated horizontal offset (in pixels)") .add("veps", ¶m.v_eps, 1.0, "max tolerated vertical offset (in pixels)") .add("space-threshold", ¶m.space_threshold, (1.0/8), "distance no thiner than (threshold * em) will be considered as a space character") .add("font-size-multiplier", ¶m.font_size_multiplier, 4.0, "setting a value greater than 1 would increase the rendering accuracy") .add("auto-hint", ¶m.auto_hint, 0, "Whether to generate hints for fonts") .add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") .add("stretch-narrow-glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "squeeze wide glyphs instead of truncating") .add("remove-unused-glyph", ¶m.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files") .add("font-format", ¶m.font_format, "opentype", "format for extracted font files") .add("external-hint-tool", ¶m.external_hint_tool, "", "external tool for hintting fonts.(overrides --auto-hint)") .add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file") .add("debug", ¶m.debug, 0, "output debug information") .add("clean-tmp", ¶m.clean_tmp, 1, "clean temporary files after processing") .add("css-draw", ¶m.css_draw, 0, "[Experimental and Unsupported] CSS Drawing") .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") ; try { argparser.parse(argc, argv); } catch(const char * s) { // if s == "", getopt_long would have printed the error message if(s && s[0]) { cerr << "Error when parsing the arguments:" << endl; cerr << s << endl; } exit(EXIT_FAILURE); } catch(const std::string & s) { // if s == "", getopt_long would have printed the error message if(s != "") { cerr << "Error when parsing the arguments:" << endl; cerr << s << endl; } exit(EXIT_FAILURE); } } int main(int argc, char **argv) { parse_options(argc, argv); if (param.input_filename == "") { cerr << "Missing input filename" << endl; exit(EXIT_FAILURE); } //prepare the directories { char buf[] = "/tmp/pdf2htmlEX-XXXXXX"; auto p = mkdtemp(buf); if(p == nullptr) { cerr << "Cannot create temp directory" << endl; exit(EXIT_FAILURE); } param.tmp_dir = buf; } if(param.debug) cerr << "temporary dir: " << (param.tmp_dir) << endl; try { create_directories(param.dest_dir); } catch (const string & s) { cerr << s << endl; exit(EXIT_FAILURE); } bool finished = false; // read config file globalParams = new GlobalParams(); // open PDF file PDFDoc *doc = nullptr; try { { GooString * ownerPW = (param.owner_password == "") ? (nullptr) : (new GooString(param.owner_password.c_str())); GooString * userPW = (param.user_password == "") ? (nullptr) : (new GooString(param.user_password.c_str())); GooString fileName(param.input_filename.c_str()); doc = PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW); delete userPW; delete ownerPW; } if (!doc->isOk()) { throw "Cannot read the file"; } // check for copy permission if (!doc->okToCopy()) { if (param.no_drm == 0) { throw "Copying of text from this document is not allowed."; } cerr << "Document has copy-protection bit set." << endl; } param.first_page = min(max(param.first_page, 1), doc->getNumPages()); param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); if(param.output_filename == "") { const string s = get_filename(param.input_filename); if(get_suffix(param.input_filename) == ".pdf") { if(param.split_pages) param.output_filename = s.substr(0, s.size() - 4); else param.output_filename = s.substr(0, s.size() - 4) + ".html"; } else { if(param.split_pages) param.output_filename = s; else param.output_filename = s + ".html"; } } if(param.css_filename == "") { const string s = get_filename(param.input_filename); if(get_suffix(param.input_filename) == ".pdf") { param.css_filename = s.substr(0, s.size() - 4) + ".css"; } else { if(!param.split_pages) param.css_filename = s + ".css"; } } HTMLRenderer * htmlOut = new HTMLRenderer(¶m); htmlOut->process(doc); delete htmlOut; finished = true; } catch (const char * s) { cerr << "Error: " << s << endl; } catch (const string & s) { cerr << "Error: " << s << endl; } // clean up if(doc) delete doc; if(globalParams) delete globalParams; // check for memory leaks Object::memCheck(stderr); gMemReport(stderr); exit(finished ? (EXIT_SUCCESS) : (EXIT_FAILURE)); return 0; }