From 8c65a2826bb44e989183abcaeec3e783b7268162 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 22 Nov 2013 13:32:40 -0500 Subject: [PATCH 01/16] Compile under MinGW --- CMakeLists.txt | 3 +++ src/pdf2htmlEX.cc | 25 +++++++++++++++++++++++++ src/util/ffw.c | 5 +++++ src/util/path.cc | 17 +++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 73842eb..48b1822 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,6 +130,9 @@ else() set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES}) endif() +# Add additional dependencies +set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} intl iconv gettextlib gettextpo gutils png jpeg openjpeg glib-2.0.dll z xml2 tiff gio-2.0.dll ltdl plibc.dll) + # debug build flags (overwrite default cmake debug flags) set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg") diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 7da4150..554915a 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -42,6 +42,31 @@ using namespace pdf2htmlEX; Param param; ArgParser argparser; +#if defined(_WIN32) +#include +char *mkdtemp(char *tempbuf) { + int rand_value = 0; + char* tempbase = NULL; + char tempbasebuf[MAX_PATH] = ""; + + if (strcmp(&tempbuf[strlen(tempbuf)-6], "XXXXXX")) { + errno = EINVAL; + return NULL; + } + + srand((unsigned)time(0)); + rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6); + tempbase = strrchr(tempbuf, '/'); + tempbase = tempbase ? tempbase+1 : tempbuf; + strcpy(tempbasebuf, tempbase); + sprintf(&tempbasebuf[strlen(tempbasebuf)-6], "%d", rand_value); + ::GetTempPath(MAX_PATH, tempbuf); + strcat(tempbuf, tempbasebuf); + ::CreateDirectory(tempbuf, NULL); + return tempbuf; +} +#endif + void deprecated_font_suffix(const char * dummy = nullptr) { cerr << "--font-suffix is deprecated. Use `--font-format` instead." 
<< endl; diff --git a/src/util/ffw.c b/src/util/ffw.c index 01228df..ebbd258 100644 --- a/src/util/ffw.c +++ b/src/util/ffw.c @@ -19,6 +19,11 @@ #include "ffw.h" +#if defined(_WIN32) +#undef printf +#undef vfprintf +#endif + static real EPS=1e-6; static inline int min(int a, int b) diff --git a/src/util/path.cc b/src/util/path.cc index c928454..8815691 100644 --- a/src/util/path.cc +++ b/src/util/path.cc @@ -14,6 +14,17 @@ using std::string; +#if defined(_WIN32) +#include +int mkdir(const char *pathname, mode_t mode) { + if (::GetFileAttributes(pathname) == FILE_ATTRIBUTE_DIRECTORY) { + errno = EEXIST; + return -1; + } + return ::CreateDirectory(pathname, NULL) ? 0 : -1; +} +#endif + namespace pdf2htmlEX { void create_directories(const string & path) @@ -31,9 +42,15 @@ void create_directories(const string & path) { if(errno == EEXIST) { +#if defined(_WIN32) + struct _stat32 stat_buf; + if((_stat32(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) + return; +#else struct stat stat_buf; if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) return; +#endif } throw string("Cannot create directory: ") + path; From d4d86ca3fe8ef377d2a675c0ac51a3767d6a3c4c Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 22 Nov 2013 15:16:21 -0500 Subject: [PATCH 02/16] data directory under Windows is located unde data in pdf2htmlEX.exe path by default --- src/pdf2htmlEX.cc | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 554915a..6ca3512 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -41,9 +41,11 @@ using namespace pdf2htmlEX; Param param; ArgParser argparser; +string data_dir; -#if defined(_WIN32) +#ifdef _WIN32 #include +#include char *mkdtemp(char *tempbuf) { int rand_value = 0; char* tempbase = NULL; @@ -65,6 +67,7 @@ char *mkdtemp(char *tempbuf) { ::CreateDirectory(tempbuf, NULL); return tempbuf; } + #endif void 
deprecated_font_suffix(const char * dummy = nullptr) @@ -90,7 +93,11 @@ void show_version_and_exit(const char * dummy = nullptr) #if ENABLE_SVG cerr << " cairo " << cairo_version_string() << endl; #endif +#ifdef _WIN32 + cerr << "Default data-dir: " << data_dir << endl; +#else cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; +#endif cerr << "Supported image format:"; #ifdef ENABLE_LIBPNG cerr << " png"; @@ -137,15 +144,15 @@ void parse_options (int argc, char **argv) // pages .add("first-page,f", &param.first_page, 1, "first page to convert") .add("last-page,l", &param.last_page, numeric_limits<int>::max(), "last page to convert") - + // dimensions .add("zoom", &param.zoom, 0, "zoom ratio", true) - .add("fit-width", &param.fit_width, 0, "fit width to pixels", true) + .add("fit-width", &param.fit_width, 0, "fit width to pixels", true) .add("fit-height", &param.fit_height, 0, "fit height to pixels", true) .add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox") .add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI") - + // output files .add("embed", "specify which elements should be embedded into output", embed_parser, true) .add("embed-css", &param.embed_css, 1, "embed CSS files into output") @@ -162,7 +169,7 @@ void parse_options (int argc, char **argv) .add("process-outline", &param.process_outline, 1, "show outline in HTML") .add("printing", &param.printing, 1, "enable printing support") .add("fallback", &param.fallback, 0, "output in fallback mode") - + // fonts .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") .add("font-format", &param.font_format, "woff", "suffix for embedded font files (ttf,otf,woff,svg)") @@ -173,7 +180,7 @@ void parse_options (int argc, char **argv) .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") .add("override-fstype", &param.override_fstype, 0, "clear the fstype bits in
TTF/OTF fonts") .add("process-type3", &param.process_type3, 0, "convert Type 3 fonts for web (experimental)") - + // text .add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels") .add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels") @@ -185,19 +192,23 @@ void parse_options (int argc, char **argv) // background image .add("bg-format", &param.bg_format, "png", "specify background image format") - + // encryption .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", true) .add("no-drm", &param.no_drm, 0, "override document DRM settings") - + // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") +#ifdef _WIN32 + .add("data-dir", &param.data_dir, data_dir, "specify data directory") +#else .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") +#endif // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") .add("debug", &param.debug, 0, "print debugging information") - + // meta .add("version,v", "print copyright and version info", &show_version_and_exit) .add("help,h", "print usage information", &show_usage_and_exit) @@ -342,6 +353,15 @@ void check_param() int main(int argc, char **argv) { +#ifdef _WIN32 + { + // Under Windows, the default data_dir is under /data in the pdf2htmlEX directory + stringstream ss; + ss << dirname(argv[0]) << "/data"; + data_dir = ss.str(); + } +#endif + parse_options(argc, argv); check_param(); @@ -394,11 +414,11 @@ int main(int argc, char **argv) delete ownerPW; } - if (!doc->isOk()) + if (!doc->isOk()) throw "Cannot read the file"; // check for copy permission - if (!doc->okToCopy()) + if (!doc->okToCopy()) { if (param.no_drm == 0) throw "Copying of text from this document is not allowed."; From
589047144aa05b9839c614d9ebc53228538e6a3e Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 22 Nov 2013 16:12:59 -0500 Subject: [PATCH 03/16] Added an option to specify the base temporary directory --- src/pdf2htmlEX.cc | 99 +++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 47 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 6ca3512..cbfce42 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -41,33 +41,10 @@ using namespace pdf2htmlEX; Param param; ArgParser argparser; -string data_dir; #ifdef _WIN32 -#include -#include -char *mkdtemp(char *tempbuf) { - int rand_value = 0; - char* tempbase = NULL; - char tempbasebuf[MAX_PATH] = ""; - - if (strcmp(&tempbuf[strlen(tempbuf)-6], "XXXXXX")) { - errno = EINVAL; - return NULL; - } - - srand((unsigned)time(0)); - rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6); - tempbase = strrchr(tempbuf, '/'); - tempbase = tempbase ? tempbase+1 : tempbuf; - strcpy(tempbasebuf, tempbase); - sprintf(&tempbasebuf[strlen(tempbasebuf)-6], "%d", rand_value); - ::GetTempPath(MAX_PATH, tempbuf); - strcat(tempbuf, tempbasebuf); - ::CreateDirectory(tempbuf, NULL); - return tempbuf; -} - +# include +# include #endif void deprecated_font_suffix(const char * dummy = nullptr) @@ -94,7 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr) cerr << " cairo " << cairo_version_string() << endl; #endif #ifdef _WIN32 - cerr << "Default data-dir: " << data_dir << endl; + cerr << "Default data-dir: " << param.data_dir << endl; #else cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; #endif @@ -138,6 +115,39 @@ void embed_parser (const char * str) } } +void prepare_directories() +{ + std::string tmp_dir = param.basetmp_dir + "/pdf2htmlEX-XXXXXX"; +#ifndef _WIN32 + errno = 0; + + auto_ptr pBuf(new char[tmp_dir.size() + 1]); + strcpy(pBuf.get(), tmp_dir.c_str()); + auto p = mkdtemp(pBuf.get()); + if(p == nullptr) + { + const char * errmsg = strerror(errno); + 
if(!errmsg) + { + errmsg = "unknown error"; + } + cerr << "Cannot create temp directory: " << errmsg << endl; + exit(EXIT_FAILURE); + } + param.tmp_dir = pBuf.get(); +#else + srand((unsigned)time(0)); + int rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6); + stringstream ss; + ss << setw(6) << rand_value; + + std::cout << "1- " << tmp_dir << endl; + tmp_dir.erase(tmp_dir.size() - 6); + param.tmp_dir = tmp_dir + ss.str(); + ::CreateDirectory(param.tmp_dir.c_str(), NULL); +#endif +} + void parse_options (int argc, char **argv) { argparser @@ -200,8 +210,9 @@ void parse_options (int argc, char **argv) // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") + .add("base-tmp-dir", &param.basetmp_dir, param.basetmp_dir, "base temporary directory - will create pdf2htmlEX-XXXXXX under it") #ifdef _WIN32 - .add("data-dir", &param.data_dir, data_dir, "specify data directory") + .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") #else .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") #endif @@ -353,12 +364,19 @@ void check_param() int main(int argc, char **argv) { -#ifdef _WIN32 +#ifndef _WIN32 + param.basetmp_dir = "/tmp"; +#else { // Under Windows, the default data_dir is under /data in the pdf2htmlEX directory stringstream ss; ss << dirname(argv[0]) << "/data"; - data_dir = ss.str(); + param.data_dir = ss.str(); + + // Under Windows, the temp path is not under /tmp, find it.
+ char temppath[MAX_PATH]; + ::GetTempPath(MAX_PATH, temppath); + param.basetmp_dir = temppath; } #endif @@ -366,26 +384,13 @@ int main(int argc, char **argv) check_param(); //prepare the directories - { - char buf[] = "/tmp/pdf2htmlEX-XXXXXX"; - errno = 0; - auto p = mkdtemp(buf); - if(p == nullptr) - { - const char * errmsg = strerror(errno); - if(!errmsg) - { - errmsg = "unknown error"; - } - cerr << "Cannot create temp directory: " << errmsg << endl; - exit(EXIT_FAILURE); - } - param.tmp_dir = buf; + prepare_directories(); + + if(param.debug) { + cerr << "temporary dir: " << (param.tmp_dir) << endl; } - if(param.debug) - cerr << "temporary dir: " << (param.tmp_dir) << endl; - + exit(0); try { create_directories(param.dest_dir); From 57c02b19727acb067e3ead7702579425ef91517f Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 22 Nov 2013 16:39:28 -0500 Subject: [PATCH 04/16] Added a parameter to limit the output size. This is an estimate, but should be good enough. --- src/HTMLRenderer/general.cc | 57 ++++++++++++++++++++----------------- src/Param.h | 18 ++++++------ src/TmpFiles.cc | 16 +++++++++-- src/TmpFiles.h | 11 +++---- src/pdf2htmlEX.cc | 3 +- 5 files changed, 62 insertions(+), 43 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 416725f..2d5721a 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -45,7 +45,7 @@ HTMLRenderer::HTMLRenderer(const Param & param) ,param(param) ,html_text_page(param, all_manager) ,preprocessor(param) - ,tmp_files(param) + ,tmp_files(param) { if(!(param.debug)) { @@ -79,7 +79,7 @@ HTMLRenderer::HTMLRenderer(const Param & param) } HTMLRenderer::~HTMLRenderer() -{ +{ ffw_finalize(); delete [] cur_mapping; delete [] cur_mapping2; @@ -96,7 +96,7 @@ void HTMLRenderer::process(PDFDoc *doc) /////////////////// // Process pages - + bg_renderer = nullptr; if(param.process_nontext) { @@ -107,15 +107,20 @@ void HTMLRenderer::process(PDFDoc *doc) } int page_count = 
(param.last_page - param.first_page + 1); - for(int i = param.first_page; i <= param.last_page ; ++i) + for(int i = param.first_page; i <= param.last_page ; ++i) { + if (param.max_size != -1 && tmp_files.get_total_size() > param.max_size * 1024) { + cerr << "Stop processing, reach max size\n"; + break; + } + cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if(param.split_pages) { string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i); auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str()); - f_curpage = new ofstream((char*)page_fn, ofstream::binary); + f_curpage = new ofstream((char*)page_fn, ofstream::binary); if(!(*f_curpage)) throw string("Cannot open ") + (char*)page_fn + " for writing"; set_stream_flags((*f_curpage)); @@ -128,9 +133,9 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer->render_page(doc, i); } - doc->displayPage(this, i, + doc->displayPage(this, i, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, - 0, + 0, (!(param.use_cropbox)), true, // crop false, // printing @@ -149,7 +154,7 @@ void HTMLRenderer::process(PDFDoc *doc) //////////////////////// // Process Outline if(param.process_outline) - process_outline(); + process_outline(); post_process(); @@ -170,7 +175,7 @@ void HTMLRenderer::setDefaultCTM(double *ctm) #if POPPLER_OLDER_THAN_0_23_0 void HTMLRenderer::startPage(int pageNum, GfxState *state) #else -void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) +void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) #endif { this->pageNum = pageNum; @@ -183,12 +188,12 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) long long wid = all_manager.width.install(pageWidth); long long hid = all_manager.height.install(pageHeight); (*f_curpage) - << "
" - << "
"; - + // close page (*f_curpage) << "
" << endl; @@ -266,7 +271,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc) */ { vector zoom_factors; - + if(is_positive(param.zoom)) { zoom_factors.push_back(param.zoom); @@ -283,8 +288,8 @@ void HTMLRenderer::pre_process(PDFDoc * doc) } double zoom = (zoom_factors.empty() ? 1.0 : (*min_element(zoom_factors.begin(), zoom_factors.end()))); - - text_scale_factor1 = max(zoom, param.font_size_multiplier); + + text_scale_factor1 = max(zoom, param.font_size_multiplier); text_scale_factor2 = zoom / text_scale_factor1; } @@ -340,13 +345,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc) * we have to keep the html file for pages into a temporary place * because we'll have to embed css before it * - * Otherwise just generate it + * Otherwise just generate it */ auto fn = str_fmt("%s/__pages", param.tmp_dir.c_str()); tmp_files.add((char*)fn); f_pages.path = (char*)fn; - f_pages.fs.open(f_pages.path, ofstream::binary); + f_pages.fs.open(f_pages.path, ofstream::binary); if(!f_pages.fs) throw string("Cannot open ") + (char*)fn + " for writing"; set_stream_flags(f_pages.fs); @@ -371,7 +376,7 @@ void HTMLRenderer::post_process(void) { f_outline.fs.close(); } - f_pages.fs.close(); + f_pages.fs.close(); f_css.fs.close(); // build the main HTML file @@ -492,7 +497,7 @@ void HTMLRenderer::dump_css (void) all_manager.width .dump_css(f_css.fs); all_manager.left .dump_css(f_css.fs); all_manager.bgimage_size .dump_css(f_css.fs); - + // print css if(param.printing) { @@ -518,8 +523,8 @@ void HTMLRenderer::dump_css (void) void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy) { string fn = get_filename(path); - string suffix = (type == "") ? get_suffix(fn) : type; - + string suffix = (type == "") ? 
get_suffix(fn) : type; + // TODO auto iter = EMBED_STRING_MAP.find(suffix); if(iter == EMBED_STRING_MAP.end()) @@ -529,14 +534,14 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string & } const auto & entry = iter->second; - + if(param.*(entry.embed_flag)) { ifstream fin(path, ifstream::binary); if(!fin) throw string("Cannot open file ") + path + " for embedding"; out << entry.prefix_embed; - + if(entry.base64_encode) { out << Base64Stream(fin); diff --git a/src/Param.h b/src/Param.h index 8a566e7..4816f72 100644 --- a/src/Param.h +++ b/src/Param.h @@ -17,20 +17,21 @@ struct Param { // pages int first_page, last_page; - + // dimensions double zoom; double fit_width, fit_height; int use_cropbox; double h_dpi, v_dpi; - - // output + + // output int embed_css; int embed_font; int embed_image; int embed_javascript; int embed_outline; int split_pages; + int max_size; std::string dest_dir; std::string css_filename; std::string page_filename; @@ -39,7 +40,7 @@ struct Param int process_outline; int printing; int fallback; - + // fonts int embed_external_font; std::string font_format; @@ -50,7 +51,7 @@ struct Param int squeeze_wide_glyph; int override_fstype; int process_type3; - + // text double h_eps, v_eps; double space_threshold; @@ -61,17 +62,18 @@ struct Param // background image std::string bg_format; - + // encryption std::string owner_password, user_password; int no_drm; - + // misc. 
int clean_tmp; std::string data_dir; + std::string basetmp_dir; int css_draw; int debug; - + std::string input_filename, output_filename; // not a paramater diff --git a/src/TmpFiles.cc b/src/TmpFiles.cc index efaf0cf..b55e341 100644 --- a/src/TmpFiles.cc +++ b/src/TmpFiles.cc @@ -9,6 +9,7 @@ #include #include +#include #include "TmpFiles.h" #include "Param.h" @@ -19,11 +20,11 @@ namespace pdf2htmlEX { TmpFiles::TmpFiles( const Param& param ) - : param( param ) + : param( param ) { } TmpFiles::~TmpFiles() -{ +{ clean(); } @@ -54,5 +55,16 @@ void TmpFiles::clean() cerr << "Remove temporary directory: " << param.tmp_dir << endl; } +double TmpFiles::get_total_size() const +{ + double total_size = 0; + struct _stat st; + for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) { + _stat(iter->c_str(), &st); + total_size += st.st_size; + } + + return total_size; +} } // namespace pdf2htmlEX diff --git a/src/TmpFiles.h b/src/TmpFiles.h index b7ad46c..277281d 100644 --- a/src/TmpFiles.h +++ b/src/TmpFiles.h @@ -7,19 +7,20 @@ namespace pdf2htmlEX { -class TmpFiles +class TmpFiles { public: explicit TmpFiles( const Param& param ); ~TmpFiles(); - void add( const std::string& fn); + void add( const std::string& fn); + double get_total_size() const; private: - void clean(); - + void clean(); + const Param& param; - std::set tmp_files; + std::set tmp_files; }; } // namespace pdf2htmlEX diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index cbfce42..e6d47b1 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -141,7 +141,6 @@ void prepare_directories() stringstream ss; ss << setw(6) << rand_value; - std::cout << "1- " << tmp_dir << endl; tmp_dir.erase(tmp_dir.size() - 6); param.tmp_dir = tmp_dir + ss.str(); ::CreateDirectory(param.tmp_dir.c_str(), NULL); @@ -170,6 +169,7 @@ void parse_options (int argc, char **argv) .add("embed-image", &param.embed_image, 1, "embed image files into output") .add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files
into output") .add("embed-outline", &param.embed_outline, 1, "embed outlines into output") + .add("max-output-size", &param.max_size, -1, "maximum output size, in KB (-1 for no max)") .add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("css-filename", &param.css_filename, "", "filename of the generated css file") @@ -390,7 +390,6 @@ int main(int argc, char **argv) cerr << "temporary dir: " << (param.tmp_dir) << endl; } - exit(0); try { create_directories(param.dest_dir); From c1f3fa61787221938a565c25c0616fa859a11c7a Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Sun, 24 Nov 2013 10:20:25 -0500 Subject: [PATCH 05/16] Fixed Linux build. Link with all libraries under MINGW only. --- CMakeLists.txt | 31 ++++++++++++++++--------------- src/TmpFiles.cc | 9 +++++++++ 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 48b1822..589d864 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ else() set(FONTFORGE_INCLUDE_DIRS ${FF_INCLUDE_PATH}/fontforge) include_directories(${FONTFORGE_INCLUDE_DIRS}) # MacOSX gettext is in /opt/local/include - strange - if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") find_path(GETTEXT_INCLUDE_PATH libintl.h HINTS "/usr/local/opt/gettext/include") # homebrew if(GETTEXT_INCLUDE_PATH) include_directories(${GETTEXT_INCLUDE_PATH}) @@ -97,7 +97,7 @@ else() macro(wl_find_library LIB_NAME RESULT) unset(${RESULT}) unset(${RESULT} CACHE) - foreach(FULL_LIB_NAME + foreach(FULL_LIB_NAME ${CMAKE_IMPORT_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} @@ -131,8 +131,9 @@ else() endif() # Add additional dependencies +if(MINGW) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} intl iconv gettextlib gettextpo gutils png jpeg openjpeg
glib-2.0.dll z xml2 tiff gio-2.0.dll ltdl plibc.dll) - +endif() # debug build flags (overwrite default cmake debug flags) set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg") @@ -177,9 +178,9 @@ configure_file (${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js.in ${CMAKE_SOURCE_DIR}/s set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC} src/Param.h - src/pdf2htmlEX.cc + src/pdf2htmlEX.cc src/pdf2htmlEX-config.h - src/HTMLRenderer/HTMLRenderer.h + src/HTMLRenderer/HTMLRenderer.h src/HTMLRenderer/draw.cc src/HTMLRenderer/general.cc src/HTMLRenderer/image.cc @@ -188,12 +189,12 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC} src/HTMLRenderer/outline.cc src/HTMLRenderer/state.cc src/HTMLRenderer/text.cc - src/BackgroundRenderer/BackgroundRenderer.h + src/BackgroundRenderer/BackgroundRenderer.h src/BackgroundRenderer/BackgroundRenderer.cc - src/BackgroundRenderer/SplashBackgroundRenderer.h - src/BackgroundRenderer/SplashBackgroundRenderer.cc - src/BackgroundRenderer/CairoBackgroundRenderer.h - src/BackgroundRenderer/CairoBackgroundRenderer.cc + src/BackgroundRenderer/SplashBackgroundRenderer.h + src/BackgroundRenderer/SplashBackgroundRenderer.cc + src/BackgroundRenderer/CairoBackgroundRenderer.h + src/BackgroundRenderer/CairoBackgroundRenderer.cc src/util/const.h src/util/const.cc src/util/css_const.h @@ -232,7 +233,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC} add_executable(pdf2htmlEX ${PDF2HTMLEX_SRC}) target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS}) -add_custom_target(pdf2htmlEX_resources ALL DEPENDS +add_custom_target(pdf2htmlEX_resources ALL DEPENDS ${CMAKE_SOURCE_DIR}/share/base.min.css ${CMAKE_SOURCE_DIR}/share/fancy.min.css ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js @@ -240,15 +241,15 @@ add_custom_target(pdf2htmlEX_resources ALL DEPENDS add_custom_command(OUTPUT ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js COMMAND ${CMAKE_SOURCE_DIR}/share/build_js.sh - DEPENDS + DEPENDS ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js ) -add_custom_command(OUTPUT +add_custom_command(OUTPUT ${CMAKE_SOURCE_DIR}/share/base.min.css 
${CMAKE_SOURCE_DIR}/share/fancy.min.css COMMAND ${CMAKE_SOURCE_DIR}/share/build_css.sh - DEPENDS + DEPENDS ${CMAKE_SOURCE_DIR}/share/base.css ${CMAKE_SOURCE_DIR}/share/fancy.css ) @@ -272,5 +273,5 @@ install (FILES ${PDF2HTMLEX_RESOURCE} DESTINATION share/pdf2htmlEX) install (FILES pdf2htmlEX.1 DESTINATION share/man/man1) enable_testing() -add_test(test_naming +add_test(test_naming python ${CMAKE_SOURCE_DIR}/test/test_naming.py) diff --git a/src/TmpFiles.cc b/src/TmpFiles.cc index b55e341..4965866 100644 --- a/src/TmpFiles.cc +++ b/src/TmpFiles.cc @@ -58,9 +58,17 @@ void TmpFiles::clean() double TmpFiles::get_total_size() const { double total_size = 0; +#ifndef _WIN32 + struct stat st; +#else struct _stat st; +#endif for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) { +#ifndef _WIN32 + stat(iter->c_str(), &st); +#else _stat(iter->c_str(), &st); +#endif total_size += st.st_size; } @@ -68,3 +76,4 @@ double TmpFiles::get_total_size() const } } // namespace pdf2htmlEX + From 45f9cd116bfdb07470f5eba4d7a5d03909518cfc Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 29 Nov 2013 14:00:14 -0500 Subject: [PATCH 06/16] Trim the withespaces & \r before comparing the lines to fix Windows \r --- src/HTMLRenderer/general.cc | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 2d5721a..94cf02d 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -399,20 +399,6 @@ void HTMLRenderer::post_process(void) long line_no = 0; while(getline(manifest_fin, line)) { - ++line_no; - - if(line == "\"\"\"") - { - embed_string = !embed_string; - continue; - } - - if(embed_string) - { - output << line << endl; - continue; - } - // trim space at both sides { static const char * whitespaces = " \t\n\v\f\r"; @@ -429,6 +415,20 @@ void HTMLRenderer::post_process(void) } } + ++line_no; + + if(line == "\"\"\"") + { + embed_string = !embed_string; + 
continue; + } + + if(embed_string) + { + output << line << endl; + continue; + } + if(line.empty() || line[0] == '#') continue; From a47d42ad4b5fe71785ca846e48645530790664d4 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Thu, 5 Dec 2013 15:20:53 -0500 Subject: [PATCH 07/16] Modifications following review from Lu. Win32 modifications are now more localized --- src/TmpFiles.cc | 19 ++++++++--------- src/pdf2htmlEX.cc | 17 +++++++--------- src/util/ffw.c | 29 +++++++++++--------------- src/util/path.cc | 52 ++++++++++++++++++++--------------------------- 4 files changed, 50 insertions(+), 67 deletions(-) diff --git a/src/TmpFiles.cc b/src/TmpFiles.cc index 4965866..903b206 100644 --- a/src/TmpFiles.cc +++ b/src/TmpFiles.cc @@ -16,6 +16,12 @@ using namespace std; +#ifndef _WIN32 +# define STAT stat +#else +# define STAT _stat +#endif + namespace pdf2htmlEX { @@ -55,20 +61,13 @@ void TmpFiles::clean() cerr << "Remove temporary directory: " << param.tmp_dir << endl; } +// Return the total size of the temporary files in bytes double TmpFiles::get_total_size() const { double total_size = 0; -#ifndef _WIN32 - struct stat st; -#else - struct _stat st; -#endif + struct STAT st; for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) { -#ifndef _WIN32 - stat(iter->c_str(), &st); -#else - _stat(iter->c_str(), &st); -#endif + STAT(iter->c_str(), &st); total_size += st.st_size; } diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index e6d47b1..02c682d 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -45,6 +45,7 @@ ArgParser argparser; #ifdef _WIN32 # include # include +# include #endif void deprecated_font_suffix(const char * dummy = nullptr) @@ -70,11 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr) #if ENABLE_SVG cerr << " cairo " << cairo_version_string() << endl; #endif -#ifdef _WIN32 cerr << "Default data-dir: " << param.data_dir << endl; -#else - cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; -#endif cerr << 
"Supported image format:"; #ifdef ENABLE_LIBPNG cerr << " png"; @@ -143,7 +140,10 @@ void prepare_directories() tmp_dir.erase(tmp_dir.size() - 6); param.tmp_dir = tmp_dir + ss.str(); - ::CreateDirectory(param.tmp_dir.c_str(), NULL); + if (mkdir(param.tmp_dir.c_str())) { + cerr << "Cannot create temp directory (" << param.tmp_dir << "): " << strerror(errno) << endl; + exit(EXIT_FAILURE); + } #endif } @@ -169,7 +169,7 @@ void parse_options (int argc, char **argv) .add("embed-image", &param.embed_image, 1, "embed image files into output") .add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output") .add("embed-outline", &param.embed_outline, 1, "embed outlines into output") - .add("max-output-size", &param.max_size, -1, "maximum output size, in KB (-1 for no max)") + .add("output-size-limit", &param.max_size, -1, "Limit the output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger") .add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("css-filename", &param.css_filename, "", "filename of the generated css file") @@ -211,11 +211,7 @@ void parse_options (int argc, char **argv) // misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") .add("base-tmp-dir", &param.basetmp_dir, param.basetmp_dir, "base temporary directory - will create pdf2htmlEX-XXXXXX under it") -#ifdef _WIN32 .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") -#else - .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") -#endif // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") .add("debug", &param.debug, 0, "print debugging information") @@ -366,6 +362,7 @@ int main(int argc, char **argv) { #ifndef _WIN32 param.basetmp_dir = "/tmp"; + param.data_dir = PDF2HTMLEX_DATA_PATH; #else { // Under Windows, the default data_dir is under /data in the pdf2htmlEX directory diff --git a/src/util/ffw.c b/src/util/ffw.c index ebbd258..579a707 100644 --- a/src/util/ffw.c +++ b/src/util/ffw.c @@ -19,11 +19,6 @@ #include "ffw.h" -#if defined(_WIN32) -#undef printf -#undef vfprintf -#endif - static real EPS=1e-6; static inline int min(int a, int b) @@ -64,7 +59,7 @@ void ffw_init(int debug) if ( default_encoding==NULL ) default_encoding=FindOrMakeEncoding("ISO8859-1"); if ( default_encoding==NULL ) - default_encoding=&custom; /* In case iconv is broken */ + default_encoding=&custom; /* In case iconv is broken */ if(!debug) { @@ -172,13 +167,13 @@ void ffw_save(const char * filename) int r = GenerateScript(cur_fv->sf, _filename , _, -1, -1, NULL, NULL, cur_fv->map, NULL, ly_fore); - + free(_); free(_filename); if(!r) err("Cannot save font to %s\n", filename); -} +} void ffw_close(void) { FontViewClose(cur_fv); @@ -295,8 +290,8 @@ void ffw_add_empty_char(int32_t unicode, int width) { SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount); char buffer[400]; - SCSetMetaData(sc, - strcopy(StdGlyphName(buffer, unicode, + SCSetMetaData(sc, + strcopy(StdGlyphName(buffer, unicode,
cur_fv->sf->uni_interp, cur_fv->sf->for_new_glyphs)), unicode, sc->comment); SCSynchronizeWidth(sc, width, sc->width, cur_fv); @@ -382,13 +377,13 @@ void ffw_set_metric(double ascent, double descent) /* * TODO:bitmap, reference have not been considered in this function */ -void ffw_set_widths(int * width_list, int mapping_len, +void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int squeeze_wide) { SplineFont * sf = cur_fv->sf; - if(sf->onlybitmaps - && cur_fv->active_bitmap != NULL + if(sf->onlybitmaps + && cur_fv->active_bitmap != NULL && sf->bitmaps != NULL) { printf("TODO: width vs bitmap\n"); @@ -402,7 +397,7 @@ void ffw_set_widths(int * width_list, int mapping_len, /* * Don't mess with it if the glyphs is not used. */ - if(width_list[i] == -1) + if(width_list[i] == -1) { continue; } @@ -417,9 +412,9 @@ void ffw_set_widths(int * width_list, int mapping_len, } else if(((sc->width > EPS) && (((sc->width > width_list[i] + EPS) && (squeeze_wide)) - || ((sc->width < width_list[i] - EPS) && (stretch_narrow))))) + || ((sc->width < width_list[i] - EPS) && (stretch_narrow))))) { - real transform[6]; + real transform[6]; transform[0] = ((double)width_list[i]) / (sc->width); transform[3] = 1.0; transform[1] = transform[2] = transform[4] = transform[5] = 0; @@ -448,7 +443,7 @@ void ffw_import_svg_glyph(int code, const char * filename, double ox, double oy, { int a = cur_fv->sf->ascent; int d = cur_fv->sf->descent; - real transform[6]; + real transform[6]; transform[0] = 1.0; transform[3] = 1.0; transform[1] = transform[2] = 0.0; diff --git a/src/util/path.cc b/src/util/path.cc index 8815691..6b28e3c 100644 --- a/src/util/path.cc +++ b/src/util/path.cc @@ -12,19 +12,17 @@ #include "path.h" -using std::string; - -#if defined(_WIN32) -#include -int mkdir(const char *pathname, mode_t mode) { - if (::GetFileAttributes(pathname) == FILE_ATTRIBUTE_DIRECTORY) { - errno = EEXIST; - return -1; - } - return ::CreateDirectory(pathname, NULL) ? 
0 : -1; -} +#ifdef _WIN32 +# include +# define STAT _stat +# define MKDIR(A, B) mkdir(A) +#else +# define STAT stat +# define MKDIR(A, B) mkdir(A, B) #endif +using std::string; + namespace pdf2htmlEX { void create_directories(const string & path) @@ -36,21 +34,15 @@ void create_directories(const string & path) { create_directories(path.substr(0, idx)); } - - int r = mkdir(path.c_str(), S_IRWXU); + + int r = MKDIR(path.c_str(), S_IRWXU); if(r != 0) { if(errno == EEXIST) { -#if defined(_WIN32) - struct _stat32 stat_buf; - if((_stat32(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) + struct STAT stat_buf; + if((STAT(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) return; -#else - struct stat stat_buf; - if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) - return; -#endif } throw string("Cannot create directory: ") + path; @@ -61,8 +53,8 @@ bool sanitize_filename(string & filename) { string sanitized; bool format_specifier_found = false; - - for(size_t i = 0; i < filename.size(); i++) + + for(size_t i = 0; i < filename.size(); i++) { if('%' == filename[i]) { @@ -71,7 +63,7 @@ bool sanitize_filename(string & filename) sanitized.push_back('%'); sanitized.push_back('%'); } - else + else { // We haven't found the format specifier yet, so see if we can use this one as a valid formatter size_t original_i = i; @@ -80,14 +72,14 @@ bool sanitize_filename(string & filename) while(++i < filename.size()) { tmp.push_back(filename[i]); - + // If we aren't still in option specifiers, stop looking if(!strchr("0123456789", filename[i])) { break; } } - + // Check to see if we yielded a valid format specifier if('d' == tmp[tmp.size()-1]) { @@ -105,7 +97,7 @@ bool sanitize_filename(string & filename) } } } - else + else { sanitized.push_back(filename[i]); } @@ -114,7 +106,7 @@ bool sanitize_filename(string & filename) // Only sanitize if it is a valid format. 
if(format_specifier_found) { - filename.assign(sanitized); + filename.assign(sanitized); } return format_specifier_found; @@ -128,7 +120,7 @@ bool is_truetype_suffix(const string & suffix) string get_filename (const string & path) { size_t idx = path.rfind('/'); - if(idx == string::npos) + if(idx == string::npos) return path; else if (idx == path.size() - 1) return ""; @@ -151,4 +143,4 @@ string get_suffix(const string & path) } -} //namespace pdf2htmlEX +} //namespace pdf2htmlEX From 762281ed5bbab8c013b31cae5d58438505727f75 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Thu, 5 Dec 2013 20:48:45 -0500 Subject: [PATCH 08/16] Added gunicode as a dependency on MINGW. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 589d864..09ea6d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,7 +132,7 @@ endif() # Add additional dependencies if(MINGW) -set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} intl iconv gettextlib gettextpo gutils png jpeg openjpeg glib-2.0.dll z xml2 tiff gio-2.0.dll ltdl plibc.dll) +set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} intl iconv gettextlib gettextpo gutils png jpeg openjpeg glib-2.0.dll z xml2 tiff gio-2.0.dll ltdl plibc.dll gunicode) endif() # debug build flags (overwrite default cmake debug flags) From 4ffb2abb198978555fd0e5c680117cab12e758e3 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 6 Dec 2013 08:29:53 -0500 Subject: [PATCH 09/16] Adjusted the message for tmp-dir Added my name in Author --- AUTHORS | 1 + src/pdf2htmlEX.cc | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index 6002472..d6fd385 100644 --- a/AUTHORS +++ b/AUTHORS @@ -13,6 +13,7 @@ John Hewson Michele Redolfi Mick Giles Ryan Morlok +Marc Sanfacon Packagers: Arthur Titeica diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 02c682d..8869532 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -210,7 +210,7 @@ void parse_options (int argc, char **argv) 
// misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") - .add("base-tmp-dir", &param.basetmp_dir, param.basetmp_dir, "base temporary directory - will create pdf2htmlEX-XXXXXX under it") + .add("tmp-dir", &param.basetmp_dir, param.basetmp_dir, "specify the location of tempory directory.") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") @@ -360,6 +360,7 @@ void check_param() int main(int argc, char **argv) { + // We need to adjust these directories before parsing the options. #ifndef _WIN32 param.basetmp_dir = "/tmp"; param.data_dir = PDF2HTMLEX_DATA_PATH; From 5f388ad147ffe75dc089118583e21b7377196d9b Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 6 Dec 2013 11:17:43 -0500 Subject: [PATCH 10/16] Use _mkdir in Windows --- src/util/path.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/path.cc b/src/util/path.cc index 6b28e3c..0f51399 100644 --- a/src/util/path.cc +++ b/src/util/path.cc @@ -15,7 +15,7 @@ #ifdef _WIN32 # include # define STAT _stat -# define MKDIR(A, B) mkdir(A) +# define MKDIR(A, B) _mkdir(A) #else # define STAT stat # define MKDIR(A, B) mkdir(A, B) From 4ec707d50ab0facbe80c92601e99c820ba7b7cfb Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Sat, 7 Dec 2013 16:58:02 -0500 Subject: [PATCH 11/16] Reorder contributors --- AUTHORS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS b/AUTHORS index d6fd385..7cce796 100644 --- a/AUTHORS +++ b/AUTHORS @@ -10,10 +10,10 @@ hasufell Herbert Jones Hongliang Tian John Hewson +Marc Sanfacon Michele Redolfi Mick Giles Ryan Morlok -Marc Sanfacon Packagers: Arthur Titeica From 039e528d0d4ec425f76b8700074765f147432ed8 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Wed, 11 Dec 2013 11:00:35 -0500 Subject: [PATCH 12/16] Removed MINGW 
specific link libraries --- CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 09ea6d4..39663a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,11 +130,6 @@ else() set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES}) endif() -# Add additional dependencies -if(MINGW) -set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} intl iconv gettextlib gettextpo gutils png jpeg openjpeg glib-2.0.dll z xml2 tiff gio-2.0.dll ltdl plibc.dll gunicode) -endif() - # debug build flags (overwrite default cmake debug flags) set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg") set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg") From 42fea5b1ad50745ed167c1eaffc75965bd1d0b68 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Sat, 14 Dec 2013 17:38:02 -0500 Subject: [PATCH 13/16] Renamed output-size-limit to tmp-file-size-limit Added information to man --- pdf2htmlEX.1.in | 11 +++++++++++ src/pdf2htmlEX.cc | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 0f49153..e1b0b78 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -82,6 +82,13 @@ This switch is useful if you want pages to be loaded separately & dynamically -- Also see --page-filename. +.TP +.B --tmp-file-size-limit (Default: -1) +This limits the total size of the temporary files which will also limit the total size of the output file. +This is an estimate and it will stop after a page, once the total temporary files size is greater than this number. + +-1 means no limit and is the default. + .TP .B --dest-dir (Default: .) Specify destination folder. @@ -262,6 +269,10 @@ If switched off, intermediate files won't be cleaned in the end. 
.B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) Specify the folder holding the manifest and other files (see below for the manifest file)` +.TP +.B --tmp-dir (Default: /tmp) +Specify the temporary folder to use for temporary files + .TP .B --css-draw <0|1> (Default: 0) Experimental and unsupported CSS drawing diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 8869532..9d55e0c 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -169,7 +169,7 @@ void parse_options (int argc, char **argv) .add("embed-image", &param.embed_image, 1, "embed image files into output") .add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output") .add("embed-outline", &param.embed_outline, 1, "embed outlines into output") - .add("output-size-limit", &param.max_size, -1, "Limit the output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger") + .add("tmp-file-size-limit", &param.max_size, -1, "Limit the temporary file output size, in KB (-1 for no limit). 
This is only an estimate, the output may be bigger") .add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("css-filename", &param.css_filename, "", "filename of the generated css file") From defc1738be146288cac8be870559b91a7f71f2cc Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Fri, 27 Dec 2013 13:52:07 -0500 Subject: [PATCH 14/16] + quiet parameter --- CMakeLists.txt | 2 +- src/HTMLRenderer/font.cc | 58 ++++++++++++++++++------------------- src/HTMLRenderer/general.cc | 20 +++++++++---- src/Param.h | 1 + src/Preprocessor.cc | 16 ++++++---- src/pdf2htmlEX.cc | 1 + 6 files changed, 56 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39663a5..9011200 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ endif() include(CheckCXXCompilerFlag) check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT) if(NOT CXX0X_SUPPORT) - message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.") + message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.") endif() diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 4599297..850ff16 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -61,7 +61,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) try { - // inspired by mupdf + // inspired by mupdf string subtype; auto * id = font->getID(); @@ -130,7 +130,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) } } else if (dict->lookup("FontFile2", &obj)->isStream()) - { + { suffix = ".ttf"; } else if (dict->lookup("FontFile", &obj)->isStream()) @@ -166,9 +166,9 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) } obj.streamClose(); } - catch(int) + catch(int) { - cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; + cerr << "Something wrong when trying 
to dump font " << hex << fn_id << dec << endl; } obj2.free(); @@ -191,7 +191,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) FT_Library ft_lib; FT_Init_FreeType(&ft_lib); - CairoFontEngine font_engine(ft_lib); + CairoFontEngine font_engine(ft_lib); #if POPPLER_OLDER_THAN_0_23_0 auto * cur_font = font_engine.getFont(font, cur_doc, true); #else @@ -303,7 +303,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) cairo_matrix_init_translate(&m, 0.0, transformed_bbox_height); cairo_matrix_multiply(&ctm, &ctm, &m); - // scale up + // scale up cairo_matrix_init_scale(&m, scale, scale); cairo_matrix_multiply(&ctm, &ctm, &m); @@ -327,7 +327,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) box.y1 = font_bbox[1]; box.x2 = font_bbox[2]; box.y2 = font_bbox[3]; - auto gfx = new Gfx(cur_doc, output_dev, + auto gfx = new Gfx(cur_doc, output_dev, ((Gfx8BitFont*)font)->getResources(), &box, nullptr); output_dev->startDoc(cur_doc, &font_engine); @@ -449,7 +449,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * later we will map GID (instead of char code) to Unicode * * for CID + nonTrueType - * Flatten the font + * Flatten the font * * for CID Truetype * same as 8bitTrueType, except for that we have to check 65536 charcodes @@ -502,7 +502,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo { if(nameset.insert(string(cn)).second) { - cur_mapping2[i] = cn; + cur_mapping2[i] = cn; } else { @@ -561,7 +561,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * * -> For 8bit nonTruetype fonts: * Try to calculate the correct Unicode value from the glyph names, when collision is detected in ToUnicode Map - * + * * - Fill in the width_list, and set widths accordingly */ @@ -600,7 +600,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo /* * Skip glyphs without names (only for non-ttf 
fonts) */ - if(!is_truetype && (font_8bit != nullptr) + if(!is_truetype && (font_8bit != nullptr) && (font_8bit->getCharName(cur_code) == nullptr)) { continue; @@ -671,7 +671,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo } else { - char buf[2]; + char buf[2]; buf[0] = (cur_code >> 8) & 0xff; buf[1] = (cur_code & 0xff); cur_width = font_cid->getWidth(buf, 2) ; @@ -694,7 +694,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo info.space_width = cur_width; has_space = true; } - + width_list[mapped_code] = (int)floor(cur_width * info.em_size + 0.5); } @@ -705,7 +705,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo } ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph); - + ffw_reencode_raw(cur_mapping, max_key + 1, 1); // In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste @@ -769,13 +769,13 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo */ bool hinted = false; - // Call external hinting program if specified + // Call external hinting program if specified if(param.external_hint_tool != "") { hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param.external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0); } - // Call internal hinting procedure if specified + // Call internal hinting procedure if specified if((!hinted) && (param.auto_hint)) { ffw_load_font(cur_tmp_fn.c_str()); @@ -790,14 +790,14 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo swap(cur_tmp_fn, other_tmp_fn); } - /* - * Step 5 + /* + * Step 5 * Generate the font, load the metrics and set the embeddig bits (fstype) * * Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them) * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the 
font by fontforge until saved. */ - string fn = (char*)str_fmt("%s/f%llx.%s", + string fn = (char*)str_fmt("%s/f%llx.%s", (param.embed_font ? param.tmp_dir : param.dest_dir).c_str(), info.id, param.font_format.c_str()); @@ -818,14 +818,14 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo const FontInfo * HTMLRenderer::install_font(GfxFont * font) { assert(sizeof(long long) == 2*sizeof(int)); - + long long fn_id = (font == nullptr) ? 0 : hash_ref(font->getID()); auto iter = font_info_map.find(fn_id); if(iter != font_info_map.end()) return &(iter->second); - long long new_fn_id = font_info_map.size(); + long long new_fn_id = font_info_map.size(); auto cur_info_iter = font_info_map.insert(make_pair(fn_id, FontInfo())).first; @@ -854,7 +854,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) if(param.debug) { cerr << "Install font " << hex << new_fn_id << dec - << ": (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") " + << ": (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") " << (font->getName() ? font->getName()->getCString() : "") << endl; } @@ -895,7 +895,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) install_embedded_font(font, new_font_info); break; case gfxFontLocResident: - std::cerr << "Warning: Base 14 fonts should not be specially handled now. Please report a bug!" << std::endl; + cerr << "Warning: Base 14 fonts should not be specially handled now. Please report a bug!" 
<< std::endl; /* fall through */ case gfxFontLocExternal: install_external_font(font, new_font_info); @@ -904,14 +904,14 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) cerr << "TODO: other font loc" << endl; export_remote_default_font(new_fn_id); break; - } + } delete font_loc; } else { export_remote_default_font(new_fn_id); } - + return &new_font_info; } @@ -935,7 +935,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info) string fontname(font->getName()->getCString()); // resolve bad encodings in GB - auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname); + auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname); if(iter != GB_ENCODED_FONT_NAME_MAP.end()) { fontname = iter->second; @@ -1039,7 +1039,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & form << "font-style:normal;" << "font-weight:normal;" << "visibility:visible;" - << "}" + << "}" << endl; } @@ -1054,12 +1054,12 @@ static string general_font_family(GfxFont * font) } // TODO: this function is called when some font is unable to process, may use the name there as a hint -void HTMLRenderer::export_remote_default_font(long long fn_id) +void HTMLRenderer::export_remote_default_font(long long fn_id) { f_css.fs << "." << CSS::FONT_FAMILY_CN << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; } -void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) +void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) { f_css.fs << "." << CSS::FONT_FAMILY_CN << info.id << "{"; f_css.fs << "font-family:" << ((cssfont == "") ? 
(original_font_name + "," + general_font_family(font)) : cssfont) << ";"; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 94cf02d..18e1d9b 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -110,11 +110,15 @@ void HTMLRenderer::process(PDFDoc *doc) for(int i = param.first_page; i <= param.last_page ; ++i) { if (param.max_size != -1 && tmp_files.get_total_size() > param.max_size * 1024) { - cerr << "Stop processing, reach max size\n"; + if (!param.quiet) { + cerr << "Stop processing, reach max size\n"; + } break; } - cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if (!param.quiet) { + cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + } if(param.split_pages) { @@ -147,9 +151,11 @@ void HTMLRenderer::process(PDFDoc *doc) f_curpage = nullptr; } } - if(page_count >= 0) - cerr << "Working: " << page_count << "/" << page_count; - cerr << endl; + if (!param.quiet) { + if (page_count >= 0) + cerr << "Working: " << page_count << "/" << page_count; + cerr << endl; + } //////////////////////// // Process Outline @@ -164,7 +170,9 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer = nullptr; } - cerr << endl; + if (!param.quiet) { + cerr << endl; + } } void HTMLRenderer::setDefaultCTM(double *ctm) diff --git a/src/Param.h b/src/Param.h index 4816f72..4d2013c 100644 --- a/src/Param.h +++ b/src/Param.h @@ -72,6 +72,7 @@ struct Param std::string data_dir; std::string basetmp_dir; int css_draw; + int quiet; int debug; std::string input_filename, output_filename; diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index 075074e..cf98c5e 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -43,20 +43,24 @@ Preprocessor::~Preprocessor(void) void Preprocessor::process(PDFDoc * doc) { int page_count = (param.last_page - param.first_page + 1); - for(int i = param.first_page; i <= param.last_page ; ++i) + for(int i = param.first_page; i <= 
param.last_page ; ++i) { - cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + if (!param.quiet) { + cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; + } doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, - 0, + 0, (!(param.use_cropbox)), true, // crop false, // printing nullptr, nullptr, nullptr, nullptr); } - if(page_count >= 0) - cerr << "Preprocessing: " << page_count << "/" << page_count; - cerr << endl; + if (!param.quiet) { + if(page_count >= 0) + cerr << "Preprocessing: " << page_count << "/" << page_count; + cerr << endl; + } } void Preprocessor::drawChar(GfxState *state, double x, double y, diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 9d55e0c..02b1c11 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -214,6 +214,7 @@ void parse_options (int argc, char **argv) .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") + .add("quiet", &param.quiet, 0, "do not output information") .add("debug", &param.debug, 0, "print debugging information") // meta From 652b40971ac18885e4be4d330c595acf04b5f675 Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Wed, 8 Jan 2014 10:50:32 -0500 Subject: [PATCH 15/16] Revert "+ quiet parameter" This reverts commit defc1738be146288cac8be870559b91a7f71f2cc. 
--- CMakeLists.txt | 2 +- src/HTMLRenderer/font.cc | 58 ++++++++++++++++++------------------- src/HTMLRenderer/general.cc | 20 ++++--------- src/Param.h | 1 - src/Preprocessor.cc | 16 ++++------ src/pdf2htmlEX.cc | 1 - 6 files changed, 42 insertions(+), 56 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9011200..39663a5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ endif() include(CheckCXXCompilerFlag) check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT) if(NOT CXX0X_SUPPORT) - message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.") + message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.") endif() diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 850ff16..4599297 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -61,7 +61,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) try { - // inspired by mupdf + // inspired by mupdf string subtype; auto * id = font->getID(); @@ -130,7 +130,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) } } else if (dict->lookup("FontFile2", &obj)->isStream()) - { + { suffix = ".ttf"; } else if (dict->lookup("FontFile", &obj)->isStream()) @@ -166,9 +166,9 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info) } obj.streamClose(); } - catch(int) + catch(int) { - cerr << "Something wrong when trying to dump font " << hex << fn_id << dec << endl; + cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; } obj2.free(); @@ -191,7 +191,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) FT_Library ft_lib; FT_Init_FreeType(&ft_lib); - CairoFontEngine font_engine(ft_lib); + CairoFontEngine font_engine(ft_lib); #if POPPLER_OLDER_THAN_0_23_0 auto * cur_font = font_engine.getFont(font, cur_doc, true); #else @@ -303,7 +303,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * 
font, FontInfo & info) cairo_matrix_init_translate(&m, 0.0, transformed_bbox_height); cairo_matrix_multiply(&ctm, &ctm, &m); - // scale up + // scale up cairo_matrix_init_scale(&m, scale, scale); cairo_matrix_multiply(&ctm, &ctm, &m); @@ -327,7 +327,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info) box.y1 = font_bbox[1]; box.x2 = font_bbox[2]; box.y2 = font_bbox[3]; - auto gfx = new Gfx(cur_doc, output_dev, + auto gfx = new Gfx(cur_doc, output_dev, ((Gfx8BitFont*)font)->getResources(), &box, nullptr); output_dev->startDoc(cur_doc, &font_engine); @@ -449,7 +449,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * later we will map GID (instead of char code) to Unicode * * for CID + nonTrueType - * Flatten the font + * Flatten the font * * for CID Truetype * same as 8bitTrueType, except for that we have to check 65536 charcodes @@ -502,7 +502,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo { if(nameset.insert(string(cn)).second) { - cur_mapping2[i] = cn; + cur_mapping2[i] = cn; } else { @@ -561,7 +561,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo * * -> For 8bit nonTruetype fonts: * Try to calculate the correct Unicode value from the glyph names, when collision is detected in ToUnicode Map - * + * * - Fill in the width_list, and set widths accordingly */ @@ -600,7 +600,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo /* * Skip glyphs without names (only for non-ttf fonts) */ - if(!is_truetype && (font_8bit != nullptr) + if(!is_truetype && (font_8bit != nullptr) && (font_8bit->getCharName(cur_code) == nullptr)) { continue; @@ -671,7 +671,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo } else { - char buf[2]; + char buf[2]; buf[0] = (cur_code >> 8) & 0xff; buf[1] = (cur_code & 0xff); cur_width = font_cid->getWidth(buf, 2) ; @@ -694,7 +694,7 @@ void 
HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo info.space_width = cur_width; has_space = true; } - + width_list[mapped_code] = (int)floor(cur_width * info.em_size + 0.5); } @@ -705,7 +705,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo } ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph); - + ffw_reencode_raw(cur_mapping, max_key + 1, 1); // In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste @@ -769,13 +769,13 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo */ bool hinted = false; - // Call external hinting program if specified + // Call external hinting program if specified if(param.external_hint_tool != "") { hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param.external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0); } - // Call internal hinting procedure if specified + // Call internal hinting procedure if specified if((!hinted) && (param.auto_hint)) { ffw_load_font(cur_tmp_fn.c_str()); @@ -790,14 +790,14 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo swap(cur_tmp_fn, other_tmp_fn); } - /* - * Step 5 + /* + * Step 5 * Generate the font, load the metrics and set the embeddig bits (fstype) * * Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them) * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved. */ - string fn = (char*)str_fmt("%s/f%llx.%s", + string fn = (char*)str_fmt("%s/f%llx.%s", (param.embed_font ? 
param.tmp_dir : param.dest_dir).c_str(), info.id, param.font_format.c_str()); @@ -818,14 +818,14 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo const FontInfo * HTMLRenderer::install_font(GfxFont * font) { assert(sizeof(long long) == 2*sizeof(int)); - + long long fn_id = (font == nullptr) ? 0 : hash_ref(font->getID()); auto iter = font_info_map.find(fn_id); if(iter != font_info_map.end()) return &(iter->second); - long long new_fn_id = font_info_map.size(); + long long new_fn_id = font_info_map.size(); auto cur_info_iter = font_info_map.insert(make_pair(fn_id, FontInfo())).first; @@ -854,7 +854,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) if(param.debug) { cerr << "Install font " << hex << new_fn_id << dec - << ": (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") " + << ": (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") " << (font->getName() ? font->getName()->getCString() : "") << endl; } @@ -895,7 +895,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) install_embedded_font(font, new_font_info); break; case gfxFontLocResident: - cerr << "Warning: Base 14 fonts should not be specially handled now. Please report a bug!" << std::endl; + std::cerr << "Warning: Base 14 fonts should not be specially handled now. Please report a bug!" 
<< std::endl; /* fall through */ case gfxFontLocExternal: install_external_font(font, new_font_info); @@ -904,14 +904,14 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) cerr << "TODO: other font loc" << endl; export_remote_default_font(new_fn_id); break; - } + } delete font_loc; } else { export_remote_default_font(new_fn_id); } - + return &new_font_info; } @@ -935,7 +935,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info) string fontname(font->getName()->getCString()); // resolve bad encodings in GB - auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname); + auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname); if(iter != GB_ENCODED_FONT_NAME_MAP.end()) { fontname = iter->second; @@ -1039,7 +1039,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & form << "font-style:normal;" << "font-weight:normal;" << "visibility:visible;" - << "}" + << "}" << endl; } @@ -1054,12 +1054,12 @@ static string general_font_family(GfxFont * font) } // TODO: this function is called when some font is unable to process, may use the name there as a hint -void HTMLRenderer::export_remote_default_font(long long fn_id) +void HTMLRenderer::export_remote_default_font(long long fn_id) { f_css.fs << "." << CSS::FONT_FAMILY_CN << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; } -void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) +void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) { f_css.fs << "." << CSS::FONT_FAMILY_CN << info.id << "{"; f_css.fs << "font-family:" << ((cssfont == "") ? 
(original_font_name + "," + general_font_family(font)) : cssfont) << ";"; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 18e1d9b..94cf02d 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -110,15 +110,11 @@ void HTMLRenderer::process(PDFDoc *doc) for(int i = param.first_page; i <= param.last_page ; ++i) { if (param.max_size != -1 && tmp_files.get_total_size() > param.max_size * 1024) { - if (!param.quiet) { - cerr << "Stop processing, reach max size\n"; - } + cerr << "Stop processing, reach max size\n"; break; } - if (!param.quiet) { - cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; - } + cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; if(param.split_pages) { @@ -151,11 +147,9 @@ void HTMLRenderer::process(PDFDoc *doc) f_curpage = nullptr; } } - if (!param.quiet) { - if (page_count >= 0) - cerr << "Working: " << page_count << "/" << page_count; - cerr << endl; - } + if(page_count >= 0) + cerr << "Working: " << page_count << "/" << page_count; + cerr << endl; //////////////////////// // Process Outline @@ -170,9 +164,7 @@ void HTMLRenderer::process(PDFDoc *doc) bg_renderer = nullptr; } - if (!param.quiet) { - cerr << endl; - } + cerr << endl; } void HTMLRenderer::setDefaultCTM(double *ctm) diff --git a/src/Param.h b/src/Param.h index 4d2013c..4816f72 100644 --- a/src/Param.h +++ b/src/Param.h @@ -72,7 +72,6 @@ struct Param std::string data_dir; std::string basetmp_dir; int css_draw; - int quiet; int debug; std::string input_filename, output_filename; diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index cf98c5e..075074e 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -43,24 +43,20 @@ Preprocessor::~Preprocessor(void) void Preprocessor::process(PDFDoc * doc) { int page_count = (param.last_page - param.first_page + 1); - for(int i = param.first_page; i <= param.last_page ; ++i) + for(int i = param.first_page; i <= 
param.last_page ; ++i) { - if (!param.quiet) { - cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; - } + cerr << "Preprocessing: " << (i-param.first_page) << "/" << page_count << '\r' << flush; doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI, - 0, + 0, (!(param.use_cropbox)), true, // crop false, // printing nullptr, nullptr, nullptr, nullptr); } - if (!param.quiet) { - if(page_count >= 0) - cerr << "Preprocessing: " << page_count << "/" << page_count; - cerr << endl; - } + if(page_count >= 0) + cerr << "Preprocessing: " << page_count << "/" << page_count; + cerr << endl; } void Preprocessor::drawChar(GfxState *state, double x, double y, diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 02b1c11..9d55e0c 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -214,7 +214,6 @@ void parse_options (int argc, char **argv) .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") - .add("quiet", &param.quiet, 0, "do not output information") .add("debug", &param.debug, 0, "print debugging information") // meta From 5fab160e05cdf5fd8acfc20388687449f4b8c78f Mon Sep 17 00:00:00 2001 From: Marc Sanfacon Date: Thu, 9 Jan 2014 08:14:12 -0500 Subject: [PATCH 16/16] Modifications following code review Fixed rmdir under Windows/MINGW --- CMakeLists.txt | 2 +- src/HTMLRenderer/general.cc | 2 +- src/Param.h | 7 ++----- src/TmpFiles.cc | 5 ++++- src/pdf2htmlEX.cc | 12 ++++++------ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 39663a5..9011200 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -158,7 +158,7 @@ endif() include(CheckCXXCompilerFlag) check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT) if(NOT CXX0X_SUPPORT) - message(FATAL_ERROR "Error: you compiler does not support C++0x, please 
update it.") + message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.") endif() diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 94cf02d..9c85a97 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -109,7 +109,7 @@ void HTMLRenderer::process(PDFDoc *doc) int page_count = (param.last_page - param.first_page + 1); for(int i = param.first_page; i <= param.last_page ; ++i) { - if (param.max_size != -1 && tmp_files.get_total_size() > param.max_size * 1024) { + if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) { cerr << "Stop processing, reach max size\n"; break; } diff --git a/src/Param.h b/src/Param.h index 4816f72..dc08c6a 100644 --- a/src/Param.h +++ b/src/Param.h @@ -31,7 +31,7 @@ struct Param int embed_javascript; int embed_outline; int split_pages; - int max_size; + int tmp_file_size_limit; std::string dest_dir; std::string css_filename; std::string page_filename; @@ -70,14 +70,11 @@ struct Param // misc. 
int clean_tmp; std::string data_dir; - std::string basetmp_dir; + std::string tmp_dir; int css_draw; int debug; std::string input_filename, output_filename; - - // not a paramater - std::string tmp_dir; }; } // namespace pdf2htmlEX diff --git a/src/TmpFiles.cc b/src/TmpFiles.cc index 903b206..ac143bd 100644 --- a/src/TmpFiles.cc +++ b/src/TmpFiles.cc @@ -18,8 +18,11 @@ using namespace std; #ifndef _WIN32 # define STAT stat +# define RMDIR rmdir #else +# include # define STAT _stat +# define RMDIR _rmdir #endif namespace pdf2htmlEX { @@ -56,7 +59,7 @@ void TmpFiles::clean() cerr << "Remove temporary file: " << fn << endl; } - remove(param.tmp_dir.c_str()); + RMDIR(param.tmp_dir.c_str()); if(param.debug) cerr << "Remove temporary directory: " << param.tmp_dir << endl; } diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 9d55e0c..3c316ac 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -114,11 +114,11 @@ void embed_parser (const char * str) void prepare_directories() { - std::string tmp_dir = param.basetmp_dir + "/pdf2htmlEX-XXXXXX"; + std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX"; #ifndef _WIN32 errno = 0; - auto_ptr pBuf(new char[tmp_dir.size() + 1]); + unique_ptr pBuf(new char[tmp_dir.size() + 1]); strcpy(pBuf.get(), tmp_dir.c_str()); auto p = mkdtemp(pBuf.get()); if(p == nullptr) @@ -169,7 +169,7 @@ void parse_options (int argc, char **argv) .add("embed-image", &param.embed_image, 1, "embed image files into output") .add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output") .add("embed-outline", &param.embed_outline, 1, "embed outlines into output") - .add("tmp-file-size-limit", &param.max_size, -1, "Limit the temporary file output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger") + .add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Limit the temporary file output size, in KB (-1 for no limit). 
This is only an estimate, the output may be bigger") .add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("css-filename", &param.css_filename, "", "filename of the generated css file") @@ -210,7 +210,7 @@ void parse_options (int argc, char **argv) // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") - .add("tmp-dir", &param.basetmp_dir, param.basetmp_dir, "specify the location of tempory directory.") + .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of tempory directory.") .add("data-dir", &param.data_dir, param.data_dir, "specify data directory") // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") @@ -362,7 +362,7 @@ int main(int argc, char **argv) { // We need to adjust these directories before parsing the options. #ifndef _WIN32 - param.basetmp_dir = "/tmp"; + param.tmp_dir = "/tmp"; param.data_dir = PDF2HTMLEX_DATA_PATH; #else { @@ -374,7 +374,7 @@ int main(int argc, char **argv) // Under Windows, the temp path is not under /tmp, find it. char temppath[MAX_PATH]; ::GetTempPath(MAX_PATH, temppath); - param.basetmp_dir = temppath; + param.tmp_dir = temppath; } #endif