mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-21 12:40:08 +00:00
Merge pull request #254 from marcsanfacon/master
MinGW Port & 2 new options
This commit is contained in:
commit
ae7c8e8d9c
1
AUTHORS
1
AUTHORS
@ -11,6 +11,7 @@ hasufell <julian.ospald@googlemail.com>
|
||||
Herbert Jones <herbert@mediafire.com>
|
||||
Hongliang Tian <tatetian@gmail.com>
|
||||
John Hewson <john@jahewson.com>
|
||||
Marc Sanfacon <marc.sanfacon@gmail.com>
|
||||
Michele Redolfi <michele@tecnicaict.com>
|
||||
Mick Giles <mick@mickgiles.com>
|
||||
Ryan Morlok <ryan.morlok@morlok.com>
|
||||
|
@ -130,7 +130,6 @@ else()
|
||||
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES})
|
||||
endif()
|
||||
|
||||
|
||||
# debug build flags (overwrite default cmake debug flags)
|
||||
set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg")
|
||||
@ -159,7 +158,7 @@ endif()
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT)
|
||||
if(NOT CXX0X_SUPPORT)
|
||||
message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.")
|
||||
message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.")
|
||||
endif()
|
||||
|
||||
|
||||
|
@ -82,6 +82,13 @@ This switch is useful if you want pages to be loaded separately & dynamically --
|
||||
|
||||
Also see --page-filename.
|
||||
|
||||
.TP
|
||||
.B --tmp-file-size-limit <limit> (Default: -1)
|
||||
This limits the total size (in KB) of the temporary files, which also limits the total size of the output file.
|
||||
This is only an estimate: processing stops after the current page once the total size of the temporary files exceeds this number.
|
||||
|
||||
-1 means no limit and is the default.
|
||||
|
||||
.TP
|
||||
.B --dest-dir <dir> (Default: .)
|
||||
Specify destination folder.
|
||||
@ -262,6 +269,10 @@ If switched off, intermediate files won't be cleaned in the end.
|
||||
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||
Specify the folder holding the manifest and other files (see below for the manifest file).
|
||||
|
||||
.TP
|
||||
.B --tmp-dir <dir> (Default: /tmp)
|
||||
Specify the temporary folder to use for temporary files
|
||||
|
||||
.TP
|
||||
.B --css-draw <0|1> (Default: 0)
|
||||
Experimental and unsupported CSS drawing
|
||||
|
@ -109,6 +109,11 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
int page_count = (param.last_page - param.first_page + 1);
|
||||
for(int i = param.first_page; i <= param.last_page ; ++i)
|
||||
{
|
||||
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
|
||||
cerr << "Stop processing, reach max size\n";
|
||||
break;
|
||||
}
|
||||
|
||||
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
|
||||
|
||||
if(param.split_pages)
|
||||
@ -394,20 +399,6 @@ void HTMLRenderer::post_process(void)
|
||||
long line_no = 0;
|
||||
while(getline(manifest_fin, line))
|
||||
{
|
||||
++line_no;
|
||||
|
||||
if(line == "\"\"\"")
|
||||
{
|
||||
embed_string = !embed_string;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(embed_string)
|
||||
{
|
||||
output << line << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// trim space at both sides
|
||||
{
|
||||
static const char * whitespaces = " \t\n\v\f\r";
|
||||
@ -424,6 +415,20 @@ void HTMLRenderer::post_process(void)
|
||||
}
|
||||
}
|
||||
|
||||
++line_no;
|
||||
|
||||
if(line == "\"\"\"")
|
||||
{
|
||||
embed_string = !embed_string;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(embed_string)
|
||||
{
|
||||
output << line << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(line.empty() || line[0] == '#')
|
||||
continue;
|
||||
|
||||
|
@ -31,6 +31,7 @@ struct Param
|
||||
int embed_javascript;
|
||||
int embed_outline;
|
||||
int split_pages;
|
||||
int tmp_file_size_limit;
|
||||
std::string dest_dir;
|
||||
std::string css_filename;
|
||||
std::string page_filename;
|
||||
@ -69,13 +70,11 @@ struct Param
|
||||
// misc.
|
||||
int clean_tmp;
|
||||
std::string data_dir;
|
||||
std::string tmp_dir;
|
||||
int css_draw;
|
||||
int debug;
|
||||
|
||||
std::string input_filename, output_filename;
|
||||
|
||||
// not a paramater
|
||||
std::string tmp_dir;
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
@ -9,12 +9,22 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "TmpFiles.h"
|
||||
#include "Param.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifndef _WIN32
|
||||
# define STAT stat
|
||||
# define RMDIR rmdir
|
||||
#else
|
||||
# include <direct.h>
|
||||
# define STAT _stat
|
||||
# define RMDIR _rmdir
|
||||
#endif
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
|
||||
@ -49,10 +59,23 @@ void TmpFiles::clean()
|
||||
cerr << "Remove temporary file: " << fn << endl;
|
||||
}
|
||||
|
||||
remove(param.tmp_dir.c_str());
|
||||
RMDIR(param.tmp_dir.c_str());
|
||||
if(param.debug)
|
||||
cerr << "Remove temporary directory: " << param.tmp_dir << endl;
|
||||
}
|
||||
|
||||
// Return the total size of the temporary files in bytes
|
||||
double TmpFiles::get_total_size() const
|
||||
{
|
||||
double total_size = 0;
|
||||
struct STAT st;
|
||||
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) {
|
||||
STAT(iter->c_str(), &st);
|
||||
total_size += st.st_size;
|
||||
}
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
||||
|
@ -14,6 +14,7 @@ public:
|
||||
~TmpFiles();
|
||||
|
||||
void add( const std::string& fn);
|
||||
double get_total_size() const;
|
||||
|
||||
private:
|
||||
void clean();
|
||||
|
@ -42,6 +42,12 @@ using namespace pdf2htmlEX;
|
||||
Param param;
|
||||
ArgParser argparser;
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <iomanip>
|
||||
# include <libgen.h>
|
||||
# include <direct.h>
|
||||
#endif
|
||||
|
||||
void deprecated_font_suffix(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "--font-suffix is deprecated. Use `--font-format` instead." << endl;
|
||||
@ -65,7 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr)
|
||||
#if ENABLE_SVG
|
||||
cerr << " cairo " << cairo_version_string() << endl;
|
||||
#endif
|
||||
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
|
||||
cerr << "Default data-dir: " << param.data_dir << endl;
|
||||
cerr << "Supported image format:";
|
||||
#ifdef ENABLE_LIBPNG
|
||||
cerr << " png";
|
||||
@ -106,6 +112,41 @@ void embed_parser (const char * str)
|
||||
}
|
||||
}
|
||||
|
||||
void prepare_directories()
|
||||
{
|
||||
std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX";
|
||||
#ifndef _WIN32
|
||||
errno = 0;
|
||||
|
||||
unique_ptr<char> pBuf(new char[tmp_dir.size() + 1]);
|
||||
strcpy(pBuf.get(), tmp_dir.c_str());
|
||||
auto p = mkdtemp(pBuf.get());
|
||||
if(p == nullptr)
|
||||
{
|
||||
const char * errmsg = strerror(errno);
|
||||
if(!errmsg)
|
||||
{
|
||||
errmsg = "unknown error";
|
||||
}
|
||||
cerr << "Cannot create temp directory: " << errmsg << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
param.tmp_dir = pBuf.get();
|
||||
#else
|
||||
srand((unsigned)time(0));
|
||||
int rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);
|
||||
stringstream ss;
|
||||
ss << setw(6) << rand_value;
|
||||
|
||||
tmp_dir.erase(tmp_dir.size() - 6);
|
||||
param.tmp_dir = tmp_dir + ss.str();
|
||||
if (mkdir(param.tmp_dir.c_str())) {
|
||||
cerr << "Cannot create temp directory (" << param.tmp_dir << "): " << strerror(errno) << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void parse_options (int argc, char **argv)
|
||||
{
|
||||
argparser
|
||||
@ -128,6 +169,7 @@ void parse_options (int argc, char **argv)
|
||||
.add("embed-image", ¶m.embed_image, 1, "embed image files into output")
|
||||
.add("embed-javascript", ¶m.embed_javascript, 1, "embed JavaScript files into output")
|
||||
.add("embed-outline", ¶m.embed_outline, 1, "embed outlines into output")
|
||||
.add("tmp-file-size-limit", ¶m.tmp_file_size_limit, -1, "Limit the temporary file output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger")
|
||||
.add("split-pages", ¶m.split_pages, 0, "split pages into separate files")
|
||||
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
||||
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
|
||||
@ -168,7 +210,8 @@ void parse_options (int argc, char **argv)
|
||||
|
||||
// misc.
|
||||
.add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion")
|
||||
.add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
|
||||
.add("tmp-dir", ¶m.tmp_dir, param.tmp_dir, "specify the location of tempory directory.")
|
||||
.add("data-dir", ¶m.data_dir, param.data_dir, "specify data directory")
|
||||
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
|
||||
// .add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing")
|
||||
.add("debug", ¶m.debug, 0, "print debugging information")
|
||||
@ -317,29 +360,33 @@ void check_param()
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// We need to adjust these directories before parsing the options.
|
||||
#ifndef _WIN32
|
||||
param.tmp_dir = "/tmp";
|
||||
param.data_dir = PDF2HTMLEX_DATA_PATH;
|
||||
#else
|
||||
{
|
||||
// Under Windows, the default data_dir is under /data in the pdf2htmlEX directory
|
||||
stringstream ss;
|
||||
ss << dirname(argv[0]) << "/data";
|
||||
param.data_dir = ss.str();
|
||||
|
||||
// Under Windows, the temp path is not under /tmp, find it.
|
||||
char temppath[MAX_PATH];
|
||||
::GetTempPath(MAX_PATH, temppath);
|
||||
param.tmp_dir = temppath;
|
||||
}
|
||||
#endif
|
||||
|
||||
parse_options(argc, argv);
|
||||
check_param();
|
||||
|
||||
//prepare the directories
|
||||
{
|
||||
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
|
||||
errno = 0;
|
||||
auto p = mkdtemp(buf);
|
||||
if(p == nullptr)
|
||||
{
|
||||
const char * errmsg = strerror(errno);
|
||||
if(!errmsg)
|
||||
{
|
||||
errmsg = "unknown error";
|
||||
}
|
||||
cerr << "Cannot create temp directory: " << errmsg << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
param.tmp_dir = buf;
|
||||
}
|
||||
prepare_directories();
|
||||
|
||||
if(param.debug)
|
||||
if(param.debug) {
|
||||
cerr << "temporary dir: " << (param.tmp_dir) << endl;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -12,6 +12,15 @@
|
||||
|
||||
#include "path.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <direct.h>
|
||||
# define STAT _stat
|
||||
# define MKDIR(A, B) _mkdir(A)
|
||||
#else
|
||||
# define STAT stat
|
||||
# define MKDIR(A, B) mkdir(A, B)
|
||||
#endif
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
@ -26,13 +35,13 @@ void create_directories(const string & path)
|
||||
create_directories(path.substr(0, idx));
|
||||
}
|
||||
|
||||
int r = mkdir(path.c_str(), S_IRWXU);
|
||||
int r = MKDIR(path.c_str(), S_IRWXU);
|
||||
if(r != 0)
|
||||
{
|
||||
if(errno == EEXIST)
|
||||
{
|
||||
struct stat stat_buf;
|
||||
if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
|
||||
struct STAT stat_buf;
|
||||
if((STAT(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
|
||||
return;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user