1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-21 12:40:08 +00:00

Merge pull request #254 from marcsanfacon/master

MinGW Port & 2 new options
This commit is contained in:
Lu Wang 2014-01-11 00:10:59 -08:00
commit ae7c8e8d9c
10 changed files with 224 additions and 129 deletions

View File

@ -11,6 +11,7 @@ hasufell <julian.ospald@googlemail.com>
Herbert Jones <herbert@mediafire.com>
Hongliang Tian <tatetian@gmail.com>
John Hewson <john@jahewson.com>
Marc Sanfacon <marc.sanfacon@gmail.com>
Michele Redolfi <michele@tecnicaict.com>
Mick Giles <mick@mickgiles.com>
Ryan Morlok <ryan.morlok@morlok.com>

View File

@ -130,7 +130,6 @@ else()
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES})
endif()
# debug build flags (overwrite default cmake debug flags)
set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg")
@ -159,7 +158,7 @@ endif()
include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT)
if(NOT CXX0X_SUPPORT)
message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.")
message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.")
endif()

View File

@ -82,6 +82,13 @@ This switch is useful if you want pages to be loaded separately & dynamically --
Also see --page-filename.
.TP
.B --tmp-file-size-limit <limit> (Default: -1)
Limit the total size of the temporary files to <limit> KB, which in turn limits the total size of the output file.
This is only an estimate: the limit is checked before each page is processed, so the conversion stops after the page that makes the total temporary file size exceed this number, and the output may be somewhat bigger than the limit.
-1 means no limit and is the default.
.TP
.B --dest-dir <dir> (Default: .)
Specify destination folder.
@ -262,6 +269,10 @@ If switched off, intermediate files won't be cleaned in the end.
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
Specify the folder holding the manifest and other files (see below for the manifest file).
.TP
.B --tmp-dir <dir> (Default: /tmp)
Specify the temporary folder to use for temporary files
.TP
.B --css-draw <0|1> (Default: 0)
Experimental and unsupported CSS drawing

View File

@ -45,7 +45,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
,param(param)
,html_text_page(param, all_manager)
,preprocessor(param)
,tmp_files(param)
,tmp_files(param)
{
if(!(param.debug))
{
@ -109,6 +109,11 @@ void HTMLRenderer::process(PDFDoc *doc)
int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i)
{
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
cerr << "Stop processing, reach max size\n";
break;
}
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.split_pages)
@ -394,20 +399,6 @@ void HTMLRenderer::post_process(void)
long line_no = 0;
while(getline(manifest_fin, line))
{
++line_no;
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string)
{
output << line << endl;
continue;
}
// trim space at both sides
{
static const char * whitespaces = " \t\n\v\f\r";
@ -424,6 +415,20 @@ void HTMLRenderer::post_process(void)
}
}
++line_no;
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string)
{
output << line << endl;
continue;
}
if(line.empty() || line[0] == '#')
continue;

View File

@ -31,6 +31,7 @@ struct Param
int embed_javascript;
int embed_outline;
int split_pages;
int tmp_file_size_limit;
std::string dest_dir;
std::string css_filename;
std::string page_filename;
@ -69,13 +70,11 @@ struct Param
// misc.
int clean_tmp;
std::string data_dir;
std::string tmp_dir;
int css_draw;
int debug;
std::string input_filename, output_filename;
// not a paramater
std::string tmp_dir;
};
} // namespace pdf2htmlEX

View File

@ -9,17 +9,27 @@
#include <iostream>
#include <cstdio>
#include <sys/stat.h>
#include "TmpFiles.h"
#include "Param.h"
using namespace std;
// Portability shims used by this file:
//   STAT  - name of both the stat structure and the stat function
//   RMDIR - function used to remove the temporary directory
// On non-Windows builds they map to the plain names; on Windows they map to
// the underscore-prefixed variants, and <direct.h> is pulled in as well.
#ifndef _WIN32
# define STAT stat
# define RMDIR rmdir
#else
# include <direct.h>
# define STAT _stat
# define RMDIR _rmdir
#endif
namespace pdf2htmlEX {
TmpFiles::TmpFiles( const Param& param )
: param( param )
: param( param )
{ }
TmpFiles::~TmpFiles()
@ -49,10 +59,23 @@ void TmpFiles::clean()
cerr << "Remove temporary file: " << fn << endl;
}
remove(param.tmp_dir.c_str());
RMDIR(param.tmp_dir.c_str());
if(param.debug)
cerr << "Remove temporary directory: " << param.tmp_dir << endl;
}
// Return the total size of the temporary files in bytes.
//
// Every path registered in tmp_files is stat'ed and its st_size is summed.
// Entries that cannot be stat'ed are skipped: when STAT() fails the contents
// of the stat buffer are not meaningful, so adding st_size would corrupt the
// total with an uninitialized or stale value.
double TmpFiles::get_total_size() const
{
    double total_size = 0;
    for(const auto & fn : tmp_files) {
        struct STAT st;
        if(STAT(fn.c_str(), &st) == 0) {
            total_size += st.st_size;
        }
    }
    return total_size;
}
} // namespace pdf2htmlEX

View File

@ -13,13 +13,14 @@ public:
explicit TmpFiles( const Param& param );
~TmpFiles();
void add( const std::string& fn);
void add( const std::string& fn);
double get_total_size() const;
private:
void clean();
void clean();
const Param& param;
std::set<std::string> tmp_files;
std::set<std::string> tmp_files;
};
} // namespace pdf2htmlEX

View File

@ -42,6 +42,12 @@ using namespace pdf2htmlEX;
Param param;
ArgParser argparser;
#ifdef _WIN32
# include <iomanip>
# include <libgen.h>
# include <direct.h>
#endif
void deprecated_font_suffix(const char * dummy = nullptr)
{
cerr << "--font-suffix is deprecated. Use `--font-format` instead." << endl;
@ -65,7 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr)
#if ENABLE_SVG
cerr << " cairo " << cairo_version_string() << endl;
#endif
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
cerr << "Default data-dir: " << param.data_dir << endl;
cerr << "Supported image format:";
#ifdef ENABLE_LIBPNG
cerr << " png";
@ -106,6 +112,41 @@ void embed_parser (const char * str)
}
}
// Create a unique temporary working directory underneath param.tmp_dir and
// store the resulting path back into param.tmp_dir.
//
// On failure an error message is printed to stderr and the process exits
// with EXIT_FAILURE.
void prepare_directories()
{
    std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX";

#ifndef _WIN32
    errno = 0;

    // mkdtemp() rewrites the trailing XXXXXX in place, so it needs a writable
    // NUL-terminated copy of the template. The buffer comes from new[], hence
    // unique_ptr<char[]> so that it is released with delete[].
    std::unique_ptr<char[]> pBuf(new char[tmp_dir.size() + 1]);
    strcpy(pBuf.get(), tmp_dir.c_str());
    auto p = mkdtemp(pBuf.get());
    if(p == nullptr)
    {
        const char * errmsg = strerror(errno);
        if(!errmsg)
        {
            errmsg = "unknown error";
        }
        std::cerr << "Cannot create temp directory: " << errmsg << std::endl;
        exit(EXIT_FAILURE);
    }
    param.tmp_dir = pBuf.get();
#else
    // mkdtemp() is not used on Windows: the XXXXXX placeholder is replaced
    // with a pseudo-random number in [0, 1e6) instead.
    srand((unsigned)time(0));
    int rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);

    // Zero-pad to six digits; the default fill character is a space, which
    // would put blanks into the directory name for values below 100000.
    std::stringstream ss;
    ss << std::setfill('0') << std::setw(6) << rand_value;

    tmp_dir.erase(tmp_dir.size() - 6);
    param.tmp_dir = tmp_dir + ss.str();

    // Reset errno so the message below reflects this mkdir() call only.
    errno = 0;
    if (mkdir(param.tmp_dir.c_str())) {
        std::cerr << "Cannot create temp directory (" << param.tmp_dir << "): " << strerror(errno) << std::endl;
        exit(EXIT_FAILURE);
    }
#endif
}
void parse_options (int argc, char **argv)
{
argparser
@ -128,6 +169,7 @@ void parse_options (int argc, char **argv)
.add("embed-image", &param.embed_image, 1, "embed image files into output")
.add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output")
.add("embed-outline", &param.embed_outline, 1, "embed outlines into output")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Limit the temporary file output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger")
.add("split-pages", &param.split_pages, 0, "split pages into separate files")
.add("dest-dir", &param.dest_dir, ".", "specify destination directory")
.add("css-filename", &param.css_filename, "", "filename of the generated css file")
@ -168,7 +210,8 @@ void parse_options (int argc, char **argv)
// misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of tempory directory.")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
// .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing")
.add("debug", &param.debug, 0, "print debugging information")
@ -317,29 +360,33 @@ void check_param()
int main(int argc, char **argv)
{
// We need to adjust these directories before parsing the options.
#ifndef _WIN32
param.tmp_dir = "/tmp";
param.data_dir = PDF2HTMLEX_DATA_PATH;
#else
{
// Under Windows, the default data_dir is under /data in the pdf2htmlEX directory
stringstream ss;
ss << dirname(argv[0]) << "/data";
param.data_dir = ss.str();
// Under Windows, the temp path is not under /tmp, find it.
char temppath[MAX_PATH];
::GetTempPath(MAX_PATH, temppath);
param.tmp_dir = temppath;
}
#endif
parse_options(argc, argv);
check_param();
//prepare the directories
{
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
errno = 0;
auto p = mkdtemp(buf);
if(p == nullptr)
{
const char * errmsg = strerror(errno);
if(!errmsg)
{
errmsg = "unknown error";
}
cerr << "Cannot create temp directory: " << errmsg << endl;
exit(EXIT_FAILURE);
}
param.tmp_dir = buf;
}
prepare_directories();
if(param.debug)
if(param.debug) {
cerr << "temporary dir: " << (param.tmp_dir) << endl;
}
try
{

View File

@ -59,7 +59,7 @@ void ffw_init(int debug)
if ( default_encoding==NULL )
default_encoding=FindOrMakeEncoding("ISO8859-1");
if ( default_encoding==NULL )
default_encoding=&custom; /* In case iconv is broken */
default_encoding=&custom; /* In case iconv is broken */
if(!debug)
{

View File

@ -12,6 +12,15 @@
#include "path.h"
// Portability shims used by this file:
//   STAT        - name of both the stat structure and the stat function
//   MKDIR(A, B) - create directory A with mode B
// On Windows the underscore-prefixed variants are used and the mode
// argument B is dropped, since _mkdir is invoked with the path only.
#ifdef _WIN32
# include <direct.h>
# define STAT _stat
# define MKDIR(A, B) _mkdir(A)
#else
# define STAT stat
# define MKDIR(A, B) mkdir(A, B)
#endif
using std::string;
namespace pdf2htmlEX {
@ -26,13 +35,13 @@ void create_directories(const string & path)
create_directories(path.substr(0, idx));
}
int r = mkdir(path.c_str(), S_IRWXU);
int r = MKDIR(path.c_str(), S_IRWXU);
if(r != 0)
{
if(errno == EEXIST)
{
struct stat stat_buf;
if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
struct STAT stat_buf;
if((STAT(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
return;
}