diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc
index 416725f..2d5721a 100644
--- a/src/HTMLRenderer/general.cc
+++ b/src/HTMLRenderer/general.cc
@@ -45,7 +45,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
,param(param)
,html_text_page(param, all_manager)
,preprocessor(param)
- ,tmp_files(param)
+ ,tmp_files(param)
{
if(!(param.debug))
{
@@ -79,7 +79,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
}
HTMLRenderer::~HTMLRenderer()
-{
+{
ffw_finalize();
delete [] cur_mapping;
delete [] cur_mapping2;
@@ -96,7 +96,7 @@ void HTMLRenderer::process(PDFDoc *doc)
///////////////////
// Process pages
-
+
bg_renderer = nullptr;
if(param.process_nontext)
{
@@ -107,15 +107,20 @@ void HTMLRenderer::process(PDFDoc *doc)
}
int page_count = (param.last_page - param.first_page + 1);
- for(int i = param.first_page; i <= param.last_page ; ++i)
+ for(int i = param.first_page; i <= param.last_page ; ++i)
{
+ if (param.max_size != -1 && tmp_files.get_total_size() > param.max_size * 1024) {
+ cerr << "Stop processing, reach max size\n";
+ break;
+ }
+
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.split_pages)
{
string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i);
auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str());
- f_curpage = new ofstream((char*)page_fn, ofstream::binary);
+ f_curpage = new ofstream((char*)page_fn, ofstream::binary);
if(!(*f_curpage))
throw string("Cannot open ") + (char*)page_fn + " for writing";
set_stream_flags((*f_curpage));
@@ -128,9 +133,9 @@ void HTMLRenderer::process(PDFDoc *doc)
bg_renderer->render_page(doc, i);
}
- doc->displayPage(this, i,
+ doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
- 0,
+ 0,
(!(param.use_cropbox)),
true, // crop
false, // printing
@@ -149,7 +154,7 @@ void HTMLRenderer::process(PDFDoc *doc)
////////////////////////
// Process Outline
if(param.process_outline)
- process_outline();
+ process_outline();
post_process();
@@ -170,7 +175,7 @@ void HTMLRenderer::setDefaultCTM(double *ctm)
#if POPPLER_OLDER_THAN_0_23_0
void HTMLRenderer::startPage(int pageNum, GfxState *state)
#else
-void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
+void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
#endif
{
this->pageNum = pageNum;
@@ -183,12 +188,12 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
long long wid = all_manager.width.install(pageWidth);
long long hid = all_manager.height.install(pageHeight);
(*f_curpage)
- << "
"
- << "
";
-
+
// close page
(*f_curpage) << "
" << endl;
@@ -266,7 +271,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
*/
{
vector zoom_factors;
-
+
if(is_positive(param.zoom))
{
zoom_factors.push_back(param.zoom);
@@ -283,8 +288,8 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
}
double zoom = (zoom_factors.empty() ? 1.0 : (*min_element(zoom_factors.begin(), zoom_factors.end())));
-
- text_scale_factor1 = max(zoom, param.font_size_multiplier);
+
+ text_scale_factor1 = max(zoom, param.font_size_multiplier);
text_scale_factor2 = zoom / text_scale_factor1;
}
@@ -340,13 +345,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
* we have to keep the html file for pages into a temporary place
* because we'll have to embed css before it
*
- * Otherwise just generate it
+ * Otherwise just generate it
*/
auto fn = str_fmt("%s/__pages", param.tmp_dir.c_str());
tmp_files.add((char*)fn);
f_pages.path = (char*)fn;
- f_pages.fs.open(f_pages.path, ofstream::binary);
+ f_pages.fs.open(f_pages.path, ofstream::binary);
if(!f_pages.fs)
throw string("Cannot open ") + (char*)fn + " for writing";
set_stream_flags(f_pages.fs);
@@ -371,7 +376,7 @@ void HTMLRenderer::post_process(void)
{
f_outline.fs.close();
}
- f_pages.fs.close();
+ f_pages.fs.close();
f_css.fs.close();
// build the main HTML file
@@ -492,7 +497,7 @@ void HTMLRenderer::dump_css (void)
all_manager.width .dump_css(f_css.fs);
all_manager.left .dump_css(f_css.fs);
all_manager.bgimage_size .dump_css(f_css.fs);
-
+
// print css
if(param.printing)
{
@@ -518,8 +523,8 @@ void HTMLRenderer::dump_css (void)
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
{
string fn = get_filename(path);
- string suffix = (type == "") ? get_suffix(fn) : type;
-
+ string suffix = (type == "") ? get_suffix(fn) : type;
+
// TODO
auto iter = EMBED_STRING_MAP.find(suffix);
if(iter == EMBED_STRING_MAP.end())
@@ -529,14 +534,14 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
}
const auto & entry = iter->second;
-
+
if(param.*(entry.embed_flag))
{
ifstream fin(path, ifstream::binary);
if(!fin)
throw string("Cannot open file ") + path + " for embedding";
out << entry.prefix_embed;
-
+
if(entry.base64_encode)
{
out << Base64Stream(fin);
diff --git a/src/Param.h b/src/Param.h
index 8a566e7..4816f72 100644
--- a/src/Param.h
+++ b/src/Param.h
@@ -17,20 +17,21 @@ struct Param
{
// pages
int first_page, last_page;
-
+
// dimensions
double zoom;
double fit_width, fit_height;
int use_cropbox;
double h_dpi, v_dpi;
-
- // output
+
+ // output
int embed_css;
int embed_font;
int embed_image;
int embed_javascript;
int embed_outline;
int split_pages;
+ int max_size;
std::string dest_dir;
std::string css_filename;
std::string page_filename;
@@ -39,7 +40,7 @@ struct Param
int process_outline;
int printing;
int fallback;
-
+
// fonts
int embed_external_font;
std::string font_format;
@@ -50,7 +51,7 @@ struct Param
int squeeze_wide_glyph;
int override_fstype;
int process_type3;
-
+
// text
double h_eps, v_eps;
double space_threshold;
@@ -61,17 +62,18 @@ struct Param
// background image
std::string bg_format;
-
+
// encryption
std::string owner_password, user_password;
int no_drm;
-
+
// misc.
int clean_tmp;
std::string data_dir;
+ std::string basetmp_dir;
int css_draw;
int debug;
-
+
std::string input_filename, output_filename;
// not a paramater
diff --git a/src/TmpFiles.cc b/src/TmpFiles.cc
index efaf0cf..b55e341 100644
--- a/src/TmpFiles.cc
+++ b/src/TmpFiles.cc
@@ -9,6 +9,7 @@
#include
#include
+#include
#include "TmpFiles.h"
#include "Param.h"
@@ -19,11 +20,11 @@ namespace pdf2htmlEX {
TmpFiles::TmpFiles( const Param& param )
- : param( param )
+ : param( param )
{ }
TmpFiles::~TmpFiles()
-{
+{
clean();
}
@@ -54,5 +55,16 @@ void TmpFiles::clean()
cerr << "Remove temporary directory: " << param.tmp_dir << endl;
}
+double TmpFiles::get_total_size() const
+{
+    // Sum st_size over every registered temporary file; entries that cannot be stat'ed are skipped.
+    double total_size = 0;
+    struct _stat st;
+    for(const auto & fn : tmp_files) {
+        if(_stat(fn.c_str(), &st) == 0) // on failure `st` is not filled in, so it must not be read
+            total_size += st.st_size;
+    }
+    return total_size;
+}
} // namespace pdf2htmlEX
diff --git a/src/TmpFiles.h b/src/TmpFiles.h
index b7ad46c..277281d 100644
--- a/src/TmpFiles.h
+++ b/src/TmpFiles.h
@@ -7,19 +7,20 @@
namespace pdf2htmlEX {
-class TmpFiles
+class TmpFiles
{
public:
explicit TmpFiles( const Param& param );
~TmpFiles();
- void add( const std::string& fn);
+ void add( const std::string& fn);
+ double get_total_size() const;
private:
- void clean();
-
+ void clean();
+
const Param& param;
- std::set tmp_files;
+ std::set tmp_files;
};
} // namespace pdf2htmlEX
diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc
index cbfce42..e6d47b1 100644
--- a/src/pdf2htmlEX.cc
+++ b/src/pdf2htmlEX.cc
@@ -141,7 +141,6 @@ void prepare_directories()
stringstream ss;
ss << setw(6) << rand_value;
- std::cout << "1- " << tmp_dir << endl;
tmp_dir.erase(tmp_dir.size() - 6);
param.tmp_dir = tmp_dir + ss.str();
::CreateDirectory(param.tmp_dir.c_str(), NULL);
@@ -170,6 +169,7 @@ void parse_options (int argc, char **argv)
.add("embed-image", ¶m.embed_image, 1, "embed image files into output")
.add("embed-javascript", ¶m.embed_javascript, 1, "embed JavaScript files into output")
.add("embed-outline", ¶m.embed_outline, 1, "embed outlines into output")
+ .add("max-output-size", ¶m.max_size, -1, "maximum output size, in KB (-1 for no max)")
.add("split-pages", ¶m.split_pages, 0, "split pages into separate files")
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
@@ -390,7 +390,6 @@ int main(int argc, char **argv)
cerr << "temporary dir: " << (param.tmp_dir) << endl;
}
- exit(0);
try
{
create_directories(param.dest_dir);