From 0a7d0f20a2c59a3da08ba684152b066307ca8d83 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 12 Sep 2012 23:55:29 +0800 Subject: [PATCH] behaviour with single-html & split-pages --- pdf2htmlEX.1 | 2 +- src/HTMLRenderer/general.cc | 51 +++++++++++++++++++------------------ src/include/HTMLRenderer.h | 3 ++- src/pdf2htmlEX.cc | 4 +-- 4 files changed, 31 insertions(+), 29 deletions(-) diff --git a/pdf2htmlEX.1 b/pdf2htmlEX.1 index ad4f354..7ae49f3 100644 --- a/pdf2htmlEX.1 +++ b/pdf2htmlEX.1 @@ -61,7 +61,7 @@ If switched out, there will be several files generated along with the HTML file .B --split-pages <0|1> (Default: 0) If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0 -The output files will be named as 0.page, 1.page, ... +The output files will be named as 0.page, 1.page, ... .TP .B --embed-base-font <0|1> (Default: 1) Whether to embed base 14 fonts. diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index d0dc13d..ef132a5 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -89,9 +89,8 @@ void HTMLRenderer::process(PDFDoc *doc) { if(param->split_pages) { - auto page_fn = str_fmt("%s/__pages%x", tmp_dir.c_str(), i); + auto page_fn = str_fmt("%s/%s%d.page", dest_dir.c_str(), param->output_filename.c_str(), i); html_fout.open((char*)page_fn, ofstream::binary); - add_tmp_file((char*)page_fn); fix_stream(html_fout); } @@ -153,7 +152,7 @@ void HTMLRenderer::pre_process() ? str_fmt("%s/__css", tmp_dir.c_str()) : str_fmt("%s/%s", dest_dir.c_str(), param->css_filename.c_str()); - if(param->single_html) + if(param->single_html && (!param->split_pages)) add_tmp_file((char*)fn); css_path = (char*)fn, @@ -172,12 +171,8 @@ void HTMLRenderer::pre_process() * * Otherwise just generate it */ - auto fn = (param->single_html) - ? 
str_fmt("%s/__pages", tmp_dir.c_str()) - : str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str()); - - if(param->single_html) - add_tmp_file((char*)fn); + auto fn = str_fmt("%s/__pages", tmp_dir.c_str()); + add_tmp_file((char*)fn); html_path = (char*)fn; html_fout.open(html_path, ofstream::binary); @@ -191,8 +186,8 @@ void HTMLRenderer::post_process() html_fout.close(); css_fout.close(); - //only when !split-page, do we have some work left to do - if(!param->split_pages) + //only when split-page, do we have some work left to do + if(param->split_pages) return; ofstream output((char*)str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str())); @@ -205,6 +200,12 @@ void HTMLRenderer::post_process() string line; while(getline(manifest_fin, line)) { + if(line == "\"\"\"") + { + embed_string = !embed_string; + continue; + } + if(embed_string) { output << line << endl; @@ -214,15 +215,10 @@ void HTMLRenderer::post_process() if(line.empty() || line[0] == '#') continue; - if(line == "\"\"\"") - { - embed_string = !embed_string; - continue; - } if(line[0] == '@') { - embed_file(output, PDF2HTMLEX_DATA_PATH + "/" + line.substr(1), true); + embed_file(output, PDF2HTMLEX_DATA_PATH + "/" + line.substr(1), "", true); continue; } @@ -230,7 +226,7 @@ void HTMLRenderer::post_process() { if(line == "$css") { - embed_file(output, css_path, false); + embed_file(output, css_path, ".css", false); } else if (line == "$pages") { @@ -338,29 +334,34 @@ void HTMLRenderer::clean_tmp_files() cerr << "Remove temporary directory: " << tmp_dir << endl; } -void HTMLRenderer::embed_file(ostream & out, const string & path, bool copy) +void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy) { string fn = get_filename(path); - string suffix = get_suffix(fn); + string suffix = (type == "") ? 
get_suffix(fn) : type; - auto iter = EMBED_STRING_MAP.find(make_pair(suffix, param->single_html)); + auto iter = EMBED_STRING_MAP.find(make_pair(suffix, (bool)param->single_html)); if(iter == EMBED_STRING_MAP.end()) { - cerr << "Warning: unknown suffix in manifest: " << suffix << endl; + cerr << "Warning: unknown suffix: " << suffix << endl; return; } if(param->single_html) { - cerr << iter->second.first << endl + out << iter->second.first << endl << ifstream(path, ifstream::binary).rdbuf() << iter->second.second << endl; } else { - cerr << iter->second.first + out << iter->second.first << fn - << iter->second.second; + << iter->second.second << endl; + + if(copy) + { + ofstream(dest_dir + "/" + fn, ofstream::binary) << ifstream(path, ifstream::binary).rdbuf(); + } } } diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index 115ee0c..e068dc2 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -170,8 +170,9 @@ class HTMLRenderer : public OutputDev // depending on single-html, to embed the content or add a link to it + // "type": specify the file type, usually it's the suffix, in which case this parameter could be "" // "copy": indicates whether to copy the file into dest_dir, if not embedded - void embed_file(std::ostream & out, const std::string & path, bool copy); + void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy); //////////////////////////////////////////////////// // state tracking diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index a058f66..3568211 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -198,12 +198,12 @@ int main(int argc, char **argv) if(get_suffix(param.input_filename) == ".pdf") { - param.output_filename = s.substr(0, s.size() - 4) + ".css"; + param.css_filename = s.substr(0, s.size() - 4) + ".css"; } else { if(!param.split_pages) - param.output_filename = s + ".css"; + param.css_filename = s + ".css"; } }