1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-29 01:31:29 +00:00

behaviour with single-html & split-pages

This commit is contained in:
Lu Wang 2012-09-12 23:55:29 +08:00
parent 61cdce7e7d
commit 0a7d0f20a2
4 changed files with 31 additions and 29 deletions

View File

@ -61,7 +61,7 @@ If switched out, there will be several files generated along with the HTML file
.B --split-pages <0|1> (Default: 0) .B --split-pages <0|1> (Default: 0)
If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0 If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0
The output files will be named as <output-filename>0.page, <output-file>1.page, ... The output files will be named as <output-filename>0.page, <output-filename>1.page, ...
.TP .TP
.B --embed-base-font <0|1> (Default: 1) .B --embed-base-font <0|1> (Default: 1)
Whether to embed base 14 fonts. Whether to embed base 14 fonts.

View File

@ -89,9 +89,8 @@ void HTMLRenderer::process(PDFDoc *doc)
{ {
if(param->split_pages) if(param->split_pages)
{ {
auto page_fn = str_fmt("%s/__pages%x", tmp_dir.c_str(), i); auto page_fn = str_fmt("%s/%s%d.page", dest_dir.c_str(), param->output_filename.c_str(), i);
html_fout.open((char*)page_fn, ofstream::binary); html_fout.open((char*)page_fn, ofstream::binary);
add_tmp_file((char*)page_fn);
fix_stream(html_fout); fix_stream(html_fout);
} }
@ -153,7 +152,7 @@ void HTMLRenderer::pre_process()
? str_fmt("%s/__css", tmp_dir.c_str()) ? str_fmt("%s/__css", tmp_dir.c_str())
: str_fmt("%s/%s", dest_dir.c_str(), param->css_filename.c_str()); : str_fmt("%s/%s", dest_dir.c_str(), param->css_filename.c_str());
if(param->single_html) if(param->single_html && (!param->split_pages))
add_tmp_file((char*)fn); add_tmp_file((char*)fn);
css_path = (char*)fn, css_path = (char*)fn,
@ -172,12 +171,8 @@ void HTMLRenderer::pre_process()
* *
* Otherwise just generate it * Otherwise just generate it
*/ */
auto fn = (param->single_html) auto fn = str_fmt("%s/__pages", tmp_dir.c_str());
? str_fmt("%s/__pages", tmp_dir.c_str()) add_tmp_file((char*)fn);
: str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str());
if(param->single_html)
add_tmp_file((char*)fn);
html_path = (char*)fn; html_path = (char*)fn;
html_fout.open(html_path, ofstream::binary); html_fout.open(html_path, ofstream::binary);
@ -191,8 +186,8 @@ void HTMLRenderer::post_process()
html_fout.close(); html_fout.close();
css_fout.close(); css_fout.close();
//only when !split-page, do we have some work left to do //only when split-page, do we have some work left to do
if(!param->split_pages) if(param->split_pages)
return; return;
ofstream output((char*)str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str())); ofstream output((char*)str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str()));
@ -205,6 +200,12 @@ void HTMLRenderer::post_process()
string line; string line;
while(getline(manifest_fin, line)) while(getline(manifest_fin, line))
{ {
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string) if(embed_string)
{ {
output << line << endl; output << line << endl;
@ -214,15 +215,10 @@ void HTMLRenderer::post_process()
if(line.empty() || line[0] == '#') if(line.empty() || line[0] == '#')
continue; continue;
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(line[0] == '@') if(line[0] == '@')
{ {
embed_file(output, PDF2HTMLEX_DATA_PATH + "/" + line.substr(1), true); embed_file(output, PDF2HTMLEX_DATA_PATH + "/" + line.substr(1), "", true);
continue; continue;
} }
@ -230,7 +226,7 @@ void HTMLRenderer::post_process()
{ {
if(line == "$css") if(line == "$css")
{ {
embed_file(output, css_path, false); embed_file(output, css_path, ".css", false);
} }
else if (line == "$pages") else if (line == "$pages")
{ {
@ -338,29 +334,34 @@ void HTMLRenderer::clean_tmp_files()
cerr << "Remove temporary directory: " << tmp_dir << endl; cerr << "Remove temporary directory: " << tmp_dir << endl;
} }
void HTMLRenderer::embed_file(ostream & out, const string & path, bool copy) void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
{ {
string fn = get_filename(path); string fn = get_filename(path);
string suffix = get_suffix(fn); string suffix = (type == "") ? get_suffix(fn) : type;
auto iter = EMBED_STRING_MAP.find(make_pair(suffix, param->single_html)); auto iter = EMBED_STRING_MAP.find(make_pair(suffix, (bool)param->single_html));
if(iter == EMBED_STRING_MAP.end()) if(iter == EMBED_STRING_MAP.end())
{ {
cerr << "Warning: unknown suffix in manifest: " << suffix << endl; cerr << "Warning: unknown suffix: " << suffix << endl;
return; return;
} }
if(param->single_html) if(param->single_html)
{ {
cerr << iter->second.first << endl out << iter->second.first << endl
<< ifstream(path, ifstream::binary).rdbuf() << ifstream(path, ifstream::binary).rdbuf()
<< iter->second.second << endl; << iter->second.second << endl;
} }
else else
{ {
cerr << iter->second.first out << iter->second.first
<< fn << fn
<< iter->second.second; << iter->second.second << endl;
if(copy)
{
ofstream(dest_dir + "/" + fn, ofstream::binary) << ifstream(path, ifstream::binary).rdbuf();
}
} }
} }

View File

@ -170,8 +170,9 @@ class HTMLRenderer : public OutputDev
// depending on single-html, to embed the content or add a link to it // depending on single-html, to embed the content or add a link to it
// "type": specify the file type, usually it's the suffix, in which case this parameter could be ""
// "copy": indicates whether to copy the file into dest_dir, if not embedded // "copy": indicates whether to copy the file into dest_dir, if not embedded
void embed_file(std::ostream & out, const std::string & path, bool copy); void embed_file(std::ostream & out, const std::string & path, const std::string & type, bool copy);
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// state tracking // state tracking

View File

@ -198,12 +198,12 @@ int main(int argc, char **argv)
if(get_suffix(param.input_filename) == ".pdf") if(get_suffix(param.input_filename) == ".pdf")
{ {
param.output_filename = s.substr(0, s.size() - 4) + ".css"; param.css_filename = s.substr(0, s.size() - 4) + ".css";
} }
else else
{ {
if(!param.split_pages) if(!param.split_pages)
param.output_filename = s + ".css"; param.css_filename = s + ".css";
} }
} }