1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-03 00:35:40 +00:00

refactor file objects

This commit is contained in:
Lu Wang 2013-01-28 18:46:44 +08:00
parent 014ef7ecc2
commit d38697d890
7 changed files with 105 additions and 95 deletions

View File

@ -39,6 +39,13 @@ new pdf2htmlEX.Viewer('pdf-main');
<title></title> <title></title>
</head> </head>
<body> <body>
<div id="pdf-outline">
"""
$outlines
"""
</div>
<div id="pdf-main"> <div id="pdf-main">
""" """

View File

@ -428,8 +428,11 @@ class HTMLRenderer : public OutputDev
std::map<double, long long> left_map; std::map<double, long long> left_map;
const Param * param; const Param * param;
std::ofstream html_fout, css_fout;
std::string html_path, css_path; struct {
std::ofstream fs;
std::string path;
} f_pages, f_css, f_outlines;
static const std::string MANIFEST_FILENAME; static const std::string MANIFEST_FILENAME;
}; };

View File

@ -83,7 +83,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size); max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
} }
ostream & out = renderer->html_fout; ostream & out = renderer->f_pages.fs;
out << "<div style=\"" out << "<div style=\""
<< "bottom:" << round(y) << "px;" << "bottom:" << round(y) << "px;"
<< "\"" << "\""

View File

@ -372,48 +372,48 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
} }
} }
html_fout << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\""; f_pages.fs << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";
if(line_color) if(line_color)
{ {
html_fout << "border-color:" << *line_color << ";"; f_pages.fs << "border-color:" << *line_color << ";";
html_fout << "border-width:"; f_pages.fs << "border-width:";
for(int i = 0; i < line_width_count; ++i) for(int i = 0; i < line_width_count; ++i)
{ {
if(i > 0) html_fout << ' '; if(i > 0) f_pages.fs << ' ';
double lw = line_width_array[i] * scale; double lw = line_width_array[i] * scale;
html_fout << round(lw); f_pages.fs << round(lw);
if(is_positive(lw)) html_fout << "px"; if(is_positive(lw)) f_pages.fs << "px";
} }
html_fout << ";"; f_pages.fs << ";";
} }
else else
{ {
html_fout << "border:none;"; f_pages.fs << "border:none;";
} }
if(fill_color) if(fill_color)
{ {
html_fout << "background-color:" << (*fill_color) << ";"; f_pages.fs << "background-color:" << (*fill_color) << ";";
} }
else else
{ {
html_fout << "background-color:transparent;"; f_pages.fs << "background-color:transparent;";
} }
if(style_function) if(style_function)
{ {
style_function(style_function_data, html_fout); style_function(style_function_data, f_pages.fs);
} }
html_fout << "bottom:" << round(y) << "px;" f_pages.fs << "bottom:" << round(y) << "px;"
<< "left:" << round(x) << "px;" << "left:" << round(x) << "px;"
<< "width:" << round(w * scale) << "px;" << "width:" << round(w * scale) << "px;"
<< "height:" << round(h * scale) << "px;"; << "height:" << round(h * scale) << "px;";
html_fout << "\"></div>"; f_pages.fs << "\"></div>";
} }

View File

@ -20,7 +20,7 @@ namespace pdf2htmlEX {
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font) void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font)
{ {
css_fout << "@font-face{" f_css.fs << "@font-face{"
<< "font-family:f" << info.id << ";" << "font-family:f" << info.id << ";"
<< "src:url("; << "src:url(";
@ -32,15 +32,15 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
ifstream fin(path, ifstream::binary); ifstream fin(path, ifstream::binary);
if(!fin) if(!fin)
throw "Cannot locate font file: " + path; throw "Cannot locate font file: " + path;
css_fout << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'"; f_css.fs << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'";
} }
else else
{ {
css_fout << (char*)fn; f_css.fs << (char*)fn;
} }
} }
css_fout << ")" f_css.fs << ")"
<< "format(\"" << fontfileformat << "\");" << "format(\"" << fontfileformat << "\");"
<< "}" // end of @font-face << "}" // end of @font-face
<< ".f" << info.id << "{" << ".f" << info.id << "{"
@ -66,45 +66,45 @@ static string general_font_family(GfxFont * font)
// TODO: this function is called when a font cannot be processed; the name there may be used as a hint // TODO: this function is called when a font cannot be processed; the name there may be used as a hint
void HTMLRenderer::export_remote_default_font(long long fn_id) void HTMLRenderer::export_remote_default_font(long long fn_id)
{ {
css_fout << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; f_css.fs << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
} }
void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont)
{ {
css_fout << ".f" << info.id << "{"; f_css.fs << ".f" << info.id << "{";
css_fout << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";"; f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
string fn = original_font_name; string fn = original_font_name;
for(auto iter = fn.begin(); iter != fn.end(); ++iter) for(auto iter = fn.begin(); iter != fn.end(); ++iter)
*iter = tolower(*iter); *iter = tolower(*iter);
if(font->isBold() || (fn.find("bold") != string::npos)) if(font->isBold() || (fn.find("bold") != string::npos))
css_fout << "font-weight:bold;"; f_css.fs << "font-weight:bold;";
else else
css_fout << "font-weight:normal;"; f_css.fs << "font-weight:normal;";
if(fn.find("oblique") != string::npos) if(fn.find("oblique") != string::npos)
css_fout << "font-style:oblique;"; f_css.fs << "font-style:oblique;";
else if(font->isItalic() || (fn.find("italic") != string::npos)) else if(font->isItalic() || (fn.find("italic") != string::npos))
css_fout << "font-style:italic;"; f_css.fs << "font-style:italic;";
else else
css_fout << "font-style:normal;"; f_css.fs << "font-style:normal;";
css_fout << "line-height:" << round(info.ascent - info.descent) << ";"; f_css.fs << "line-height:" << round(info.ascent - info.descent) << ";";
css_fout << "visibility:visible;"; f_css.fs << "visibility:visible;";
css_fout << "}" << endl; f_css.fs << "}" << endl;
} }
void HTMLRenderer::export_font_size (long long fs_id, double font_size) void HTMLRenderer::export_font_size (long long fs_id, double font_size)
{ {
css_fout << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl; f_css.fs << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
} }
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
{ {
css_fout << ".t" << tm_id << "{"; f_css.fs << ".t" << tm_id << "{";
// always ignore tm[4] and tm[5] because // always ignore tm[4] and tm[5] because
// we have already shifted the origin // we have already shifted the origin
@ -114,7 +114,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
{ {
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"}; auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
css_fout << *iter << "transform:none;"; f_css.fs << *iter << "transform:none;";
} }
else else
{ {
@ -122,53 +122,53 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
{ {
// PDF uses a different coordinate system from the Web // PDF uses a different coordinate system from the Web
css_fout << *iter << "transform:matrix(" f_css.fs << *iter << "transform:matrix("
<< round(tm[0]) << ',' << round(tm[0]) << ','
<< round(-tm[1]) << ',' << round(-tm[1]) << ','
<< round(-tm[2]) << ',' << round(-tm[2]) << ','
<< round(tm[3]) << ','; << round(tm[3]) << ',';
css_fout << "0,0);"; f_css.fs << "0,0);";
} }
} }
css_fout << "}" << endl; f_css.fs << "}" << endl;
} }
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space) void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
{ {
css_fout << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl; f_css.fs << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
} }
void HTMLRenderer::export_word_space (long long ws_id, double word_space) void HTMLRenderer::export_word_space (long long ws_id, double word_space)
{ {
css_fout << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl; f_css.fs << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
} }
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
{ {
css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl; f_css.fs << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
} }
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
{ {
if(ws_width > 0) if(ws_width > 0)
css_fout << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl; f_css.fs << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
else else
css_fout << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl; f_css.fs << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
} }
void HTMLRenderer::export_rise (long long rise_id, double rise) void HTMLRenderer::export_rise (long long rise_id, double rise)
{ {
css_fout << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl; f_css.fs << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
} }
void HTMLRenderer::export_height (long long height_id, double height) void HTMLRenderer::export_height (long long height_id, double height)
{ {
css_fout << ".h" << height_id << "{height:" << round(height) << "px;}" << endl; f_css.fs << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
} }
void HTMLRenderer::export_left (long long left_id, double left) void HTMLRenderer::export_left (long long left_id, double left)
{ {
css_fout << ".L" << left_id << "{left:" << round(left) << "px;}" << endl; f_css.fs << ".L" << left_id << "{left:" << round(left) << "px;}" << endl;
} }
} }

View File

@ -89,10 +89,10 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param->split_pages) if(param->split_pages)
{ {
auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i); auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i);
html_fout.open((char*)page_fn, ofstream::binary); f_pages.fs.open((char*)page_fn, ofstream::binary);
if(!html_fout) if(!f_pages.fs)
throw string("Cannot open ") + (char*)page_fn + " for writing"; throw string("Cannot open ") + (char*)page_fn + " for writing";
set_stream_flags(html_fout); set_stream_flags(f_pages.fs);
} }
if(param->process_nontext) if(param->process_nontext)
@ -113,7 +113,7 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param->split_pages) if(param->split_pages)
{ {
html_fout.close(); f_pages.fs.close();
} }
} }
if(page_count >= 0) if(page_count >= 0)
@ -141,7 +141,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
assert((!line_opened) && "Open line in startPage detected!"); assert((!line_opened) && "Open line in startPage detected!");
html_fout f_pages.fs
<< "<div class=\"d\" style=\"width:" << "<div class=\"d\" style=\"width:"
<< (pageWidth) << "px;height:" << (pageWidth) << "px;height:"
<< (pageHeight) << "px;\">" << (pageHeight) << "px;\">"
@ -150,7 +150,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
if(param->process_nontext) if(param->process_nontext)
{ {
html_fout << "background-image:url("; f_pages.fs << "background-image:url(";
{ {
if(param->single_html) if(param->single_html)
@ -159,18 +159,18 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
ifstream fin((char*)path, ifstream::binary); ifstream fin((char*)path, ifstream::binary);
if(!fin) if(!fin)
throw string("Cannot read background image ") + (char*)path; throw string("Cannot read background image ") + (char*)path;
html_fout << "'data:image/png;base64," << base64stream(fin) << "'"; f_pages.fs << "'data:image/png;base64," << base64stream(fin) << "'";
} }
else else
{ {
html_fout << str_fmt("p%x.png", pageNum); f_pages.fs << str_fmt("p%x.png", pageNum);
} }
} }
html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;"; f_pages.fs << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
} }
html_fout << "\">"; f_pages.fs << "\">";
draw_text_scale = 1.0; draw_text_scale = 1.0;
cur_font_info = install_font(nullptr); cur_font_info = install_font(nullptr);
@ -205,26 +205,26 @@ void HTMLRenderer::endPage() {
cur_doc->processLinks(this, pageNum); cur_doc->processLinks(this, pageNum);
// close box // close box
html_fout << "</div>"; f_pages.fs << "</div>";
// dump info for js // dump info for js
// TODO: create a function for this // TODO: create a function for this
// BE CAREFUL WITH ESCAPES // BE CAREFUL WITH ESCAPES
html_fout << "<div class=\"j\" data-data='{"; f_pages.fs << "<div class=\"j\" data-data='{";
//default CTM //default CTM
html_fout << "\"ctm\":["; f_pages.fs << "\"ctm\":[";
for(int i = 0; i < 6; ++i) for(int i = 0; i < 6; ++i)
{ {
if(i > 0) html_fout << ","; if(i > 0) f_pages.fs << ",";
html_fout << round(default_ctm[i]); f_pages.fs << round(default_ctm[i]);
} }
html_fout << "]"; f_pages.fs << "]";
html_fout << "}'></div>"; f_pages.fs << "}'></div>";
// close page // close page
html_fout << "</div></div>" << endl; f_pages.fs << "</div></div>" << endl;
} }
void HTMLRenderer::pre_process(PDFDoc * doc) void HTMLRenderer::pre_process(PDFDoc * doc)
@ -289,11 +289,11 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
if(param->single_html && (!param->split_pages)) if(param->single_html && (!param->split_pages))
tmp_files.add((char*)fn); tmp_files.add((char*)fn);
css_path = (char*)fn, f_css.path = (char*)fn,
css_fout.open(css_path, ofstream::binary); f_css.fs.open(f_css.path, ofstream::binary);
if(!css_fout) if(!f_css.fs)
throw string("Cannot open ") + (char*)fn + " for writing"; throw string("Cannot open ") + (char*)fn + " for writing";
set_stream_flags(css_fout); set_stream_flags(f_css.fs);
} }
// if split-pages is specified, open & close the file in the process loop // if split-pages is specified, open & close the file in the process loop
@ -310,21 +310,21 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str()); auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
tmp_files.add((char*)fn); tmp_files.add((char*)fn);
html_path = (char*)fn; f_pages.path = (char*)fn;
html_fout.open(html_path, ofstream::binary); f_pages.fs.open(f_pages.path, ofstream::binary);
if(!html_fout) if(!f_pages.fs)
throw string("Cannot open ") + (char*)fn + " for writing"; throw string("Cannot open ") + (char*)fn + " for writing";
set_stream_flags(html_fout); set_stream_flags(f_pages.fs);
} }
} }
void HTMLRenderer::post_process() void HTMLRenderer::post_process()
{ {
// close files // close files
html_fout.close(); f_pages.fs.close();
css_fout.close(); f_css.fs.close();
//only when split-page, do we have some work left to do //only when split-page == 0, do we have some work left to do
if(param->split_pages) if(param->split_pages)
return; return;
@ -372,11 +372,11 @@ void HTMLRenderer::post_process()
{ {
if(line == "$css") if(line == "$css")
{ {
embed_file(output, css_path, ".css", false); embed_file(output, f_css.path, ".css", false);
} }
else if (line == "$pages") else if (line == "$pages")
{ {
ifstream fin(html_path, ifstream::binary); ifstream fin(f_pages.path, ifstream::binary);
if(!fin) if(!fin)
throw "Cannot open read the pages"; throw "Cannot open read the pages";
output << fin.rdbuf(); output << fin.rdbuf();

View File

@ -183,15 +183,15 @@ void HTMLRenderer::processLink(AnnotLink * al)
if(!dest_str.empty()) if(!dest_str.empty())
{ {
html_fout << "<a class=\"a\" href=\"" << dest_str << "\""; f_pages.fs << "<a class=\"a\" href=\"" << dest_str << "\"";
if(!dest_detail_str.empty()) if(!dest_detail_str.empty())
html_fout << " data-dest-detail='" << dest_detail_str << "'"; f_pages.fs << " data-dest-detail='" << dest_detail_str << "'";
html_fout << ">"; f_pages.fs << ">";
} }
html_fout << "<div class=\"Cd t" f_pages.fs << "<div class=\"Cd t"
<< install_transform_matrix(default_ctm) << install_transform_matrix(default_ctm)
<< "\" style=\""; << "\" style=\"";
@ -218,31 +218,31 @@ void HTMLRenderer::processLink(AnnotLink * al)
border_top_bottom_width, border_left_right_width); border_top_bottom_width, border_left_right_width);
if(abs(border_top_bottom_width - border_left_right_width) < EPS) if(abs(border_top_bottom_width - border_left_right_width) < EPS)
html_fout << "border-width:" << round(border_top_bottom_width) << "px;"; f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px;";
else else
html_fout << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;"; f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
} }
auto style = border->getStyle(); auto style = border->getStyle();
switch(style) switch(style)
{ {
case AnnotBorder::borderSolid: case AnnotBorder::borderSolid:
html_fout << "border-style:solid;"; f_pages.fs << "border-style:solid;";
break; break;
case AnnotBorder::borderDashed: case AnnotBorder::borderDashed:
html_fout << "border-style:dashed;"; f_pages.fs << "border-style:dashed;";
break; break;
case AnnotBorder::borderBeveled: case AnnotBorder::borderBeveled:
html_fout << "border-style:outset;"; f_pages.fs << "border-style:outset;";
break; break;
case AnnotBorder::borderInset: case AnnotBorder::borderInset:
html_fout << "border-style:inset;"; f_pages.fs << "border-style:inset;";
break; break;
case AnnotBorder::borderUnderlined: case AnnotBorder::borderUnderlined:
html_fout << "border-style:none;border-bottom-style:solid;"; f_pages.fs << "border-style:none;border-bottom-style:solid;";
break; break;
default: default:
cerr << "Warning:Unknown annotation border style: " << style << endl; cerr << "Warning:Unknown annotation border style: " << style << endl;
html_fout << "border-style:solid;"; f_pages.fs << "border-style:solid;";
} }
@ -260,36 +260,36 @@ void HTMLRenderer::processLink(AnnotLink * al)
r = g = b = 0; r = g = b = 0;
} }
html_fout << "border-color:rgb(" f_pages.fs << "border-color:rgb("
<< dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex << dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
<< ");"; << ");";
} }
else else
{ {
html_fout << "border-style:none;"; f_pages.fs << "border-style:none;";
} }
} }
else else
{ {
html_fout << "border-style:none;"; f_pages.fs << "border-style:none;";
} }
tm_transform(default_ctm, x, y); tm_transform(default_ctm, x, y);
html_fout << "position:absolute;" f_pages.fs << "position:absolute;"
<< "left:" << round(x) << "px;" << "left:" << round(x) << "px;"
<< "bottom:" << round(y) << "px;" << "bottom:" << round(y) << "px;"
<< "width:" << round(w) << "px;" << "width:" << round(w) << "px;"
<< "height:" << round(h) << "px;"; << "height:" << round(h) << "px;";
// fix for IE // fix for IE
html_fout << "background-color:rgba(255,255,255,0.000001);"; f_pages.fs << "background-color:rgba(255,255,255,0.000001);";
html_fout << "\"></div>"; f_pages.fs << "\"></div>";
if(dest_str != "") if(dest_str != "")
{ {
html_fout << "</a>"; f_pages.fs << "</a>";
} }
} }