mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
refactor file objects
This commit is contained in:
parent
014ef7ecc2
commit
d38697d890
@ -39,6 +39,13 @@ new pdf2htmlEX.Viewer('pdf-main');
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
<div id="pdf-outline">
|
||||
"""
|
||||
|
||||
$outlines
|
||||
|
||||
"""
|
||||
</div>
|
||||
<div id="pdf-main">
|
||||
"""
|
||||
|
||||
|
@ -428,8 +428,11 @@ class HTMLRenderer : public OutputDev
|
||||
std::map<double, long long> left_map;
|
||||
|
||||
const Param * param;
|
||||
std::ofstream html_fout, css_fout;
|
||||
std::string html_path, css_path;
|
||||
|
||||
struct {
|
||||
std::ofstream fs;
|
||||
std::string path;
|
||||
} f_pages, f_css, f_outlines;
|
||||
|
||||
static const std::string MANIFEST_FILENAME;
|
||||
};
|
||||
|
@ -83,7 +83,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
max_ascent = max<double>(max_ascent, s.ascent * s.draw_font_size);
|
||||
}
|
||||
|
||||
ostream & out = renderer->html_fout;
|
||||
ostream & out = renderer->f_pages.fs;
|
||||
out << "<div style=\""
|
||||
<< "bottom:" << round(y) << "px;"
|
||||
<< "\""
|
||||
|
@ -372,48 +372,48 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
||||
}
|
||||
}
|
||||
|
||||
html_fout << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";
|
||||
f_pages.fs << "<div class=\"Cd t" << install_transform_matrix(new_tm) << "\" style=\"";
|
||||
|
||||
if(line_color)
|
||||
{
|
||||
html_fout << "border-color:" << *line_color << ";";
|
||||
f_pages.fs << "border-color:" << *line_color << ";";
|
||||
|
||||
html_fout << "border-width:";
|
||||
f_pages.fs << "border-width:";
|
||||
for(int i = 0; i < line_width_count; ++i)
|
||||
{
|
||||
if(i > 0) html_fout << ' ';
|
||||
if(i > 0) f_pages.fs << ' ';
|
||||
|
||||
double lw = line_width_array[i] * scale;
|
||||
html_fout << round(lw);
|
||||
if(is_positive(lw)) html_fout << "px";
|
||||
f_pages.fs << round(lw);
|
||||
if(is_positive(lw)) f_pages.fs << "px";
|
||||
}
|
||||
html_fout << ";";
|
||||
f_pages.fs << ";";
|
||||
}
|
||||
else
|
||||
{
|
||||
html_fout << "border:none;";
|
||||
f_pages.fs << "border:none;";
|
||||
}
|
||||
|
||||
if(fill_color)
|
||||
{
|
||||
html_fout << "background-color:" << (*fill_color) << ";";
|
||||
f_pages.fs << "background-color:" << (*fill_color) << ";";
|
||||
}
|
||||
else
|
||||
{
|
||||
html_fout << "background-color:transparent;";
|
||||
f_pages.fs << "background-color:transparent;";
|
||||
}
|
||||
|
||||
if(style_function)
|
||||
{
|
||||
style_function(style_function_data, html_fout);
|
||||
style_function(style_function_data, f_pages.fs);
|
||||
}
|
||||
|
||||
html_fout << "bottom:" << round(y) << "px;"
|
||||
f_pages.fs << "bottom:" << round(y) << "px;"
|
||||
<< "left:" << round(x) << "px;"
|
||||
<< "width:" << round(w * scale) << "px;"
|
||||
<< "height:" << round(h * scale) << "px;";
|
||||
|
||||
html_fout << "\"></div>";
|
||||
f_pages.fs << "\"></div>";
|
||||
}
|
||||
|
||||
|
||||
|
@ -20,7 +20,7 @@ namespace pdf2htmlEX {
|
||||
|
||||
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font)
|
||||
{
|
||||
css_fout << "@font-face{"
|
||||
f_css.fs << "@font-face{"
|
||||
<< "font-family:f" << info.id << ";"
|
||||
<< "src:url(";
|
||||
|
||||
@ -32,15 +32,15 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
|
||||
ifstream fin(path, ifstream::binary);
|
||||
if(!fin)
|
||||
throw "Cannot locate font file: " + path;
|
||||
css_fout << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'";
|
||||
f_css.fs << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'";
|
||||
}
|
||||
else
|
||||
{
|
||||
css_fout << (char*)fn;
|
||||
f_css.fs << (char*)fn;
|
||||
}
|
||||
}
|
||||
|
||||
css_fout << ")"
|
||||
f_css.fs << ")"
|
||||
<< "format(\"" << fontfileformat << "\");"
|
||||
<< "}" // end of @font-face
|
||||
<< ".f" << info.id << "{"
|
||||
@ -66,45 +66,45 @@ static string general_font_family(GfxFont * font)
|
||||
// TODO: this function is called when some font is unable to process, may use the name there as a hint
|
||||
void HTMLRenderer::export_remote_default_font(long long fn_id)
|
||||
{
|
||||
css_fout << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
|
||||
f_css.fs << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont)
|
||||
{
|
||||
css_fout << ".f" << info.id << "{";
|
||||
css_fout << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
|
||||
f_css.fs << ".f" << info.id << "{";
|
||||
f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
|
||||
|
||||
string fn = original_font_name;
|
||||
for(auto iter = fn.begin(); iter != fn.end(); ++iter)
|
||||
*iter = tolower(*iter);
|
||||
|
||||
if(font->isBold() || (fn.find("bold") != string::npos))
|
||||
css_fout << "font-weight:bold;";
|
||||
f_css.fs << "font-weight:bold;";
|
||||
else
|
||||
css_fout << "font-weight:normal;";
|
||||
f_css.fs << "font-weight:normal;";
|
||||
|
||||
if(fn.find("oblique") != string::npos)
|
||||
css_fout << "font-style:oblique;";
|
||||
f_css.fs << "font-style:oblique;";
|
||||
else if(font->isItalic() || (fn.find("italic") != string::npos))
|
||||
css_fout << "font-style:italic;";
|
||||
f_css.fs << "font-style:italic;";
|
||||
else
|
||||
css_fout << "font-style:normal;";
|
||||
f_css.fs << "font-style:normal;";
|
||||
|
||||
css_fout << "line-height:" << round(info.ascent - info.descent) << ";";
|
||||
f_css.fs << "line-height:" << round(info.ascent - info.descent) << ";";
|
||||
|
||||
css_fout << "visibility:visible;";
|
||||
f_css.fs << "visibility:visible;";
|
||||
|
||||
css_fout << "}" << endl;
|
||||
f_css.fs << "}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_font_size (long long fs_id, double font_size)
|
||||
{
|
||||
css_fout << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
|
||||
f_css.fs << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
||||
{
|
||||
css_fout << ".t" << tm_id << "{";
|
||||
f_css.fs << ".t" << tm_id << "{";
|
||||
|
||||
// always ignore tm[4] and tm[5] because
|
||||
// we have already shifted the origin
|
||||
@ -114,7 +114,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
||||
{
|
||||
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
|
||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||
css_fout << *iter << "transform:none;";
|
||||
f_css.fs << *iter << "transform:none;";
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -122,53 +122,53 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm)
|
||||
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
|
||||
{
|
||||
// PDF use a different coordinate system from Web
|
||||
css_fout << *iter << "transform:matrix("
|
||||
f_css.fs << *iter << "transform:matrix("
|
||||
<< round(tm[0]) << ','
|
||||
<< round(-tm[1]) << ','
|
||||
<< round(-tm[2]) << ','
|
||||
<< round(tm[3]) << ',';
|
||||
|
||||
css_fout << "0,0);";
|
||||
f_css.fs << "0,0);";
|
||||
}
|
||||
}
|
||||
css_fout << "}" << endl;
|
||||
f_css.fs << "}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_letter_space (long long ls_id, double letter_space)
|
||||
{
|
||||
css_fout << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
|
||||
f_css.fs << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_word_space (long long ws_id, double word_space)
|
||||
{
|
||||
css_fout << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
|
||||
f_css.fs << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb)
|
||||
{
|
||||
css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
|
||||
f_css.fs << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_whitespace (long long ws_id, double ws_width)
|
||||
{
|
||||
if(ws_width > 0)
|
||||
css_fout << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
|
||||
f_css.fs << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl;
|
||||
else
|
||||
css_fout << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
|
||||
f_css.fs << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_rise (long long rise_id, double rise)
|
||||
{
|
||||
css_fout << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
|
||||
f_css.fs << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::export_height (long long height_id, double height)
|
||||
{
|
||||
css_fout << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
|
||||
f_css.fs << ".h" << height_id << "{height:" << round(height) << "px;}" << endl;
|
||||
}
|
||||
void HTMLRenderer::export_left (long long left_id, double left)
|
||||
{
|
||||
css_fout << ".L" << left_id << "{left:" << round(left) << "px;}" << endl;
|
||||
f_css.fs << ".L" << left_id << "{left:" << round(left) << "px;}" << endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -89,10 +89,10 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
if(param->split_pages)
|
||||
{
|
||||
auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i);
|
||||
html_fout.open((char*)page_fn, ofstream::binary);
|
||||
if(!html_fout)
|
||||
f_pages.fs.open((char*)page_fn, ofstream::binary);
|
||||
if(!f_pages.fs)
|
||||
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
||||
set_stream_flags(html_fout);
|
||||
set_stream_flags(f_pages.fs);
|
||||
}
|
||||
|
||||
if(param->process_nontext)
|
||||
@ -113,7 +113,7 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
|
||||
if(param->split_pages)
|
||||
{
|
||||
html_fout.close();
|
||||
f_pages.fs.close();
|
||||
}
|
||||
}
|
||||
if(page_count >= 0)
|
||||
@ -141,7 +141,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||
|
||||
assert((!line_opened) && "Open line in startPage detected!");
|
||||
|
||||
html_fout
|
||||
f_pages.fs
|
||||
<< "<div class=\"d\" style=\"width:"
|
||||
<< (pageWidth) << "px;height:"
|
||||
<< (pageHeight) << "px;\">"
|
||||
@ -150,7 +150,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||
|
||||
if(param->process_nontext)
|
||||
{
|
||||
html_fout << "background-image:url(";
|
||||
f_pages.fs << "background-image:url(";
|
||||
|
||||
{
|
||||
if(param->single_html)
|
||||
@ -159,18 +159,18 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||
ifstream fin((char*)path, ifstream::binary);
|
||||
if(!fin)
|
||||
throw string("Cannot read background image ") + (char*)path;
|
||||
html_fout << "'data:image/png;base64," << base64stream(fin) << "'";
|
||||
f_pages.fs << "'data:image/png;base64," << base64stream(fin) << "'";
|
||||
}
|
||||
else
|
||||
{
|
||||
html_fout << str_fmt("p%x.png", pageNum);
|
||||
f_pages.fs << str_fmt("p%x.png", pageNum);
|
||||
}
|
||||
}
|
||||
|
||||
html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
|
||||
f_pages.fs << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
|
||||
}
|
||||
|
||||
html_fout << "\">";
|
||||
f_pages.fs << "\">";
|
||||
draw_text_scale = 1.0;
|
||||
|
||||
cur_font_info = install_font(nullptr);
|
||||
@ -205,26 +205,26 @@ void HTMLRenderer::endPage() {
|
||||
cur_doc->processLinks(this, pageNum);
|
||||
|
||||
// close box
|
||||
html_fout << "</div>";
|
||||
f_pages.fs << "</div>";
|
||||
|
||||
// dump info for js
|
||||
// TODO: create a function for this
|
||||
// BE CAREFUL WITH ESCAPES
|
||||
html_fout << "<div class=\"j\" data-data='{";
|
||||
f_pages.fs << "<div class=\"j\" data-data='{";
|
||||
|
||||
//default CTM
|
||||
html_fout << "\"ctm\":[";
|
||||
f_pages.fs << "\"ctm\":[";
|
||||
for(int i = 0; i < 6; ++i)
|
||||
{
|
||||
if(i > 0) html_fout << ",";
|
||||
html_fout << round(default_ctm[i]);
|
||||
if(i > 0) f_pages.fs << ",";
|
||||
f_pages.fs << round(default_ctm[i]);
|
||||
}
|
||||
html_fout << "]";
|
||||
f_pages.fs << "]";
|
||||
|
||||
html_fout << "}'></div>";
|
||||
f_pages.fs << "}'></div>";
|
||||
|
||||
// close page
|
||||
html_fout << "</div></div>" << endl;
|
||||
f_pages.fs << "</div></div>" << endl;
|
||||
}
|
||||
|
||||
void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
@ -289,11 +289,11 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
if(param->single_html && (!param->split_pages))
|
||||
tmp_files.add((char*)fn);
|
||||
|
||||
css_path = (char*)fn,
|
||||
css_fout.open(css_path, ofstream::binary);
|
||||
if(!css_fout)
|
||||
f_css.path = (char*)fn,
|
||||
f_css.fs.open(f_css.path, ofstream::binary);
|
||||
if(!f_css.fs)
|
||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||
set_stream_flags(css_fout);
|
||||
set_stream_flags(f_css.fs);
|
||||
}
|
||||
|
||||
// if split-pages is specified, open & close the file in the process loop
|
||||
@ -310,21 +310,21 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
|
||||
tmp_files.add((char*)fn);
|
||||
|
||||
html_path = (char*)fn;
|
||||
html_fout.open(html_path, ofstream::binary);
|
||||
if(!html_fout)
|
||||
f_pages.path = (char*)fn;
|
||||
f_pages.fs.open(f_pages.path, ofstream::binary);
|
||||
if(!f_pages.fs)
|
||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||
set_stream_flags(html_fout);
|
||||
set_stream_flags(f_pages.fs);
|
||||
}
|
||||
}
|
||||
|
||||
void HTMLRenderer::post_process()
|
||||
{
|
||||
// close files
|
||||
html_fout.close();
|
||||
css_fout.close();
|
||||
f_pages.fs.close();
|
||||
f_css.fs.close();
|
||||
|
||||
//only when split-page, do we have some work left to do
|
||||
//only when split-page == 0, do we have some work left to do
|
||||
if(param->split_pages)
|
||||
return;
|
||||
|
||||
@ -372,11 +372,11 @@ void HTMLRenderer::post_process()
|
||||
{
|
||||
if(line == "$css")
|
||||
{
|
||||
embed_file(output, css_path, ".css", false);
|
||||
embed_file(output, f_css.path, ".css", false);
|
||||
}
|
||||
else if (line == "$pages")
|
||||
{
|
||||
ifstream fin(html_path, ifstream::binary);
|
||||
ifstream fin(f_pages.path, ifstream::binary);
|
||||
if(!fin)
|
||||
throw "Cannot open read the pages";
|
||||
output << fin.rdbuf();
|
||||
|
@ -183,15 +183,15 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
||||
|
||||
if(!dest_str.empty())
|
||||
{
|
||||
html_fout << "<a class=\"a\" href=\"" << dest_str << "\"";
|
||||
f_pages.fs << "<a class=\"a\" href=\"" << dest_str << "\"";
|
||||
|
||||
if(!dest_detail_str.empty())
|
||||
html_fout << " data-dest-detail='" << dest_detail_str << "'";
|
||||
f_pages.fs << " data-dest-detail='" << dest_detail_str << "'";
|
||||
|
||||
html_fout << ">";
|
||||
f_pages.fs << ">";
|
||||
}
|
||||
|
||||
html_fout << "<div class=\"Cd t"
|
||||
f_pages.fs << "<div class=\"Cd t"
|
||||
<< install_transform_matrix(default_ctm)
|
||||
<< "\" style=\"";
|
||||
|
||||
@ -218,31 +218,31 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
||||
border_top_bottom_width, border_left_right_width);
|
||||
|
||||
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
|
||||
html_fout << "border-width:" << round(border_top_bottom_width) << "px;";
|
||||
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px;";
|
||||
else
|
||||
html_fout << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
|
||||
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
|
||||
}
|
||||
auto style = border->getStyle();
|
||||
switch(style)
|
||||
{
|
||||
case AnnotBorder::borderSolid:
|
||||
html_fout << "border-style:solid;";
|
||||
f_pages.fs << "border-style:solid;";
|
||||
break;
|
||||
case AnnotBorder::borderDashed:
|
||||
html_fout << "border-style:dashed;";
|
||||
f_pages.fs << "border-style:dashed;";
|
||||
break;
|
||||
case AnnotBorder::borderBeveled:
|
||||
html_fout << "border-style:outset;";
|
||||
f_pages.fs << "border-style:outset;";
|
||||
break;
|
||||
case AnnotBorder::borderInset:
|
||||
html_fout << "border-style:inset;";
|
||||
f_pages.fs << "border-style:inset;";
|
||||
break;
|
||||
case AnnotBorder::borderUnderlined:
|
||||
html_fout << "border-style:none;border-bottom-style:solid;";
|
||||
f_pages.fs << "border-style:none;border-bottom-style:solid;";
|
||||
break;
|
||||
default:
|
||||
cerr << "Warning:Unknown annotation border style: " << style << endl;
|
||||
html_fout << "border-style:solid;";
|
||||
f_pages.fs << "border-style:solid;";
|
||||
}
|
||||
|
||||
|
||||
@ -260,36 +260,36 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
||||
r = g = b = 0;
|
||||
}
|
||||
|
||||
html_fout << "border-color:rgb("
|
||||
f_pages.fs << "border-color:rgb("
|
||||
<< dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
|
||||
<< ");";
|
||||
}
|
||||
else
|
||||
{
|
||||
html_fout << "border-style:none;";
|
||||
f_pages.fs << "border-style:none;";
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
html_fout << "border-style:none;";
|
||||
f_pages.fs << "border-style:none;";
|
||||
}
|
||||
|
||||
tm_transform(default_ctm, x, y);
|
||||
|
||||
html_fout << "position:absolute;"
|
||||
f_pages.fs << "position:absolute;"
|
||||
<< "left:" << round(x) << "px;"
|
||||
<< "bottom:" << round(y) << "px;"
|
||||
<< "width:" << round(w) << "px;"
|
||||
<< "height:" << round(h) << "px;";
|
||||
|
||||
// fix for IE
|
||||
html_fout << "background-color:rgba(255,255,255,0.000001);";
|
||||
f_pages.fs << "background-color:rgba(255,255,255,0.000001);";
|
||||
|
||||
html_fout << "\"></div>";
|
||||
f_pages.fs << "\"></div>";
|
||||
|
||||
if(dest_str != "")
|
||||
{
|
||||
html_fout << "</a>";
|
||||
f_pages.fs << "</a>";
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user