1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-17 21:16:03 +00:00
pdf2htmlEX/src/HTMLRenderer/general.cc

596 lines
16 KiB
C++
Raw Normal View History

2012-08-14 08:23:15 +00:00
/*
* general.cc
*
* Handling general stuff
*
* by WangLu
* 2012.08.14
*/
2012-09-09 17:40:37 +00:00
#include <cstdio>
2012-09-12 15:26:14 +00:00
#include <ostream>
2012-09-22 14:47:44 +00:00
#include <cmath>
2012-09-05 10:43:37 +00:00
2012-08-14 08:23:15 +00:00
#include <splash/SplashBitmap.h>
2012-09-16 10:30:34 +00:00
#include <Link.h>
2012-08-14 08:23:15 +00:00
#include "HTMLRenderer.h"
#include "BackgroundRenderer.h"
2012-08-14 09:13:29 +00:00
#include "namespace.h"
2012-09-17 12:40:10 +00:00
#include "ffw.h"
2012-09-10 05:08:47 +00:00
#include "pdf2htmlEX-config.h"
2012-08-14 08:23:15 +00:00
2012-09-12 15:26:14 +00:00
namespace pdf2htmlEX {
using std::fixed;
2012-08-14 12:30:18 +00:00
using std::flush;
2012-09-12 15:26:14 +00:00
using std::ostream;
2012-09-22 14:47:44 +00:00
using std::max;
2012-08-14 12:30:18 +00:00
/*
 * Error callback that does nothing.
 * The constructor installs it via setErrorCallback() when debugging is off,
 * so that poppler's error reports are discarded.
 */
static void dummy(void *, enum ErrorCategory, int /* pos */, char *)
{
}
2012-08-14 08:23:15 +00:00
/*
 * Build a renderer driven by the given parameters.
 *
 * Silences poppler's error callback unless param->debug is set, initializes
 * ffw, allocates the per-font scratch tables and derives the two scale
 * factors from param->zoom and param->font_size_multiplier.
 */
HTMLRenderer::HTMLRenderer(const Param * param)
    : OutputDev()
    , line_opened(false)
    , line_buf(this)
    , preprocessor(param)
    , image_count(0)
    , param(param)
{
    // without debugging, route poppler's error reports to a no-op callback
    if(!param->debug)
        setErrorCallback(&dummy, nullptr);

    ffw_init(param->debug);

    // scratch tables, released in the destructor
    cur_mapping  = new int32_t [0x10000];
    cur_mapping2 = new char* [0x100];
    width_list   = new int [0x10000];

    /*
     * determine scale factors:
     * scale_factor1 is the larger of zoom and font_size_multiplier,
     * scale_factor2 is zoom expressed relative to scale_factor1
     */
    const auto zoom = param->zoom;
    scale_factor1 = max(zoom, param->font_size_multiplier);
    scale_factor2 = zoom / scale_factor1;
}
/*
 * Tear down the renderer: finalize ffw, remove the registered temporary
 * files, then free the scratch tables allocated by the constructor.
 */
HTMLRenderer::~HTMLRenderer()
{
    ffw_fin();
    clean_tmp_files();

    // counterparts of the new[] calls in the constructor
    delete [] cur_mapping;
    delete [] cur_mapping2;
    delete [] width_list;
}
2012-08-14 08:23:15 +00:00
2012-09-07 00:13:45 +00:00
/*
 * Annotation callback handed to displayPage() for the background render.
 * Always returns false.
 * NOTE(review): presumably this makes poppler skip every annotation in the
 * background image -- confirm against the displayPage documentation.
 */
static GBool annot_cb(Annot *, void *)
{
    return false;
}
2012-08-14 08:23:15 +00:00
/*
 * Convert the pages param->first_page .. param->last_page of doc.
 *
 * For every page: optionally open a dedicated ".page" file (split_pages),
 * optionally render the non-text content to a PNG through a
 * BackgroundRenderer (process_nontext), then render the page through this
 * object. pre_process() / post_process() bracket the page loop.
 *
 * Throws std::string when a per-page output file cannot be opened.
 * NOTE(review): bg_renderer is a raw owning pointer, so it is not released
 * if an exception leaves the loop.
 */
void HTMLRenderer::process(PDFDoc *doc)
{
    cur_doc = doc;
    xref = doc->getXRef();

    cerr << "Preprocessing: ";
    preprocessor.process(doc);

    cerr << "Working: ";

    BackgroundRenderer * bg_renderer = nullptr;
    if(param->process_nontext)
    {
        // Render non-text objects as image
        // copied from poppler
        SplashColor paper;
        paper[0] = paper[1] = paper[2] = 255;

        bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, paper);
        bg_renderer->startDoc(doc);
    }

    pre_process();

    for(int page_no = param->first_page; page_no <= param->last_page; ++page_no)
    {
        // one output file per page when splitting
        if(param->split_pages)
        {
            auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), page_no);
            html_fout.open((char*)page_fn, ofstream::binary);
            if(!html_fout)
                throw string("Cannot open ") + (char*)page_fn + " for writing";
            fix_stream(html_fout);
        }

        // background image: goes to tmp_dir when it will be embedded later,
        // otherwise straight to dest_dir
        if(param->process_nontext)
        {
            doc->displayPage(bg_renderer, page_no, param->h_dpi, param->v_dpi,
                    0, true, false, false,
                    nullptr, nullptr, &annot_cb, nullptr);

            auto bg_fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), page_no);
            if(param->single_html)
                add_tmp_file((char*)bg_fn);

            bg_renderer->getBitmap()->writeImgFile(splashFormatPng,
                    (char*)bg_fn,
                    param->h_dpi, param->v_dpi);
        }

        // the page itself, rendered through this OutputDev
        doc->displayPage(this, page_no, (param->zoom) * DEFAULT_DPI, (param->zoom) * DEFAULT_DPI,
                0, true, false, false,
                nullptr, nullptr, nullptr, nullptr);

        if(param->split_pages)
            html_fout.close();

        // progress indicator
        cerr << "." << flush;
    }

    post_process();

    // deleting a null pointer is a no-op, so no guard is needed
    delete bg_renderer;

    cerr << endl;
}
2012-09-16 10:30:34 +00:00
/*
 * Remember the default transformation matrix supplied by the caller;
 * processLink() later uses it to map link rectangles.
 * Copies as many entries as default_ctm holds.
 */
void HTMLRenderer::setDefaultCTM(double *ctm)
{
    const size_t entry_count = sizeof(default_ctm) / sizeof(default_ctm[0]);
    for(size_t k = 0; k < entry_count; ++k)
        default_ctm[k] = ctm[k];
}
void HTMLRenderer::startPage(int pageNum, GfxState *state)
{
this->pageNum = pageNum;
this->pageWidth = state->getPageWidth();
this->pageHeight = state->getPageHeight();
assert((!line_opened) && "Open line in startPage detected!");
html_fout
2012-09-22 14:47:44 +00:00
<< "<div id=\"p" << pageNum << "\" class=\"p\" style=\"width:"
<< (pageWidth * scale_factor2) << "px;height:"
<< (pageHeight * scale_factor2) << "px;\">"
<< "<div id=\"b" << pageNum << "\" class=\"b\" style=\"width:"
<< pageWidth << "px;height:"
<< pageHeight << "px;";
/*
2012-09-22 14:47:44 +00:00
{
auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"};
for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter)
html_fout << *iter << "transform:scale(" << scale_factor2 << ");";
}
*/
2012-09-16 10:30:34 +00:00
2012-09-17 07:28:52 +00:00
if(param->process_nontext)
2012-09-16 10:30:34 +00:00
{
2012-09-17 07:28:52 +00:00
html_fout << "background-image:url(";
2012-09-16 10:30:34 +00:00
{
2012-09-17 07:28:52 +00:00
if(param->single_html)
{
auto path = str_fmt("%s/p%x.png", param->tmp_dir.c_str(), pageNum);
ifstream fin((char*)path, ifstream::binary);
if(!fin)
throw string("Cannot read background image ") + (char*)path;
html_fout << "'data:image/png;base64," << base64stream(fin) << "'";
}
else
{
html_fout << str_fmt("p%x.png", pageNum);
}
2012-09-16 10:30:34 +00:00
}
2012-09-17 07:28:52 +00:00
html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;";
2012-09-16 10:30:34 +00:00
}
2012-09-17 07:28:52 +00:00
html_fout << "\">";
2012-09-16 10:30:34 +00:00
draw_scale = 1.0;
cur_font_info = install_font(nullptr);
cur_font_size = draw_font_size = 0;
cur_fs_id = install_font_size(cur_font_size);
memcpy(cur_ctm, id_matrix, sizeof(cur_ctm));
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
cur_tm_id = install_transform_matrix(draw_ctm);
cur_letter_space = cur_word_space = 0;
cur_ls_id = install_letter_space(cur_letter_space);
cur_ws_id = install_word_space(cur_word_space);
cur_color.r = cur_color.g = cur_color.b = 0;
cur_color_id = install_color(&cur_color);
cur_rise = 0;
cur_rise_id = install_rise(cur_rise);
cur_tx = cur_ty = 0;
draw_tx = draw_ty = 0;
reset_state_change();
all_changed = true;
}
void HTMLRenderer::endPage() {
close_line();
// process links before the page is closed
cur_doc->processLinks(this, pageNum);
2012-09-12 15:26:14 +00:00
// close page
2012-09-18 18:13:26 +00:00
html_fout << "</div></div>" << endl;
2012-09-16 10:30:34 +00:00
}
2012-08-14 13:23:33 +00:00
2012-09-16 10:30:34 +00:00
/*
* Based on pdftohtml from poppler
* TODO: CSS for link rectangles
2012-09-16 10:30:34 +00:00
*/
/*
 * Escape the characters that would end or corrupt a double-quoted HTML
 * attribute value. Link targets come from the PDF and are not trusted.
 */
static std::string escape_link_attr(const std::string & raw)
{
    std::string escaped;
    escaped.reserve(raw.size());
    for(auto iter = raw.begin(); iter != raw.end(); ++iter)
    {
        switch(*iter)
        {
            case '&': escaped += "&amp;";  break;
            case '"': escaped += "&quot;"; break;
            case '<': escaped += "&lt;";   break;
            case '>': escaped += "&gt;";   break;
            default:  escaped += *iter;    break;
        }
    }
    return escaped;
}

/*
 * Emit the HTML for one link annotation: an absolutely positioned <div>
 * covering the link rectangle, wrapped in an <a> when a target could be
 * determined.
 *
 * Targets:
 *   - actionGoTo: "#p<page number in hex>" (matches the ids from startPage)
 *   - actionURI : the URI itself
 *   - actionGoToR / actionLaunch: not implemented, a notice is printed
 *
 * The border style and colour are taken from the annotation; the rectangle
 * is mapped through default_ctm.
 */
void HTMLRenderer::processLink(AnnotLink * al)
{
    std::string dest_str;

    auto action = al->getAction();
    if(action)
    {
        auto kind = action->getKind();
        switch(kind)
        {
            case actionGoTo:
                {
                    auto catalog = cur_doc->getCatalog();
                    auto * real_action = dynamic_cast<LinkGoTo*>(action);
                    LinkDest * dest = nullptr;
                    // guard against a failed cast instead of dereferencing null
                    if(real_action)
                    {
                        if(auto explicit_dest = real_action->getDest())
                            dest = explicit_dest->copy();
                        else if(auto named_dest = real_action->getNamedDest())
                            dest = catalog->findDest(named_dest);
                    }
                    if(dest)
                    {
                        int pageno = 0;
                        if(dest->isPageRef())
                        {
                            auto pageref = dest->getPageRef();
                            pageno = catalog->findPage(pageref.num, pageref.gen);
                        }
                        else
                        {
                            pageno = dest->getPageNum();
                        }

                        delete dest;

                        if(pageno > 0)
                            dest_str = (char*)str_fmt("#p%x", pageno);
                    }
                }
                break;
            case actionGoToR:
                {
                    cerr << "TODO: actionGoToR is not implemented." << endl;
                }
                break;
            case actionURI:
                {
                    auto * real_action = dynamic_cast<LinkURI*>(action);
                    if(real_action)
                    {
                        auto uri = real_action->getURI();
                        if(uri)
                            dest_str = uri->getCString();
                    }
                }
                break;
            case actionLaunch:
                {
                    cerr << "TODO: actionLaunch is not implemented." << endl;
                }
                break;
            default:
                cerr << "Warning: unknown annotation type: " << kind << endl;
                break;
        }
    }

    const bool has_target = !dest_str.empty();
    if(has_target)
    {
        // the target originates from the PDF: escape it for the attribute
        html_fout << "<a href=\"" << escape_link_attr(dest_str) << "\">";
    }

    html_fout << "<div style=\"";

    double width = 0;
    auto * border = al->getBorder();
    if(border)
    {
        width = border->getWidth() * (param->zoom);
        if(width > 0)
        {
            html_fout << "border-width:" << _round(width) << "px;";
            auto style = border->getStyle();
            switch(style)
            {
                case AnnotBorder::borderSolid:
                    html_fout << "border-style:solid;";
                    break;
                case AnnotBorder::borderDashed:
                    html_fout << "border-style:dashed;";
                    break;
                case AnnotBorder::borderBeveled:
                    html_fout << "border-style:outset;";
                    break;
                case AnnotBorder::borderInset:
                    html_fout << "border-style:inset;";
                    break;
                case AnnotBorder::borderUnderlined:
                    html_fout << "border-style:none;border-bottom-style:solid;";
                    break;
                default:
                    cerr << "Warning:Unknown annotation border style: " << style << endl;
                    html_fout << "border-style:solid;";
            }

            // only RGB colours are honoured; anything else falls back to black
            auto color = al->getColor();
            double r,g,b;
            if(color && (color->getSpace() == AnnotColor::colorRGB))
            {
                const double * v = color->getValues();
                r = v[0];
                g = v[1];
                b = v[2];
            }
            else
            {
                r = g = b = 0;
            }

            // the stream is in hex mode (see fix_stream), so switch to
            // decimal for the colour components and back afterwards
            html_fout << "border-color:rgb("
                << dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
                << ");";
        }
        else
        {
            html_fout << "border-style:none;";
        }
    }
    else
    {
        html_fout << "border-style:none;";
    }

    // fix for IE
    html_fout << "background-color:rgba(255,255,255,0.000001);";

    // Map both corners of the rectangle through default_ctm. The untouched
    // input coordinates are kept in separate variables: computing y from an
    // already-transformed x gives a wrong result whenever the matrix has a
    // non-zero shear/rotation term.
    double rect_x1, rect_y1, rect_x2, rect_y2;
    al->getRect(&rect_x1, &rect_y1, &rect_x2, &rect_y2);

    const double x1 = default_ctm[0] * rect_x1 + default_ctm[2] * rect_y1 + default_ctm[4];
    const double y1 = default_ctm[1] * rect_x1 + default_ctm[3] * rect_y1 + default_ctm[5];
    const double x2 = default_ctm[0] * rect_x2 + default_ctm[2] * rect_y2 + default_ctm[4];
    const double y2 = default_ctm[1] * rect_x2 + default_ctm[3] * rect_y2 + default_ctm[5];

    // TODO: check overlap when x2-x1-width<0 or y2-y1-width<0
    html_fout << "position:absolute;"
        << "left:" << _round(x1 - width/2) << "px;"
        << "bottom:" << _round(y1 - width/2) << "px;"
        << "width:" << _round(x2-x1-width) << "px;"
        << "height:" << _round(y2-y1-width) << "px;";

    // NOTE(review): second background-color declaration in the same style
    // attribute, after the "fix for IE" one above -- confirm which is intended.
    html_fout << "background-color:rgb(0,0,0,0);";

    html_fout << "\"></div>";

    if(has_target)
    {
        html_fout << "</a>";
    }
}
void HTMLRenderer::pre_process()
{
// we may output utf8 characters, so always use binary
{
/*
* If single-html && !split-pages
* we have to keep the generated css file into a temporary place
* and embed it into the main html later
*
*
* If single-html && split-page
* as there's no place to embed the css file, just leave it alone (into param->dest_dir)
*
* If !single-html
* leave it in param->dest_dir
*/
auto fn = (param->single_html && (!param->split_pages))
? str_fmt("%s/__css", param->tmp_dir.c_str())
: str_fmt("%s/%s", param->dest_dir.c_str(), param->css_filename.c_str());
if(param->single_html && (!param->split_pages))
add_tmp_file((char*)fn);
css_path = (char*)fn,
css_fout.open(css_path, ofstream::binary);
2012-09-17 12:07:50 +00:00
if(!css_fout)
throw string("Cannot open ") + (char*)fn + " for writing";
fix_stream(css_fout);
}
// if split-pages is specified, open & close the file in the process loop
// if not, open the file here:
if(!param->split_pages)
{
/*
* If single-html
* we have to keep the html file (for page) into a temporary place
* because we'll have to embed css before it
*
* Otherwise just generate it
*/
auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str());
add_tmp_file((char*)fn);
html_path = (char*)fn;
html_fout.open(html_path, ofstream::binary);
2012-09-17 12:07:50 +00:00
if(!html_fout)
throw string("Cannot open ") + (char*)fn + " for writing";
fix_stream(html_fout);
}
}
void HTMLRenderer::post_process()
{
// close files
html_fout.close();
css_fout.close();
//only when split-page, do we have some work left to do
if(param->split_pages)
return;
2012-09-17 12:07:50 +00:00
ofstream output;
{
auto fn = str_fmt("%s/%s", param->dest_dir.c_str(), param->output_filename.c_str());
output.open((char*)fn, ofstream::binary);
if(!output)
throw string("Cannot open ") + (char*)fn + " for writing";
fix_stream(output);
}
// apply manifest
2012-09-17 12:07:50 +00:00
ifstream manifest_fin((char*)str_fmt("%s/%s", param->data_dir.c_str(), MANIFEST_FILENAME.c_str()), ifstream::binary);
if(!manifest_fin)
throw "Cannot open the manifest file";
bool embed_string = false;
string line;
while(getline(manifest_fin, line))
{
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string)
{
output << line << endl;
continue;
}
if(line.empty() || line[0] == '#')
continue;
if(line[0] == '@')
{
embed_file(output, param->data_dir + "/" + line.substr(1), "", true);
continue;
}
if(line[0] == '$')
{
if(line == "$css")
{
embed_file(output, css_path, ".css", false);
}
else if (line == "$pages")
{
ifstream fin(html_path, ifstream::binary);
if(!fin)
throw "Cannot open read the pages";
output << fin.rdbuf();
}
else
{
cerr << "Warning: unknown line in manifest: " << line << endl;
}
continue;
}
cerr << "Warning: unknown line in manifest: " << line << endl;
}
}
/*
 * Prepare an output stream for use by the renderer: integers are written
 * in hexadecimal from here on.
 */
void HTMLRenderer::fix_stream (std::ostream & out)
{
    out.setf(std::ios_base::hex, std::ios_base::basefield);
}
2012-08-15 04:27:41 +00:00
void HTMLRenderer::add_tmp_file(const string & fn)
{
2012-08-15 07:43:49 +00:00
if(!param->clean_tmp)
return;
2012-08-15 04:27:41 +00:00
if(tmp_files.insert(fn).second && param->debug)
cerr << "Add new temporary file: " << fn << endl;
}
2012-08-14 08:23:15 +00:00
2012-08-15 04:27:41 +00:00
void HTMLRenderer::clean_tmp_files()
{
2012-08-15 07:43:49 +00:00
if(!param->clean_tmp)
return;
2012-09-09 06:48:10 +00:00
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter)
2012-08-15 04:27:41 +00:00
{
2012-09-09 17:40:37 +00:00
const string & fn = *iter;
remove(fn.c_str());
2012-08-15 04:27:41 +00:00
if(param->debug)
2012-09-09 17:40:37 +00:00
cerr << "Remove temporary file: " << fn << endl;
2012-08-15 04:27:41 +00:00
}
2012-09-09 17:40:37 +00:00
2012-09-12 16:16:34 +00:00
remove(param->tmp_dir.c_str());
2012-09-09 17:40:37 +00:00
if(param->debug)
2012-09-12 16:16:34 +00:00
cerr << "Remove temporary directory: " << param->tmp_dir << endl;
2012-08-15 04:27:41 +00:00
}
/*
 * Write the markup that brings the file at `path` into the document.
 *
 * out   stream receiving the markup
 * path  file to embed or reference
 * type  suffix used to pick the markup; when empty, the suffix of the
 *       file name is used instead
 * copy  when the file is only referenced (not single_html), also copy it
 *       into param->dest_dir
 *
 * The markup comes from EMBED_STRING_MAP, looked up by (suffix,
 * single_html); an unknown suffix only produces a warning. With
 * single_html the file content is written between the two markup strings,
 * otherwise the bare file name is.
 *
 * Throws std::string if the file cannot be read or the copy cannot be
 * written.
 */
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
{
    string fn = get_filename(path);
    string suffix = (type == "") ? get_suffix(fn) : type;

    auto iter = EMBED_STRING_MAP.find(make_pair(suffix, (bool)param->single_html));
    if(iter == EMBED_STRING_MAP.end())
    {
        cerr << "Warning: unknown suffix: " << suffix << endl;
        return;
    }

    if(param->single_html)
    {
        // inline the content of the file
        ifstream fin(path, ifstream::binary);
        if(!fin)
            throw string("Cannot open file ") + path + " for embedding";
        out << iter->second.first << endl
            << fin.rdbuf()
            << iter->second.second << endl;
    }
    else
    {
        // reference the file by name
        out << iter->second.first
            << fn
            << iter->second.second << endl;

        if(copy)
        {
            ifstream fin(path, ifstream::binary);
            if(!fin)
                throw string("Cannot copy file: ") + path;

            // distinct name: must not shadow the `out` parameter
            auto out_path = param->dest_dir + "/" + fn;
            ofstream copy_out(out_path, ofstream::binary);
            if(!copy_out)
                throw string("Cannot open ") + out_path + " for writing";
            copy_out << fin.rdbuf();
        }
    }
}
// Name of the manifest file that post_process() opens inside param->data_dir.
const std::string HTMLRenderer::MANIFEST_FILENAME = "manifest";
}// namespace pdf2htmlEX