1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-17 21:16:03 +00:00
pdf2htmlEX/src/HTMLRenderer/general.cc

242 lines
6.6 KiB
C++
Raw Normal View History

2012-08-14 08:23:15 +00:00
/*
* general.cc
*
* Handling general stuff
*
* by WangLu
* 2012.08.14
*/
#include <memory>

#include <splash/SplashBitmap.h>

#include "HTMLRenderer.h"
#include "BackgroundRenderer.h"
#include "config.h"
#include "namespace.h"
2012-08-14 08:23:15 +00:00
2012-08-14 12:30:18 +00:00
using std::flush;
2012-08-15 04:27:41 +00:00
using boost::filesystem::remove;
using boost::filesystem::filesystem_error;
2012-08-14 12:30:18 +00:00
2012-08-14 08:23:15 +00:00
/*
 * Build a renderer bound to the given conversion parameters.
 *
 * param: conversion settings. The pointer itself is stored, and it is
 *        dereferenced right here to copy the destination and temporary
 *        directories, so it must be non-null. Other members read it later
 *        (including the cleanup run by the destructor), so it has to stay
 *        valid for the lifetime of the renderer.
 */
HTMLRenderer::HTMLRenderer(const Param * param)
    : line_status(LineStatus::NONE),
      image_count(0),
      param(param),
      dest_dir(param->dest_dir),
      tmp_dir(param->tmp_dir)
{
}
/*
 * Tear down the renderer.
 *
 * The only explicit work is the temporary-file cleanup, which is a no-op
 * unless param->clean_tmp is set.
 */
HTMLRenderer::~HTMLRenderer()
{
    this->clean_tmp_files();
}
2012-08-14 08:23:15 +00:00
/*
 * Convert pages [param->first_page, param->last_page] of doc.
 *
 * Progress is reported on cerr ("Working: " followed by one dot per page).
 * When param->process_nontext is set, every page is first rendered through a
 * BackgroundRenderer and saved as "p<page number in hex>.png" -- into tmp_dir
 * for single-HTML output (where it is also registered as a temporary file),
 * into dest_dir otherwise. Every page is then rendered through this object
 * at param->zoom * DEFAULT_DPI.
 *
 * pre_process() runs before the first page and post_process() after the last.
 *
 * doc: the document to convert; must be non-null.
 */
void HTMLRenderer::process(PDFDoc *doc)
{
    cerr << "Working: ";

    xref = doc->getXRef();

    // Owned by this function. Holding it in a unique_ptr releases it on every
    // exit path, including an exception thrown while rendering or writing.
    std::unique_ptr<BackgroundRenderer> bg_renderer;
    if(param->process_nontext)
    {
        // Render non-text objects as image
        // copied from poppler
        SplashColor color;
        color[0] = color[1] = color[2] = 255;

        bg_renderer.reset(new BackgroundRenderer(splashModeRGB8, 4, gFalse, color));
        bg_renderer->startDoc(doc);
    }

    pre_process();
    for(int i = param->first_page; i <= param->last_page ; ++i)
    {
        // Non-null exactly when param->process_nontext was set above.
        if(bg_renderer)
        {
            doc->displayPage(bg_renderer.get(), i, param->h_dpi, param->v_dpi,
                    0, true, false, false,
                    nullptr, nullptr, nullptr, nullptr);

            const string fn = (format("p%|1$x|.png")%i).str();
            const auto bg_path = (param->single_html ? tmp_dir : dest_dir) / fn;

            // The file name is passed as a non-const char* (the previous code
            // cast the const away as well); const_cast makes that explicit.
            // NOTE(review): presumably writeImgFile() does not modify the name -- confirm.
            bg_renderer->getBitmap()->writeImgFile(splashFormatPng, const_cast<char*>(bg_path.c_str()), param->h_dpi, param->v_dpi);

            if(param->single_html)
                add_tmp_file(fn);
        }

        doc->displayPage(this, i, param->zoom * DEFAULT_DPI, param->zoom * DEFAULT_DPI,
                0, true, false, false,
                nullptr, nullptr, nullptr, nullptr);

        cerr << "." << flush;
    }
    post_process();

    // Release the background renderer before finishing the progress line,
    // matching the previous order of operations.
    bg_renderer.reset();

    cerr << endl;
}
2012-08-14 12:30:18 +00:00
void HTMLRenderer::pre_process()
2012-08-14 08:23:15 +00:00
{
2012-08-14 18:28:19 +00:00
// we may output utf8 characters, so use binary
2012-08-15 04:27:41 +00:00
if(param->single_html)
2012-08-14 12:30:18 +00:00
{
// don't use output_file directly
// otherwise it'll be a disaster when tmp_dir == dest_dir
const string tmp_output_fn = param->output_filename + ".part";
html_fout.open(tmp_dir / tmp_output_fn, ofstream::binary);
allcss_fout.open(tmp_dir / CSS_FILENAME, ofstream::binary);
2012-08-15 04:27:41 +00:00
add_tmp_file(tmp_output_fn);
add_tmp_file(CSS_FILENAME);
2012-08-15 04:27:41 +00:00
}
else
{
html_fout.open(dest_dir / param->output_filename, ofstream::binary);
allcss_fout.open(dest_dir / CSS_FILENAME, ofstream::binary);
2012-08-15 04:27:41 +00:00
2012-08-17 10:13:21 +00:00
html_fout << ifstream(PDF2HTMLEX_DATA_PATH / HEAD_HTML_FILENAME, ifstream::binary).rdbuf();
html_fout << "<link rel=\"stylesheet\" type=\"text/css\" href=\"" << CSS_FILENAME << "\"/>" << endl;
2012-08-17 10:13:21 +00:00
html_fout << ifstream(PDF2HTMLEX_DATA_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
}
2012-08-17 10:13:21 +00:00
allcss_fout << ifstream(PDF2HTMLEX_DATA_PATH / CSS_FILENAME, ifstream::binary).rdbuf();
2012-08-14 08:23:15 +00:00
}
2012-08-14 12:30:18 +00:00
void HTMLRenderer::post_process()
2012-08-14 08:23:15 +00:00
{
2012-08-14 12:30:18 +00:00
if(!param->single_html)
{
2012-08-17 10:13:21 +00:00
html_fout << ifstream(PDF2HTMLEX_DATA_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
}
html_fout.close();
allcss_fout.close();
if(param->single_html)
{
process_single_html();
}
2012-08-14 08:23:15 +00:00
}
void HTMLRenderer::startPage(int pageNum, GfxState *state)
{
this->pageNum = pageNum;
this->pageWidth = state->getPageWidth();
this->pageHeight = state->getPageHeight();
assert(line_status == LineStatus::NONE);
2012-08-14 08:23:15 +00:00
2012-08-14 18:54:39 +00:00
html_fout << format("<div id=\"p%|1$x|\" class=\"p\" style=\"width:%2%px;height:%3%px;") % pageNum % pageWidth % pageHeight;
2012-08-14 08:23:15 +00:00
2012-08-14 18:54:39 +00:00
html_fout << "background-image:url(";
2012-08-14 13:23:33 +00:00
const std::string fn = (format("p%|1$x|.png") % pageNum).str();
if(param->single_html)
{
2012-08-14 13:48:57 +00:00
auto path = tmp_dir / fn;
2012-08-15 03:15:33 +00:00
html_fout << "'data:image/png;base64," << base64stream(ifstream(path, ifstream::binary)) << "'";
2012-08-14 13:23:33 +00:00
}
else
{
2012-08-14 18:54:39 +00:00
html_fout << fn;
2012-08-14 13:23:33 +00:00
}
2012-08-14 18:54:39 +00:00
html_fout << format(");background-position:0 0;background-size:%1%px %2%px;background-repeat:no-repeat;\">") % pageWidth % pageHeight;
2012-08-14 08:23:15 +00:00
cur_rise = 0;
2012-08-14 08:23:15 +00:00
2012-08-21 18:37:25 +00:00
draw_scale = 1.0;
cur_fn_id = install_font(nullptr);
cur_font_size = draw_font_size = 0;
cur_fs_id = install_font_size(cur_font_size);
2012-08-14 08:23:15 +00:00
memcpy(cur_ctm, id_matrix, sizeof(cur_ctm));
2012-08-21 18:37:25 +00:00
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
cur_tm_id = install_transform_matrix(draw_ctm);
cur_letter_space = cur_word_space = 0;
2012-08-21 18:37:25 +00:00
cur_ls_id = install_letter_space(cur_letter_space);
cur_ws_id = install_word_space(cur_word_space);
cur_color.r = cur_color.g = cur_color.b = 0;
2012-08-21 18:37:25 +00:00
cur_color_id = install_color(&cur_color);
cur_tx = cur_ty = 0;
2012-08-14 08:23:15 +00:00
draw_tx = draw_ty = 0;
reset_state_change();
2012-08-20 22:20:20 +00:00
all_changed = true;
2012-08-14 08:23:15 +00:00
}
void HTMLRenderer::endPage() {
close_line();
2012-08-14 08:23:15 +00:00
// close page
html_fout << "</div>" << endl;
}
2012-08-14 12:30:18 +00:00
void HTMLRenderer::process_single_html()
{
ofstream out (dest_dir / param->output_filename, ofstream::binary);
2012-08-17 10:13:21 +00:00
out << ifstream(PDF2HTMLEX_DATA_PATH / HEAD_HTML_FILENAME , ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
out << "<style type=\"text/css\">" << endl;
out << ifstream(tmp_dir / CSS_FILENAME, ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
out << "</style>" << endl;
2012-08-17 10:13:21 +00:00
out << ifstream(PDF2HTMLEX_DATA_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf();
2012-08-14 08:23:15 +00:00
out << ifstream(tmp_dir / (param->output_filename + ".part"), ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
2012-08-17 10:13:21 +00:00
out << ifstream(PDF2HTMLEX_DATA_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf();
2012-08-14 12:30:18 +00:00
}
2012-08-14 08:23:15 +00:00
2012-08-15 04:27:41 +00:00
void HTMLRenderer::add_tmp_file(const string & fn)
{
2012-08-15 07:43:49 +00:00
if(!param->clean_tmp)
return;
2012-08-15 04:27:41 +00:00
if(tmp_files.insert(fn).second && param->debug)
cerr << "Add new temporary file: " << fn << endl;
}
2012-08-14 08:23:15 +00:00
2012-08-15 04:27:41 +00:00
void HTMLRenderer::clean_tmp_files()
{
2012-08-15 07:43:49 +00:00
if(!param->clean_tmp)
return;
2012-08-15 04:27:41 +00:00
for(const auto & fn : tmp_files)
{
try
{
remove(tmp_dir / fn);
if(param->debug)
cerr << "Remove temporary file: " << fn << endl;
}
catch(const filesystem_error &)
{ }
}
try
{
remove(tmp_dir);
if(param->debug)
cerr << "Remove temporary directory: " << tmp_dir << endl;
}
catch(const filesystem_error &)
{ }
}
// HTML template pieces and the stylesheet; this file reads the templates
// from PDF2HTMLEX_DATA_PATH.
const std::string HTMLRenderer::HEAD_HTML_FILENAME("head.html");
const std::string HTMLRenderer::NECK_HTML_FILENAME("neck.html");
const std::string HTMLRenderer::TAIL_HTML_FILENAME("tail.html");
const std::string HTMLRenderer::CSS_FILENAME("all.css");

// Not referenced in this file.
const std::string HTMLRenderer::NULL_FILENAME("null");
const std::string HTMLRenderer::FONTFORGE_SCRIPT_FILENAME("pdf2htmlEX.pe");