2012-08-14 08:23:15 +00:00
|
|
|
/*
|
|
|
|
* general.cc
|
|
|
|
*
|
|
|
|
* Handling general stuff
|
|
|
|
*
|
|
|
|
* by WangLu
|
|
|
|
* 2012.08.14
|
|
|
|
*/
|
|
|
|
|
2012-09-09 17:40:37 +00:00
|
|
|
#include <cstdio>
|
2012-09-12 15:26:14 +00:00
|
|
|
#include <ostream>
|
2012-09-05 10:43:37 +00:00
|
|
|
|
2012-08-14 08:23:15 +00:00
|
|
|
#include <splash/SplashBitmap.h>
|
|
|
|
|
|
|
|
#include "HTMLRenderer.h"
|
|
|
|
#include "BackgroundRenderer.h"
|
2012-08-14 09:13:29 +00:00
|
|
|
#include "namespace.h"
|
2012-09-10 05:08:47 +00:00
|
|
|
#include "ff.h"
|
|
|
|
#include "pdf2htmlEX-config.h"
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
namespace pdf2htmlEX {
|
|
|
|
|
2012-08-21 20:22:56 +00:00
|
|
|
using std::fixed;
|
2012-08-14 12:30:18 +00:00
|
|
|
using std::flush;
|
2012-09-12 15:26:14 +00:00
|
|
|
using std::ostream;
|
2012-08-14 12:30:18 +00:00
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
/*
 * Error handler that deliberately does nothing.
 * It is installed through setErrorCallback() by the HTMLRenderer
 * constructor so that poppler's error reports are discarded.
 */
static void dummy(void *, enum ErrorCategory, int /* pos */, char *)
{
    // intentionally empty
}
|
|
|
|
|
2012-08-14 08:23:15 +00:00
|
|
|
/*
 * Build a renderer bound to the given parameter set.
 *
 * The destination and temporary directories are copied out of param,
 * poppler's error reporting is redirected to a no-op handler,
 * ff_init() is called, and the two mapping tables are allocated
 * (they are released again in the destructor).
 */
HTMLRenderer::HTMLRenderer(const Param * param)
    : line_opened(false)
    , line_buf(this)
    , image_count(0)
    , param(param)
    , dest_dir(param->dest_dir)
    , tmp_dir(param->tmp_dir)
{
    // silence poppler: its errors go to a handler that ignores them
    setErrorCallback(&dummy, nullptr);

    ff_init();

    cur_mapping  = new int32_t [0x10000];
    cur_mapping2 = new char* [0x100];
}
|
|
|
|
|
|
|
|
/*
 * Tear down the renderer: call ff_fin(), remove the registered
 * temporary files, and free the mapping tables allocated by the
 * constructor.
 */
HTMLRenderer::~HTMLRenderer()
{
    ff_fin();
    clean_tmp_files();

    // release the tables in reverse order of allocation
    delete [] cur_mapping2;
    delete [] cur_mapping;
}
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-09-07 00:13:45 +00:00
|
|
|
/*
 * Annotation callback handed to displayPage() for the background pass.
 * It answers false for every annotation.
 * NOTE(review): presumably this tells poppler not to draw annotations
 * into the background image - confirm against poppler's callback contract.
 */
static GBool annot_cb(Annot *, void *)
{
    return false;
}
|
|
|
|
|
2012-08-14 08:23:15 +00:00
|
|
|
/*
 * Convert pages param->first_page .. param->last_page (inclusive) of doc.
 *
 *  1. every page is first displayed through font_preprocessor
 *  2. pre_process() opens the output streams
 *  3. for each page:
 *       - with split_pages, a dedicated page file is opened in tmp_dir
 *       - with process_nontext, the page is rendered by a
 *         BackgroundRenderer and its bitmap is saved as p<hex page>.png
 *       - the page is displayed through this renderer
 *  4. post_process() closes the streams and assembles the final output
 *
 * Progress is reported on cerr, one dot per page.
 */
void HTMLRenderer::process(PDFDoc *doc)
{
    xref = doc->getXRef();

    cerr << "Preprocessing: ";
    for(int i = param->first_page; i <= param->last_page ; ++i)
    {
        doc->displayPage(&font_preprocessor, i, param->h_dpi, param->v_dpi,
                0, true, false, false,
                nullptr, nullptr, nullptr, nullptr);
        cerr << "." << flush;
    }
    cerr << endl;

    cerr << "Working: ";
    BackgroundRenderer * bg_renderer = nullptr;
    if(param->process_nontext)
    {
        // Render non-text objects as image
        // copied from poppler
        SplashColor color;
        color[0] = color[1] = color[2] = 255;

        bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
        bg_renderer->startDoc(doc);
    }

    pre_process();

    for(int i = param->first_page; i <= param->last_page ; ++i)
    {
        if(param->split_pages)
        {
            auto page_fn = str_fmt("%s/__pages%x", tmp_dir.c_str(), i);
            html_fout.open((char*)page_fn, ofstream::binary);
            add_tmp_file((char*)page_fn);
            fix_stream(html_fout);
        }

        if(param->process_nontext)
        {
            doc->displayPage(bg_renderer, i, param->h_dpi, param->v_dpi,
                    0, true, false, false,
                    nullptr, nullptr, &annot_cb, nullptr);

            {
                // i is an int, so the conversion must be %x (as for __pages%x above);
                // %llx would read an unsigned long long that was never passed
                auto fn = str_fmt("%s/p%x.png", (param->single_html ? tmp_dir : dest_dir).c_str(), i);
                if(param->single_html)
                    add_tmp_file((char*)fn);

                bg_renderer->getBitmap()->writeImgFile(splashFormatPng,
                        (char*)fn,
                        param->h_dpi, param->v_dpi);
            }
        }

        doc->displayPage(this, i, param->zoom * DEFAULT_DPI, param->zoom * DEFAULT_DPI,
                0, true, false, false,
                nullptr, nullptr, nullptr, nullptr);

        if(param->split_pages)
        {
            html_fout.close();
        }

        cerr << "." << flush;
    }

    post_process();

    // deleting a null pointer is a no-op, so no guard is needed
    delete bg_renderer;

    cerr << endl;
}
|
|
|
|
|
2012-08-14 12:30:18 +00:00
|
|
|
void HTMLRenderer::pre_process()
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-09-12 15:26:14 +00:00
|
|
|
// we may output utf8 characters, so always use binary
|
2012-08-14 12:30:18 +00:00
|
|
|
{
|
2012-09-12 15:26:14 +00:00
|
|
|
/*
|
|
|
|
* If single-html && !split-pages
|
|
|
|
* we have to keep the generated css file into a temporary place
|
|
|
|
* and embed it into the main html later
|
|
|
|
*
|
|
|
|
*
|
|
|
|
* If single-html && split-page
|
|
|
|
* as there's no place to embed the css file, just leave it alone (into dest_dir)
|
|
|
|
*
|
|
|
|
* If !single-html
|
|
|
|
* leave it in dest_dir
|
|
|
|
*/
|
|
|
|
|
|
|
|
auto fn = (param->single_html && (!param->split_pages))
|
|
|
|
? str_fmt("%s/__css", tmp_dir.c_str())
|
|
|
|
: str_fmt("%s/%s", dest_dir.c_str(), param->css_filename.c_str());
|
2012-09-09 16:21:46 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
if(param->single_html)
|
|
|
|
add_tmp_file((char*)fn);
|
2012-09-09 16:21:46 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
css_path = (char*)fn,
|
|
|
|
css_fout.open(css_path, ofstream::binary);
|
|
|
|
fix_stream(css_fout);
|
2012-08-15 04:27:41 +00:00
|
|
|
}
|
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
// if split-pages is specified, open & close the file in the process loop
|
|
|
|
// if not, open the file here:
|
|
|
|
if(!param->split_pages);
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* If single-html
|
|
|
|
* we have to keep the html file (for page) into a temporary place
|
|
|
|
* because we'll have to embed css before it
|
|
|
|
*
|
|
|
|
* Otherwise just generate it
|
|
|
|
*/
|
|
|
|
auto fn = (param->single_html)
|
|
|
|
? str_fmt("%s/__pages", tmp_dir.c_str())
|
|
|
|
: str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str());
|
2012-08-14 12:30:18 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
if(param->single_html)
|
|
|
|
add_tmp_file((char*)fn);
|
2012-08-21 20:22:56 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
html_path = (char*)fn;
|
|
|
|
html_fout.open(html_path, ofstream::binary);
|
|
|
|
fix_stream(html_fout);
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
2012-08-14 12:30:18 +00:00
|
|
|
void HTMLRenderer::post_process()
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-09-12 15:26:14 +00:00
|
|
|
// close files
|
2012-08-14 12:30:18 +00:00
|
|
|
html_fout.close();
|
2012-09-12 15:26:14 +00:00
|
|
|
css_fout.close();
|
2012-08-14 12:30:18 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
//only when !split-page, do we have some work left to do
|
|
|
|
if(!param->split_pages)
|
|
|
|
return;
|
|
|
|
|
|
|
|
ofstream output((char*)str_fmt("%s/%s", dest_dir.c_str(), param->output_filename.c_str()));
|
|
|
|
fix_stream(output);
|
|
|
|
|
|
|
|
// apply manifest
|
|
|
|
ifstream manifest_fin((char*)str_fmt("%s/%s", PDF2HTMLEX_DATA_PATH.c_str(), MANIFEST_FILENAME.c_str()));
|
|
|
|
|
|
|
|
bool embed_string = false;
|
|
|
|
string line;
|
|
|
|
while(getline(manifest_fin, line))
|
2012-08-14 12:30:18 +00:00
|
|
|
{
|
2012-09-12 15:26:14 +00:00
|
|
|
if(embed_string)
|
|
|
|
{
|
|
|
|
output << line << endl;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(line.empty() || line[0] == '#')
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if(line == "\"\"\"")
|
|
|
|
{
|
|
|
|
embed_string = !embed_string;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(line[0] == '@')
|
|
|
|
{
|
|
|
|
embed_file(output, PDF2HTMLEX_DATA_PATH + "/" + line.substr(1), true);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(line[0] == '$')
|
|
|
|
{
|
|
|
|
if(line == "$css")
|
|
|
|
{
|
|
|
|
embed_file(output, css_path, false);
|
|
|
|
}
|
|
|
|
else if (line == "$pages")
|
|
|
|
{
|
|
|
|
output << ifstream(html_path, ifstream::binary).rdbuf();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cerr << "Warning: unknown line in manifest: " << line << endl;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
cerr << "Warning: unknown line in manifest: " << line << endl;
|
2012-08-14 12:30:18 +00:00
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|
|
|
{
|
|
|
|
this->pageNum = pageNum;
|
|
|
|
this->pageWidth = state->getPageWidth();
|
|
|
|
this->pageHeight = state->getPageHeight();
|
|
|
|
|
2012-09-04 15:33:15 +00:00
|
|
|
assert((!line_opened) && "Open line in startPage detected!");
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-09-11 14:08:55 +00:00
|
|
|
html_fout << "<div class=\"b\" style=\"width:" << pageWidth << "px;height:" << pageHeight << "px;\">"
|
|
|
|
<< "<div id=\"p" << pageNum << "\" class=\"p\" style=\"width:" << pageWidth << "px;height:" << pageHeight << "px;";
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-14 18:54:39 +00:00
|
|
|
html_fout << "background-image:url(";
|
2012-08-14 13:23:33 +00:00
|
|
|
|
|
|
|
{
|
2012-09-07 17:09:09 +00:00
|
|
|
if(param->single_html)
|
|
|
|
{
|
2012-09-12 15:26:14 +00:00
|
|
|
html_fout << "'data:image/png;base64," << base64stream(ifstream((char*)str_fmt("%s/p%llx.png", tmp_dir.c_str(), pageNum) , ifstream::binary)) << "'";
|
2012-09-07 17:09:09 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-09-09 16:21:46 +00:00
|
|
|
html_fout << str_fmt("p%llx.png", pageNum);
|
2012-09-07 17:09:09 +00:00
|
|
|
}
|
2012-08-14 13:23:33 +00:00
|
|
|
}
|
|
|
|
|
2012-09-07 16:38:41 +00:00
|
|
|
html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;\">";
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-21 18:37:25 +00:00
|
|
|
draw_scale = 1.0;
|
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
cur_font_info = install_font(nullptr);
|
2012-08-21 18:37:25 +00:00
|
|
|
cur_font_size = draw_font_size = 0;
|
|
|
|
cur_fs_id = install_font_size(cur_font_size);
|
2012-08-15 10:48:11 +00:00
|
|
|
|
2012-08-14 08:23:15 +00:00
|
|
|
memcpy(cur_ctm, id_matrix, sizeof(cur_ctm));
|
2012-08-21 18:37:25 +00:00
|
|
|
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
|
|
|
|
cur_tm_id = install_transform_matrix(draw_ctm);
|
2012-08-15 10:48:11 +00:00
|
|
|
|
|
|
|
cur_letter_space = cur_word_space = 0;
|
2012-08-21 18:37:25 +00:00
|
|
|
cur_ls_id = install_letter_space(cur_letter_space);
|
|
|
|
cur_ws_id = install_word_space(cur_word_space);
|
2012-08-15 10:48:11 +00:00
|
|
|
|
|
|
|
cur_color.r = cur_color.g = cur_color.b = 0;
|
2012-08-21 18:37:25 +00:00
|
|
|
cur_color_id = install_color(&cur_color);
|
2012-08-15 10:48:11 +00:00
|
|
|
|
2012-08-24 17:40:43 +00:00
|
|
|
cur_rise = 0;
|
|
|
|
cur_rise_id = install_rise(cur_rise);
|
|
|
|
|
2012-08-15 10:48:11 +00:00
|
|
|
cur_tx = cur_ty = 0;
|
2012-08-14 08:23:15 +00:00
|
|
|
draw_tx = draw_ty = 0;
|
|
|
|
|
2012-08-16 12:26:09 +00:00
|
|
|
reset_state_change();
|
2012-08-20 22:20:20 +00:00
|
|
|
all_changed = true;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void HTMLRenderer::endPage() {
|
2012-08-16 10:11:22 +00:00
|
|
|
close_line();
|
2012-08-14 08:23:15 +00:00
|
|
|
// close page
|
2012-09-11 14:18:31 +00:00
|
|
|
html_fout << "</div></div>" << endl;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
void HTMLRenderer::fix_stream (std::ostream & out)
|
|
|
|
{
|
|
|
|
out << fixed << hex;
|
|
|
|
}
|
|
|
|
|
2012-08-15 04:27:41 +00:00
|
|
|
void HTMLRenderer::add_tmp_file(const string & fn)
|
|
|
|
{
|
2012-08-15 07:43:49 +00:00
|
|
|
if(!param->clean_tmp)
|
|
|
|
return;
|
|
|
|
|
2012-08-15 04:27:41 +00:00
|
|
|
if(tmp_files.insert(fn).second && param->debug)
|
|
|
|
cerr << "Add new temporary file: " << fn << endl;
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-15 04:27:41 +00:00
|
|
|
void HTMLRenderer::clean_tmp_files()
|
|
|
|
{
|
2012-08-15 07:43:49 +00:00
|
|
|
if(!param->clean_tmp)
|
|
|
|
return;
|
|
|
|
|
2012-09-09 06:48:10 +00:00
|
|
|
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter)
|
2012-08-15 04:27:41 +00:00
|
|
|
{
|
2012-09-09 17:40:37 +00:00
|
|
|
const string & fn = *iter;
|
|
|
|
remove(fn.c_str());
|
2012-08-15 04:27:41 +00:00
|
|
|
if(param->debug)
|
2012-09-09 17:40:37 +00:00
|
|
|
cerr << "Remove temporary file: " << fn << endl;
|
2012-08-15 04:27:41 +00:00
|
|
|
}
|
2012-09-09 17:40:37 +00:00
|
|
|
|
|
|
|
remove(tmp_dir.c_str());
|
|
|
|
if(param->debug)
|
|
|
|
cerr << "Remove temporary directory: " << tmp_dir << endl;
|
2012-08-15 04:27:41 +00:00
|
|
|
}
|
2012-08-15 07:29:35 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
/*
 * Write the markup that pulls the file at path into the document to out.
 *
 * The wrapper text is looked up in EMBED_STRING_MAP by the pair
 * (file suffix, param->single_html). With single_html the file content
 * is inlined between the two wrapper strings; otherwise only the file
 * name is placed between them. An unknown suffix yields a warning on
 * cerr and nothing is written.
 *
 * NOTE(review): the copy flag is accepted but not acted upon here.
 */
void HTMLRenderer::embed_file(ostream & out, const string & path, bool copy)
{
    string fn = get_filename(path);
    string suffix = get_suffix(fn);

    auto iter = EMBED_STRING_MAP.find(make_pair(suffix, param->single_html));
    if(iter == EMBED_STRING_MAP.end())
    {
        cerr << "Warning: unknown suffix in manifest: " << suffix << endl;
        return;
    }

    // the result belongs in the caller's stream, not on the diagnostic stream
    if(param->single_html)
    {
        out << iter->second.first << endl
            << ifstream(path, ifstream::binary).rdbuf()
            << iter->second.second << endl;
    }
    else
    {
        out << iter->second.first
            << fn
            << iter->second.second;
    }
}
|
|
|
|
|
|
|
|
// Name of the manifest file that post_process() looks up under PDF2HTMLEX_DATA_PATH.
const std::string HTMLRenderer::MANIFEST_FILENAME("manifest");
|
|
|
|
|
|
|
|
}// namespace pdf2htmlEX
|