1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-28 17:21:29 +00:00

working on single-html

This commit is contained in:
Lu Wang 2012-08-14 20:30:18 +08:00
parent ed130cee60
commit 29a2d35202
7 changed files with 134 additions and 95 deletions

36
lib/base.css Normal file
View File

@ -0,0 +1,36 @@
/* Outer container: pinned to all four edges of its containing block,
   scrolls its content, and paints the grey backdrop behind the pages. */
#pdf-main {
font-family: sans-serif;
position:absolute;
top:0;
left:0;
bottom:0;
right:0;
overflow:auto;
background-color:grey;
/* for Chrome & Safari */
-webkit-text-stroke-width:0.2px;
}
/* Direct children with class "p": white boxes with a 13px vertical margin and
   auto horizontal margins; content outside the box is clipped.
   They start hidden (display:none) and are revealed by show_pages(). */
#pdf-main > .p {
position:relative;
margin:13px auto;
background-color:white;
overflow:hidden;
display:none;
}
/* ".l" children are positioned absolutely inside their ".p" box
   (which is position:relative) and keep their whitespace verbatim. */
.p > .l {
position:absolute;
white-space:pre;
}
/* ".w" children of ".l" are inline boxes using a monospace font.
   NOTE(review): presumably spacing elements between text runs; confirm against the renderer. */
.l > .w {
display:inline-block;
font-family: monospace;
}
/* Translucent light-blue highlight for selected text (standard pseudo-element). */
::selection{
background: rgba(168,209,255,0.5);
}
/* Same highlight for Firefox, which uses the prefixed pseudo-element. */
::-moz-selection{
background: rgba(168,209,255,0.5);
}
/* ".i" children are positioned absolutely inside their ".p" box.
   NOTE(review): presumably image elements; confirm against the renderer. */
.p > .i {
position:absolute;
}

View File

@ -5,62 +5,3 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
/* Outer container: pinned to all four edges of its containing block,
   scrolls its content, and paints the grey backdrop behind the pages. */
#pdf-main {
font-family: sans-serif;
position:absolute;
top:0;
left:0;
bottom:0;
right:0;
overflow:auto;
background-color:grey;
/* for Chrome & Safari */
-webkit-text-stroke-width:0.2px;
}
/* Direct children with class "p": white boxes with a 13px vertical margin and
   auto horizontal margins; content outside the box is clipped.
   They start hidden (display:none) and are revealed by show_pages(). */
#pdf-main > .p {
position:relative;
margin:13px auto;
background-color:white;
overflow:hidden;
display:none;
}
/* ".l" children are positioned absolutely inside their ".p" box
   (which is position:relative) and keep their whitespace verbatim. */
.p > .l {
position:absolute;
white-space:pre;
}
/* ".w" children of ".l" are inline boxes using a monospace font.
   NOTE(review): presumably spacing elements between text runs; confirm against the renderer. */
.l > .w {
display:inline-block;
font-family: monospace;
}
/* Translucent light-blue highlight for selected text (standard pseudo-element). */
::selection{
background: rgba(168,209,255,0.5);
}
/* Same highlight for Firefox, which uses the prefixed pseudo-element. */
::-moz-selection{
background: rgba(168,209,255,0.5);
}
/* ".i" children are positioned absolutely inside their ".p" box.
   NOTE(review): presumably image elements; confirm against the renderer. */
.p > .i {
position:absolute;
}
</style>
<link rel="stylesheet" type="text/css" href="all.css" />
<script type="text/javascript">
/**
 * Reveal the children of #pdf-main one at a time, 100 ms apart.
 *
 * The pages start out with display:none; showing them progressively keeps the
 * browser responsive instead of laying out the whole document at once.
 * Called from the <body> onload handler.
 */
function show_pages()
{
    var pages = document.getElementById('pdf-main').childNodes;
    var idx = 0;
    var reveal_next = function(){
        // childNodes also contains text/comment nodes (such as whitespace between
        // the page elements). They have no `style`, so skip them right away
        // instead of spending a 100 ms timer tick on each of them.
        while (idx < pages.length && pages[idx].nodeType !== 1) {
            ++idx;
        }
        if (idx < pages.length) {
            pages[idx].style.display = 'block';
            ++idx;
            setTimeout(reveal_next, 100);
        }
    };
    reveal_next();
}
</script>
</head>
<body onload="show_pages();">
<div id="pdf-main">

20
lib/neck.html Normal file
View File

@ -0,0 +1,20 @@
<script type="text/javascript">
/**
 * Reveal the children of #pdf-main one at a time, 100 ms apart.
 *
 * The pages start out with display:none; showing them progressively keeps the
 * browser responsive instead of laying out the whole document at once.
 * Called from the <body> onload handler.
 */
function show_pages()
{
    var pages = document.getElementById('pdf-main').childNodes;
    var idx = 0;
    var reveal_next = function(){
        // childNodes also contains text/comment nodes (such as whitespace between
        // the page elements). They have no `style`, so skip them right away
        // instead of spending a 100 ms timer tick on each of them.
        while (idx < pages.length && pages[idx].nodeType !== 1) {
            ++idx;
        }
        if (idx < pages.length) {
            pages[idx].style.display = 'block';
            ++idx;
            setTimeout(reveal_next, 100);
        }
    };
    reveal_next();
}
</script>
</head>
<body onload="show_pages();">
<div id="pdf-main">

View File

@ -56,8 +56,11 @@ class HTMLRenderer : public OutputDev
// Does this device need non-text content?
// Always answers gFalse.
virtual GBool needNonText()
{
    return gFalse;
}
virtual void write_html_head();
virtual void write_html_tail();
virtual void pre_process();
virtual void post_process();
virtual void process_single_html();
// Directory selected by the single_html option:
// tmp_dir when it is set, dest_dir otherwise.
virtual boost::filesystem::path working_dir() const
{
    if(param->single_html)
        return tmp_dir;
    return dest_dir;
}
// Start a page.
virtual void startPage(int pageNum, GfxState *state);

View File

@ -9,10 +9,7 @@
* 2012.08.14
*/
#include <iostream>
#include <boost/format.hpp>
#include <boost/filesystem/fstream.hpp>
#include <iomanip>
#include <splash/SplashBitmap.h>
@ -21,15 +18,14 @@
#include "config.h"
#include "namespace.h"
using std::flush;
HTMLRenderer::HTMLRenderer(const Param * param)
:line_opened(false)
,image_count(0)
,param(param)
,dest_dir(param->dest_dir)
,tmp_dir(TMP_DIR)
,html_fout(dest_dir / param->output_filename, ofstream::binary) // we may output utf8 characters, so use binary
,allcss_fout(dest_dir / "all.css", ofstream::binary)
,fontscript_fout(tmp_dir / "convert.pe", ofstream::binary)
{
// install default font & size
install_font(nullptr);
@ -47,55 +43,80 @@ HTMLRenderer::~HTMLRenderer()
void HTMLRenderer::process(PDFDoc *doc)
{
cerr << "Processing Text: ";
write_html_head();
xref = doc->getXRef();
for(int i = param->first_page; i <= param->last_page ; ++i)
{
doc->displayPage(this, i, param->h_dpi, param->v_dpi,
0, true, false, false,
nullptr, nullptr, nullptr, nullptr);
cerr << "Working: ";
cerr << ".";
cerr.flush();
}
write_html_tail();
cerr << endl;
xref = doc->getXRef();
BackgroundRenderer * bg_renderer = nullptr;
if(param->process_nontext)
{
// Render non-text objects as image
cerr << "Processing Others: ";
// copied from poppler
SplashColor color;
color[0] = color[1] = color[2] = 255;
auto bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
bg_renderer->startDoc(doc);
}
for(int i = param->first_page; i <= param->last_page ; ++i)
pre_process();
for(int i = param->first_page; i <= param->last_page ; ++i)
{
if(param->process_nontext)
{
doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2,
0, true, false, false,
nullptr, nullptr, nullptr, nullptr);
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(dest_dir / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2);
cerr << ".";
cerr.flush();
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(working_dir() / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2);
}
delete bg_renderer;
cerr << endl;
doc->displayPage(this, i, param->h_dpi, param->v_dpi,
0, true, false, false,
nullptr, nullptr, nullptr, nullptr);
cerr << "." << flush;
}
post_process();
if(bg_renderer)
delete bg_renderer;
cerr << endl;
}
void HTMLRenderer::write_html_head()
void HTMLRenderer::pre_process()
{
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf();
html_fout.open(working_dir() / param->output_filename, ofstream::binary); // we may output utf8 characters, so use binary
allcss_fout.open(working_dir() / "all.css", ofstream::binary);
fontscript_fout.open(tmp_dir / "pdf2htmlEX.pe", ofstream::binary);
if(!param->single_html)
{
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf();
html_fout << "<link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\"/>" << endl;
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf();
}
allcss_fout << ifstream(PDF2HTMLEX_LIB_PATH / "base.css", ifstream::binary).rdbuf();
}
void HTMLRenderer::write_html_tail()
void HTMLRenderer::post_process()
{
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf();
if(!param->single_html)
{
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf();
}
html_fout.close();
allcss_fout.close();
fontscript_fout.close();
if(param->single_html)
{
process_single_html();
}
}
void HTMLRenderer::startPage(int pageNum, GfxState *state)
@ -133,7 +154,22 @@ void HTMLRenderer::endPage() {
html_fout << "</div>" << endl;
}
void HTMLRenderer::process_single_html()
{
ofstream out (dest_dir / param->output_filename, ofstream::binary);
out << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf();
out << "<style type=\"text/css\">" << endl;
out << ifstream(tmp_dir / "all.css", ifstream::binary).rdbuf();
out << "</style>" << endl;
out << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf();
out << ifstream(tmp_dir / param->output_filename, ifstream::binary).rdbuf();
out << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf();
}

View File

@ -27,6 +27,8 @@ struct Param
int process_nontext;
int debug;
int single_html;
};

View File

@ -132,6 +132,7 @@ po::variables_map parse_options (int argc, char **argv)
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
("process-nontext", po::value<int>(&param.process_nontext)->default_value(1), "process nontext objects")
("debug", po::value<int>(&param.debug)->default_value(0), "output debug information")
("single-html", po::value<int>(&param.single_html)->default_value(0), "combine everything into one single HTML file")
;
opt_hidden.add_options()