mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
working on single-html
This commit is contained in:
parent
ed130cee60
commit
29a2d35202
36
lib/base.css
Normal file
36
lib/base.css
Normal file
@ -0,0 +1,36 @@
|
||||
/* Viewport: pinned to all four edges of its containing block and scrollable,
 * so the pages scroll inside it. */
#pdf-main {
    font-family: sans-serif;
    position: absolute;
    top: 0;
    left: 0;
    bottom: 0;
    right: 0;
    overflow: auto;
    background-color: grey;
    /* for Chrome & Safari */
    -webkit-text-stroke-width: 0.2px;
}

/* A page: centred white sheet, clipped to its own box.
 * Starts hidden; an element's inline style.display overrides this. */
#pdf-main > .p {
    position: relative;
    margin: 13px auto;
    background-color: white;
    overflow: hidden;
    display: none;
}

/* .l children of a page: absolutely positioned, whitespace kept verbatim
 * (presumably one text line each -- confirm against the renderer). */
.p > .l {
    position: absolute;
    white-space: pre;
}

/* .w children of a line: inline boxes set in a monospace face. */
.l > .w {
    display: inline-block;
    font-family: monospace;
}

/* Translucent selection highlight (standard and Mozilla-prefixed forms). */
::selection {
    background: rgba(168,209,255,0.5);
}

::-moz-selection {
    background: rgba(168,209,255,0.5);
}

/* .i children of a page: absolutely positioned. */
.p > .i {
    position: absolute;
}
|
@ -5,62 +5,3 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<style type="text/css">
|
||||
#pdf-main {
|
||||
font-family: sans-serif;
|
||||
position:absolute;
|
||||
top:0;
|
||||
left:0;
|
||||
bottom:0;
|
||||
right:0;
|
||||
overflow:auto;
|
||||
background-color:grey;
|
||||
/* for Chrome & Safari */
|
||||
-webkit-text-stroke-width:0.2px;
|
||||
}
|
||||
#pdf-main > .p {
|
||||
position:relative;
|
||||
margin:13px auto;
|
||||
background-color:white;
|
||||
overflow:hidden;
|
||||
display:none;
|
||||
}
|
||||
.p > .l {
|
||||
position:absolute;
|
||||
white-space:pre;
|
||||
}
|
||||
.l > .w {
|
||||
display:inline-block;
|
||||
font-family: monospace;
|
||||
}
|
||||
::selection{
|
||||
background: rgba(168,209,255,0.5);
|
||||
}
|
||||
::-moz-selection{
|
||||
background: rgba(168,209,255,0.5);
|
||||
}
|
||||
.p > .i {
|
||||
position:absolute;
|
||||
}
|
||||
</style>
|
||||
<link rel="stylesheet" type="text/css" href="all.css" />
|
||||
<script type="text/javascript">
|
||||
function show_pages()
|
||||
{
|
||||
var pages = document.getElementById('pdf-main').childNodes;
|
||||
var idx = 0;
|
||||
var f = function(){
|
||||
if (idx < pages.length) {
|
||||
try{
|
||||
pages[idx].style.display='block';
|
||||
}catch(e){}
|
||||
++idx;
|
||||
setTimeout(f,100);
|
||||
}
|
||||
};
|
||||
f();
|
||||
};
|
||||
</script>
|
||||
</head>
|
||||
<body onload="show_pages();">
|
||||
<div id="pdf-main">
|
||||
|
20
lib/neck.html
Normal file
20
lib/neck.html
Normal file
@ -0,0 +1,20 @@
|
||||
<script type="text/javascript">
|
||||
/**
 * Reveal the child elements of #pdf-main one after another, 100 ms apart,
 * by setting their inline display to 'block'.
 *
 * Does nothing when the #pdf-main container is absent.
 */
function show_pages()
{
    var container = document.getElementById('pdf-main');
    if (!container) {
        return;
    }

    // `children` lists element nodes only. `childNodes` also contains the
    // whitespace text nodes between elements; those have no `style`, so each
    // one used to cost a swallowed TypeError plus a wasted 100 ms tick.
    var pages = container.children;
    var idx = 0;

    var reveal_next = function(){
        if (idx < pages.length) {
            pages[idx].style.display = 'block';
            ++idx;
            setTimeout(reveal_next, 100);
        }
    };

    reveal_next();
};
|
||||
</script>
|
||||
</head>
|
||||
<body onload="show_pages();">
|
||||
<div id="pdf-main">
|
@ -56,8 +56,11 @@ class HTMLRenderer : public OutputDev
|
||||
// Does this device need non-text content?
|
||||
virtual GBool needNonText() { return gFalse; }
|
||||
|
||||
virtual void write_html_head();
|
||||
virtual void write_html_tail();
|
||||
virtual void pre_process();
|
||||
virtual void post_process();
|
||||
virtual void process_single_html();
|
||||
|
||||
virtual boost::filesystem::path working_dir() const { return (param->single_html ? tmp_dir : dest_dir); }
|
||||
|
||||
// Start a page.
|
||||
virtual void startPage(int pageNum, GfxState *state);
|
||||
|
@ -9,10 +9,7 @@
|
||||
* 2012.08.14
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <boost/format.hpp>
|
||||
#include <boost/filesystem/fstream.hpp>
|
||||
#include <iomanip>
|
||||
|
||||
#include <splash/SplashBitmap.h>
|
||||
|
||||
@ -21,15 +18,14 @@
|
||||
#include "config.h"
|
||||
#include "namespace.h"
|
||||
|
||||
using std::flush;
|
||||
|
||||
HTMLRenderer::HTMLRenderer(const Param * param)
|
||||
:line_opened(false)
|
||||
,image_count(0)
|
||||
,param(param)
|
||||
,dest_dir(param->dest_dir)
|
||||
,tmp_dir(TMP_DIR)
|
||||
,html_fout(dest_dir / param->output_filename, ofstream::binary) // we may output utf8 characters, so use binary
|
||||
,allcss_fout(dest_dir / "all.css", ofstream::binary)
|
||||
,fontscript_fout(tmp_dir / "convert.pe", ofstream::binary)
|
||||
{
|
||||
// install default font & size
|
||||
install_font(nullptr);
|
||||
@ -47,57 +43,82 @@ HTMLRenderer::~HTMLRenderer()
|
||||
|
||||
void HTMLRenderer::process(PDFDoc *doc)
|
||||
{
|
||||
cerr << "Processing Text: ";
|
||||
write_html_head();
|
||||
xref = doc->getXRef();
|
||||
for(int i = param->first_page; i <= param->last_page ; ++i)
|
||||
{
|
||||
doc->displayPage(this, i, param->h_dpi, param->v_dpi,
|
||||
0, true, false, false,
|
||||
nullptr, nullptr, nullptr, nullptr);
|
||||
cerr << "Working: ";
|
||||
|
||||
cerr << ".";
|
||||
cerr.flush();
|
||||
}
|
||||
write_html_tail();
|
||||
cerr << endl;
|
||||
xref = doc->getXRef();
|
||||
|
||||
BackgroundRenderer * bg_renderer = nullptr;
|
||||
|
||||
if(param->process_nontext)
|
||||
{
|
||||
// Render non-text objects as image
|
||||
cerr << "Processing Others: ";
|
||||
// copied from poppler
|
||||
SplashColor color;
|
||||
color[0] = color[1] = color[2] = 255;
|
||||
|
||||
auto bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
|
||||
bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color);
|
||||
bg_renderer->startDoc(doc);
|
||||
}
|
||||
|
||||
pre_process();
|
||||
for(int i = param->first_page; i <= param->last_page ; ++i)
|
||||
{
|
||||
if(param->process_nontext)
|
||||
{
|
||||
doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2,
|
||||
0, true, false, false,
|
||||
nullptr, nullptr, nullptr, nullptr);
|
||||
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(dest_dir / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2);
|
||||
|
||||
cerr << ".";
|
||||
cerr.flush();
|
||||
bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(working_dir() / (format("p%|1$x|.png")%i).str()).c_str(), param->h_dpi2, param->v_dpi2);
|
||||
}
|
||||
|
||||
|
||||
doc->displayPage(this, i, param->h_dpi, param->v_dpi,
|
||||
0, true, false, false,
|
||||
nullptr, nullptr, nullptr, nullptr);
|
||||
|
||||
cerr << "." << flush;
|
||||
}
|
||||
post_process();
|
||||
|
||||
if(bg_renderer)
|
||||
delete bg_renderer;
|
||||
|
||||
cerr << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void HTMLRenderer::write_html_head()
|
||||
void HTMLRenderer::pre_process()
|
||||
{
|
||||
html_fout.open(working_dir() / param->output_filename, ofstream::binary); // we may output utf8 characters, so use binary
|
||||
allcss_fout.open(working_dir() / "all.css", ofstream::binary);
|
||||
fontscript_fout.open(tmp_dir / "pdf2htmlEX.pe", ofstream::binary);
|
||||
|
||||
if(!param->single_html)
|
||||
{
|
||||
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf();
|
||||
html_fout << "<link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\"/>" << endl;
|
||||
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf();
|
||||
}
|
||||
|
||||
void HTMLRenderer::write_html_tail()
|
||||
allcss_fout << ifstream(PDF2HTMLEX_LIB_PATH / "base.css", ifstream::binary).rdbuf();
|
||||
}
|
||||
|
||||
void HTMLRenderer::post_process()
|
||||
{
|
||||
if(!param->single_html)
|
||||
{
|
||||
html_fout << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf();
|
||||
}
|
||||
|
||||
html_fout.close();
|
||||
allcss_fout.close();
|
||||
fontscript_fout.close();
|
||||
|
||||
if(param->single_html)
|
||||
{
|
||||
process_single_html();
|
||||
}
|
||||
}
|
||||
|
||||
void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||
{
|
||||
this->pageNum = pageNum;
|
||||
@ -133,7 +154,22 @@ void HTMLRenderer::endPage() {
|
||||
html_fout << "</div>" << endl;
|
||||
}
|
||||
|
||||
|
||||
void HTMLRenderer::process_single_html()
|
||||
{
|
||||
ofstream out (dest_dir / param->output_filename, ofstream::binary);
|
||||
|
||||
out << ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf();
|
||||
|
||||
out << "<style type=\"text/css\">" << endl;
|
||||
out << ifstream(tmp_dir / "all.css", ifstream::binary).rdbuf();
|
||||
out << "</style>" << endl;
|
||||
|
||||
out << ifstream(PDF2HTMLEX_LIB_PATH / "neck.html", ifstream::binary).rdbuf();
|
||||
|
||||
out << ifstream(tmp_dir / param->output_filename, ifstream::binary).rdbuf();
|
||||
|
||||
out << ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -27,6 +27,8 @@ struct Param
|
||||
int process_nontext;
|
||||
|
||||
int debug;
|
||||
|
||||
int single_html;
|
||||
};
|
||||
|
||||
|
||||
|
@ -132,6 +132,7 @@ po::variables_map parse_options (int argc, char **argv)
|
||||
("veps", po::value<double>(¶m.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
|
||||
("process-nontext", po::value<int>(¶m.process_nontext)->default_value(1), "process nontext objects")
|
||||
("debug", po::value<int>(¶m.debug)->default_value(0), "output debug information")
|
||||
("single-html", po::value<int>(¶m.single_html)->default_value(0), "combine everything into one single HTML file")
|
||||
;
|
||||
|
||||
opt_hidden.add_options()
|
||||
|
Loading…
Reference in New Issue
Block a user