1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-02 16:25:41 +00:00

Merge remote-tracking branch 'origin/lazyload'

This commit is contained in:
Lu Wang 2013-06-13 22:45:39 +08:00
commit b6df9d5763
7 changed files with 200 additions and 118 deletions

View File

@ -1,5 +1,6 @@
Developing v0.9
* Lazy loading of entire pages
* License changed
- Additional terms for usage in online services
- Remove GPLv2

View File

@ -39,11 +39,6 @@ pdf2htmlEX.defaultViewer = new pdf2htmlEX.Viewer({
container_id : 'page-container',
sidebar_id : 'sidebar',
outline_id : 'outline',
page_urls : [
"""
$page_urls
"""
]
});
</script>
"""
@ -54,7 +49,7 @@ $page_urls
<body>
"""
# The sidbar
# The sidebar
# By default this is hidden, pdf2htmlEX.js will add the 'opened' class if it is not empty
# You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js
# e.g.

View File

@ -18,6 +18,7 @@ var pdf2htmlEX = (function(){
link : '@CSS_LINK_CN@',
__dummy__ : 'no comma'
};
var DEFAULT_PAGES_TO_PRELOAD = 3;
var pdf2htmlEX = new Object();
@ -40,24 +41,36 @@ var pdf2htmlEX = (function(){
var Page = function(page, container) {
if(page == undefined) return;
this.loaded = false;
this.p = $(page);
this.container = container;
this.n = parseInt(this.p.attr('data-page-no'), 16);
this.b = $('.'+CSS_CLASS_NAMES['page_content_box'], this.p);
this.d = this.p.parents('.'+CSS_CLASS_NAMES['page_decoration']);
/*
* scale ratios
*
* default_r : the first one
* set_r : last set
* cur_r : currently using
*/
this.default_r = this.set_r = this.cur_r = this.p.height() / this.b.height();
this.h = this.p.height(); // Need to make rescale work when page_content_box is not loaded, yet
this.w = this.p.width();
this.data = JSON.parse($($('.'+CSS_CLASS_NAMES['page_data'], this.p)[0]).attr('data-data'));
// if page is loaded
if (this.b[0]) {
/*
* scale ratios
*
* default_r : the first one
* set_r : last set
* cur_r : currently using
*/
this.default_r = this.set_r = this.cur_r = this.p.height() / this.b.height();
this.ctm = this.data.ctm;
this.ictm = invert(this.ctm);
this.container = container;
this.data = JSON.parse($($('.'+CSS_CLASS_NAMES['page_data'], this.p)[0]).attr('data-data'));
this.ctm = this.data.ctm;
this.ictm = invert(this.ctm);
this.loaded = true;
}
};
$.extend(Page.prototype, {
/* hide & show are for contents, the page frame is still there */
@ -67,6 +80,7 @@ var pdf2htmlEX = (function(){
show : function(){
if(Math.abs(this.set_r - this.cur_r) > EPS) {
this.cur_r = this.set_r;
//TODO make it cross-browser compliant
this.b.css('transform', 'scale('+this.cur_r.toFixed(3)+')');
}
this.b.addClass('opened');
@ -116,7 +130,8 @@ var pdf2htmlEX = (function(){
this.container_id = config['container_id'];
this.sidebar_id = config['sidebar_id'];
this.outline_id = config['outline_id'];
this.page_urls = config['page_urls'];
this.pages_to_preload = config['pages_to_preload'] || DEFAULT_PAGES_TO_PRELOAD;
this.pages_loading = {};
this.init_before_loading_content();
var _ = this;
@ -141,7 +156,7 @@ var pdf2htmlEX = (function(){
if(this.outline.children().length > 0) {
this.sidebar.addClass('opened');
}
this.find_pages();
// register schedule rendering
@ -153,14 +168,7 @@ var pdf2htmlEX = (function(){
// handle links
this.container.add(this.outline).on('click', '.'+CSS_CLASS_NAMES['link'], this, this.link_handler);
// disable background image draging
$('img', this.container).on('dragstart', function(e){return false;});
this.render();
// load split pages
// has no effect if --split-pages is 0
this.load_page(0);
},
find_pages : function() {
var new_pages = new Array();
@ -172,19 +180,62 @@ var pdf2htmlEX = (function(){
}
this.pages = new_pages;
},
load_page : function(idx) {
if(idx < this.page_urls.length){
var _ = this;
load_page : function(idx, pages_to_preload, successCallback, errorCallback) {
if (idx >= this.pages.length)
return; // Page does not exist
if (this.pages[idx].loaded)
return; // Page is loaded
if (this.pages_loading[idx])
return; // Page is already loading
var page_no_hex = idx.toString(16);
var $pf = this.container.find('#' + CSS_CLASS_NAMES['page_frame'] + page_no_hex);
if($pf.length == 0)
return; // Page does not exist
var _ = this;
var url = $pf.data('page-url');
if (url && url.length > 0) {
this.pages_loading[idx] = true; // Set semaphore
$.ajax({
url: this.page_urls[idx],
url: url,
dataType: 'text'
}).done(function(data){
$('#'+_.container_id).append(data);
_.find_pages();
_.schedule_render();
_.load_page(idx+1);
_.pages[idx].p.parent().replaceWith(data);
var $new_pf = _.container.find('#' + CSS_CLASS_NAMES['page_frame'] + page_no_hex);
_.pages[idx] = new Page($new_pf, _.container);
_.pages[idx].rescale(_.scale);
_.schedule_render();
// disable background image dragging
$new_pf.find('.'+CSS_CLASS_NAMES['background_image']).on('dragstart', function(e){return false;});
// Reset loading token
delete _.pages_loading[idx];
if (successCallback) successCallback();
}
).fail(function(jqXHR, textStatus, errorThrown){
console.error('error loading page ' + idx + ': ' + textStatus);
// Reset loading token
delete _.pages_loading[idx];
if (errorCallback) errorCallback();
});
}
// Concurrent prefetch of the next pages
if (pages_to_preload === undefined)
pages_to_preload = this.pages_to_preload;
if (--pages_to_preload > 0)
_.load_page(idx+1, pages_to_preload);
},
pre_hide_pages : function() {
/* pages might not have been loaded yet, so add a CSS rule */
@ -211,7 +262,10 @@ var pdf2htmlEX = (function(){
for(var i in pl) {
var p = pl[i];
if(p.is_nearly_visible()){
p.show();
if (p.loaded) {
p.show();
} else
this.load_page(p.n);
} else {
p.hide();
}
@ -284,7 +338,6 @@ var pdf2htmlEX = (function(){
},
link_handler : function (e) {
console.log('here');
var _ = e.data;
var t = $(e.currentTarget);
@ -345,11 +398,26 @@ var pdf2htmlEX = (function(){
}
if(ok) {
pos = transform(target_page.ctm, pos);
if(upside_down) {
pos[1] = target_page.height() - pos[1];
var transform_and_scroll = function() {
pos = transform(target_page.ctm, pos);
if(upside_down) {
pos[1] = target_page.height() - pos[1];
}
_.scroll_to(detail[0], pos);
}
if (target_page.loaded) {
transform_and_scroll();
} else {
// Scroll to the exact position once loaded.
_.load_page(target_page.n, 1, function() {
target_page = _.pages[target_page.n]; // Refresh reference
transform_and_scroll();
});
// While the page is loading, scroll to its approximate position for maximum responsiveness.
_.scroll_to(detail[0], [0,0]);
}
_.scroll_to(detail[0], pos);
e.preventDefault();
}
},

View File

@ -308,8 +308,6 @@ protected:
Preprocessor preprocessor;
TmpFiles tmp_files;
// for splitted pages
std::vector<std::string> page_filenames;
// for string formatting
StringFormatter str_fmt;
@ -318,6 +316,8 @@ protected:
std::ofstream fs;
std::string path;
} f_outline, f_pages, f_css;
std::ofstream * f_curpage;
std::string cur_page_filename;
static const std::string MANIFEST_FILENAME;
};

View File

@ -370,50 +370,50 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
}
}
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN
(*f_curpage) << "<div class=\"" << CSS::CSS_DRAW_CN
<< ' ' << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(new_tm)
<< "\" style=\"";
if(line_color)
{
f_pages.fs << "border-color:" << *line_color << ";";
(*f_curpage) << "border-color:" << *line_color << ";";
f_pages.fs << "border-width:";
(*f_curpage) << "border-width:";
for(int i = 0; i < line_width_count; ++i)
{
if(i > 0) f_pages.fs << ' ';
if(i > 0) (*f_curpage) << ' ';
double lw = line_width_array[i] * scale;
f_pages.fs << round(lw);
if(is_positive(lw)) f_pages.fs << "px";
(*f_curpage) << round(lw);
if(is_positive(lw)) (*f_curpage) << "px";
}
f_pages.fs << ";";
(*f_curpage) << ";";
}
else
{
f_pages.fs << "border:none;";
(*f_curpage) << "border:none;";
}
if(fill_color)
{
f_pages.fs << "background-color:" << (*fill_color) << ";";
(*f_curpage) << "background-color:" << (*fill_color) << ";";
}
else
{
f_pages.fs << "background-color:transparent;";
(*f_curpage) << "background-color:transparent;";
}
if(style_function)
{
style_function(style_function_data, f_pages.fs);
style_function(style_function_data, (*f_curpage));
}
f_pages.fs << "bottom:" << round(y) << "px;"
(*f_curpage) << "bottom:" << round(y) << "px;"
<< "left:" << round(x) << "px;"
<< "width:" << round(w * scale) << "px;"
<< "height:" << round(h * scale) << "px;";
f_pages.fs << "\"></div>";
(*f_curpage) << "\"></div>";
}

View File

@ -112,12 +112,12 @@ void HTMLRenderer::process(PDFDoc *doc)
{
string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i);
auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str());
f_pages.fs.open((char*)page_fn, ofstream::binary);
if(!f_pages.fs)
f_curpage = new ofstream((char*)page_fn, ofstream::binary);
if(!(*f_curpage))
throw string("Cannot open ") + (char*)page_fn + " for writing";
set_stream_flags(f_pages.fs);
set_stream_flags((*f_curpage));
page_filenames.push_back(filled_template_filename);
cur_page_filename = filled_template_filename;
}
if(param.process_nontext)
@ -139,7 +139,8 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param.split_pages)
{
f_pages.fs.close();
delete f_curpage;
f_curpage = nullptr;
}
}
if(page_count >= 0)
@ -180,7 +181,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
long long wid = all_manager.width.install(pageWidth);
long long hid = all_manager.height.install(pageHeight);
f_pages.fs
(*f_curpage)
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
<< " " << CSS::WIDTH_CN << wid
<< " " << CSS::HEIGHT_CN << hid
@ -192,9 +193,29 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
<< " " << CSS::PAGE_CONTENT_BOX_CN << pageNum
<< "\">";
/*
* When split_pages is on, f_curpage points to the current page file
* and we want to output empty frames in f_pages.fs
*/
if(param.split_pages)
{
f_pages.fs
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
<< " " << CSS::WIDTH_CN << wid
<< " " << CSS::HEIGHT_CN << hid
<< "\">"
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
<< "\" class=\"" << CSS::PAGE_FRAME_CN
<< "\" data-page-no=\"" << pageNum
<< "\" data-page-url=\"";
outputURL(f_pages.fs, cur_page_filename);
f_pages.fs << "\">";
}
if(param.process_nontext)
{
f_pages.fs << "<img class=\"" << CSS::BACKGROUND_IMAGE_CN
(*f_curpage) << "<img class=\"" << CSS::BACKGROUND_IMAGE_CN
<< "\" alt=\"\" src=\"";
if(param.embed_image)
{
@ -202,13 +223,13 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
ifstream fin((char*)path, ifstream::binary);
if(!fin)
throw string("Cannot read background image ") + (char*)path;
f_pages.fs << "data:image/png;base64," << Base64Stream(fin);
(*f_curpage) << "data:image/png;base64," << Base64Stream(fin);
}
else
{
f_pages.fs << (char*)str_fmt("bg%x.png", pageNum);
(*f_curpage) << (char*)str_fmt("bg%x.png", pageNum);
}
f_pages.fs << "\"/>";
(*f_curpage) << "\"/>";
}
reset_state();
@ -216,7 +237,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
void HTMLRenderer::endPage() {
// dump all text
html_text_page.dump_text(f_pages.fs);
html_text_page.dump_text(*f_curpage);
html_text_page.dump_css(f_css.fs);
html_text_page.clear();
@ -224,26 +245,31 @@ void HTMLRenderer::endPage() {
cur_doc->processLinks(this, pageNum);
// close box
f_pages.fs << "</div>";
(*f_curpage) << "</div>";
// dump info for js
// TODO: create a function for this
// BE CAREFUL WITH ESCAPES
f_pages.fs << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
(*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
//default CTM
f_pages.fs << "\"ctm\":[";
(*f_curpage) << "\"ctm\":[";
for(int i = 0; i < 6; ++i)
{
if(i > 0) f_pages.fs << ",";
f_pages.fs << round(default_ctm[i]);
if(i > 0) (*f_curpage) << ",";
(*f_curpage) << round(default_ctm[i]);
}
f_pages.fs << "]";
(*f_curpage) << "]";
f_pages.fs << "}'></div>";
(*f_curpage) << "}'></div>";
// close page
f_pages.fs << "</div></div>" << endl;
(*f_curpage) << "</div></div>" << endl;
if(param.split_pages)
{
f_pages.fs << "</div></div>" << endl;
}
}
void HTMLRenderer::pre_process(PDFDoc * doc)
@ -324,9 +350,6 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
set_stream_flags(f_outline.fs);
}
// if split-pages is specified, open & close the file in the process loop
// if not, open the file here:
if(!param.split_pages)
{
/*
* we have to keep the html file for pages into a temporary place
@ -343,6 +366,15 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
throw string("Cannot open ") + (char*)fn + " for writing";
set_stream_flags(f_pages.fs);
}
if(param.split_pages)
{
f_curpage = nullptr;
}
else
{
f_curpage = &f_pages.fs;
}
}
void HTMLRenderer::post_process(void)
@ -436,25 +468,11 @@ void HTMLRenderer::post_process(void)
}
else if (line == "$pages")
{
if(!param.split_pages)
{
ifstream fin(f_pages.path, ifstream::binary);
if(!fin)
throw "Cannot open pages for reading";
output << fin.rdbuf();
output.clear(); // output will set fail bit if fin is empty
}
}
else if (line == "$page_urls")
{
for(auto iter = page_filenames.begin(); iter != page_filenames.end(); ++iter)
{
if(iter != page_filenames.begin())
output << ",";
output << "'";
outputJSON(output, *iter);
output << "'";
}
ifstream fin(f_pages.path, ifstream::binary);
if(!fin)
throw "Cannot open pages for reading";
output << fin.rdbuf();
output.clear(); // output will set fail bit if fin is empty
}
else
{

View File

@ -195,17 +195,17 @@ void HTMLRenderer::processLink(AnnotLink * al)
if(!dest_str.empty())
{
f_pages.fs << "<a class=\"" << CSS::LINK_CN << "\" href=\"";
outputURL(f_pages.fs, dest_str);
f_pages.fs << "\"";
(*f_curpage) << "<a class=\"" << CSS::LINK_CN << "\" href=\"";
outputURL((*f_curpage), dest_str);
(*f_curpage) << "\"";
if(!dest_detail_str.empty())
f_pages.fs << " data-dest-detail='" << dest_detail_str << "'";
(*f_curpage) << " data-dest-detail='" << dest_detail_str << "'";
f_pages.fs << ">";
(*f_curpage) << ">";
}
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN << ' ' << CSS::TRANSFORM_MATRIX_CN
(*f_curpage) << "<div class=\"" << CSS::CSS_DRAW_CN << ' ' << CSS::TRANSFORM_MATRIX_CN
<< all_manager.transform_matrix.install(default_ctm)
<< "\" style=\"";
@ -232,31 +232,31 @@ void HTMLRenderer::processLink(AnnotLink * al)
border_top_bottom_width, border_left_right_width);
if(abs(border_top_bottom_width - border_left_right_width) < EPS)
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px;";
(*f_curpage) << "border-width:" << round(border_top_bottom_width) << "px;";
else
f_pages.fs << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
(*f_curpage) << "border-width:" << round(border_top_bottom_width) << "px " << round(border_left_right_width) << "px;";
}
auto style = border->getStyle();
switch(style)
{
case AnnotBorder::borderSolid:
f_pages.fs << "border-style:solid;";
(*f_curpage) << "border-style:solid;";
break;
case AnnotBorder::borderDashed:
f_pages.fs << "border-style:dashed;";
(*f_curpage) << "border-style:dashed;";
break;
case AnnotBorder::borderBeveled:
f_pages.fs << "border-style:outset;";
(*f_curpage) << "border-style:outset;";
break;
case AnnotBorder::borderInset:
f_pages.fs << "border-style:inset;";
(*f_curpage) << "border-style:inset;";
break;
case AnnotBorder::borderUnderlined:
f_pages.fs << "border-style:none;border-bottom-style:solid;";
(*f_curpage) << "border-style:none;border-bottom-style:solid;";
break;
default:
cerr << "Warning:Unknown annotation border style: " << style << endl;
f_pages.fs << "border-style:solid;";
(*f_curpage) << "border-style:solid;";
}
@ -274,36 +274,36 @@ void HTMLRenderer::processLink(AnnotLink * al)
r = g = b = 0;
}
f_pages.fs << "border-color:rgb("
(*f_curpage) << "border-color:rgb("
<< dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex
<< ");";
}
else
{
f_pages.fs << "border-style:none;";
(*f_curpage) << "border-style:none;";
}
}
else
{
f_pages.fs << "border-style:none;";
(*f_curpage) << "border-style:none;";
}
tm_transform(default_ctm, x, y);
f_pages.fs << "position:absolute;"
(*f_curpage) << "position:absolute;"
<< "left:" << round(x) << "px;"
<< "bottom:" << round(y) << "px;"
<< "width:" << round(w) << "px;"
<< "height:" << round(h) << "px;";
// fix for IE
f_pages.fs << "background-color:rgba(255,255,255,0.000001);";
(*f_curpage) << "background-color:rgba(255,255,255,0.000001);";
f_pages.fs << "\"></div>";
(*f_curpage) << "\"></div>";
if(dest_str != "")
{
f_pages.fs << "</a>";
(*f_curpage) << "</a>";
}
}