diff --git a/ChangeLog b/ChangeLog index 9c4ba08..6309d52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,6 @@ Developing v0.9 +* Lazy loading of entire pages * Licensed changed - Additional terms for usage in online services - Remove GPLv2 diff --git a/share/manifest b/share/manifest index 0fd9f4c..8b4401d 100644 --- a/share/manifest +++ b/share/manifest @@ -39,11 +39,6 @@ pdf2htmlEX.defaultViewer = new pdf2htmlEX.Viewer({ container_id : 'page-container', sidebar_id : 'sidebar', outline_id : 'outline', - page_urls : [ -""" -$page_urls -""" -] }); """ @@ -54,7 +49,7 @@ $page_urls """ -# The sidbar +# The sidebar # By default this is hidden, pdf2htmlEX.js will add the 'opened' class if it is not empty # You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js # e.g. diff --git a/share/pdf2htmlEX.js.in b/share/pdf2htmlEX.js.in index e157418..7b5d5c5 100644 --- a/share/pdf2htmlEX.js.in +++ b/share/pdf2htmlEX.js.in @@ -18,6 +18,7 @@ var pdf2htmlEX = (function(){ link : '@CSS_LINK_CN@', __dummy__ : 'no comma' }; + var DEFAULT_PAGES_TO_PRELOAD = 3; var pdf2htmlEX = new Object(); @@ -40,24 +41,36 @@ var pdf2htmlEX = (function(){ var Page = function(page, container) { if(page == undefined) return; + this.loaded = false; this.p = $(page); + + this.container = container; + this.n = parseInt(this.p.attr('data-page-no'), 16); this.b = $('.'+CSS_CLASS_NAMES['page_content_box'], this.p); + this.d = this.p.parents('.'+CSS_CLASS_NAMES['page_decoration']); - /* - * scale ratios - * - * default_r : the first one - * set_r : last set - * cur_r : currently using - */ - this.default_r = this.set_r = this.cur_r = this.p.height() / this.b.height(); + this.h = this.p.height(); // Need to make rescale work when page_content_box is not loaded, yet + this.w = this.p.width(); - this.data = JSON.parse($($('.'+CSS_CLASS_NAMES['page_data'], this.p)[0]).attr('data-data')); + // if page is loaded + if (this.b[0]) { + /* + * scale ratios + * + * default_r : the first one + * set_r : last set + * cur_r : currently using + */ + this.default_r = this.set_r = this.cur_r = this.p.height() / this.b.height(); - this.ctm = this.data.ctm; - this.ictm = invert(this.ctm); - this.container = container; + this.data = JSON.parse($($('.'+CSS_CLASS_NAMES['page_data'], this.p)[0]).attr('data-data')); + + this.ctm = this.data.ctm; + this.ictm = invert(this.ctm); + + this.loaded = true; + } }; $.extend(Page.prototype, { /* hide & show are for contents, the page frame is still there */ @@ -67,6 +80,7 @@ var pdf2htmlEX = (function(){ show : function(){ if(Math.abs(this.set_r - this.cur_r) > EPS) { this.cur_r = this.set_r; + //TODO make it cross-browser compliant this.b.css('transform', 'scale('+this.cur_r.toFixed(3)+')'); } this.b.addClass('opened'); @@ -116,7 +130,8 @@ var pdf2htmlEX = (function(){ this.container_id = config['container_id']; this.sidebar_id = config['sidebar_id']; this.outline_id = config['outline_id']; - this.page_urls = config['page_urls']; + this.pages_to_preload = config['pages_to_preload'] || DEFAULT_PAGES_TO_PRELOAD; + this.pages_loading = {}; this.init_before_loading_content(); var _ = this; @@ -141,7 +156,7 @@ var pdf2htmlEX = (function(){ if(this.outline.children().length > 0) { this.sidebar.addClass('opened'); } - + this.find_pages(); // register schedule rendering @@ -153,14 +168,7 @@ var pdf2htmlEX = (function(){ // handle links this.container.add(this.outline).on('click', '.'+CSS_CLASS_NAMES['link'], this, this.link_handler); - // disable background image draging - $('img', this.container).on('dragstart', function(e){return false;}); - this.render(); - - // load split pages - // has no effect if --split-pages is 0 - this.load_page(0); }, find_pages : function() { var new_pages = new Array(); @@ -172,19 +180,62 @@ var pdf2htmlEX = (function(){ } this.pages = new_pages; }, - load_page : function(idx) { - if(idx < this.page_urls.length){ - var _ = this; + load_page : function(idx, pages_to_preload, successCallback, errorCallback) { + if (idx >= this.pages.length) + return; // Page does not exist + + if (this.pages[idx].loaded) + return; // Page is loaded + + if (this.pages_loading[idx]) + return; // Page is already loading + + var page_no_hex = idx.toString(16); + var $pf = this.container.find('#' + CSS_CLASS_NAMES['page_frame'] + page_no_hex); + if($pf.length == 0) + return; // Page does not exist + + var _ = this; + + var url = $pf.data('page-url'); + if (url && url.length > 0) { + this.pages_loading[idx] = true; // Set semaphore + $.ajax({ - url: this.page_urls[idx], + url: url, dataType: 'text' }).done(function(data){ - $('#'+_.container_id).append(data); - _.find_pages(); - _.schedule_render(); - _.load_page(idx+1); + _.pages[idx].p.parent().replaceWith(data); + + var $new_pf = _.container.find('#' + CSS_CLASS_NAMES['page_frame'] + page_no_hex); + _.pages[idx] = new Page($new_pf, _.container); + _.pages[idx].rescale(_.scale); + _.schedule_render(); + + // disable background image dragging + $new_pf.find('.'+CSS_CLASS_NAMES['background_image']).on('dragstart', function(e){return false;}); + + // Reset loading token + delete _.pages_loading[idx]; + + if (successCallback) successCallback(); + } + ).fail(function(jqXHR, textStatus, errorThrown){ + console.error('error loading page ' + idx + ': ' + textStatus); + + // Reset loading token + delete _.pages_loading[idx]; + + if (errorCallback) errorCallback(); }); } + // Concurrent prefetch of the next pages + if (pages_to_preload === undefined) + pages_to_preload = this.pages_to_preload; + + if (--pages_to_preload > 0) + _.load_page(idx+1, pages_to_preload); + }, pre_hide_pages : function() { /* pages might have not been loaded yet, so add a CSS rule */ @@ -211,7 +262,10 @@ var pdf2htmlEX = (function(){ for(var i in pl) { var p = pl[i]; if(p.is_nearly_visible()){ - p.show(); + if (p.loaded) { + p.show(); + } else + this.load_page(p.n); } else { p.hide(); } @@ -284,7 +338,6 @@ var pdf2htmlEX = (function(){ }, link_handler : function (e) { - console.log('here'); var _ = e.data; var t = $(e.currentTarget); @@ -345,11 +398,26 @@ var pdf2htmlEX = (function(){ } if(ok) { - pos = transform(target_page.ctm, pos); - if(upside_down) { - pos[1] = target_page.height() - pos[1]; + var transform_and_scroll = function() { + pos = transform(target_page.ctm, pos); + if(upside_down) { + pos[1] = target_page.height() - pos[1]; + } + _.scroll_to(detail[0], pos); + } + + if (target_page.loaded) { + transform_and_scroll(); + } else { + // Scroll to the exact position once loaded. + _.load_page(target_page.n, 1, function() { + target_page = _.pages[target_page.n]; // Refresh reference + transform_and_scroll(); + }); + + // In the meantime page gets loaded, scroll approximately position for maximum responsiveness. + _.scroll_to(detail[0], [0,0]); } - _.scroll_to(detail[0], pos); e.preventDefault(); } }, diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 4634592..4d07f7d 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -308,8 +308,6 @@ protected: Preprocessor preprocessor; TmpFiles tmp_files; - // for splitted pages - std::vector page_filenames; // for string formatting StringFormatter str_fmt; @@ -318,6 +316,8 @@ protected: std::ofstream fs; std::string path; } f_outline, f_pages, f_css; + std::ofstream * f_curpage; + std::string cur_page_filename; static const std::string MANIFEST_FILENAME; }; diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 9926916..9bc0ead 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -370,50 +370,50 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co } } - f_pages.fs << "
0) f_pages.fs << ' '; + if(i > 0) (*f_curpage) << ' '; double lw = line_width_array[i] * scale; - f_pages.fs << round(lw); - if(is_positive(lw)) f_pages.fs << "px"; + (*f_curpage) << round(lw); + if(is_positive(lw)) (*f_curpage) << "px"; } - f_pages.fs << ";"; + (*f_curpage) << ";"; } else { - f_pages.fs << "border:none;"; + (*f_curpage) << "border:none;"; } if(fill_color) { - f_pages.fs << "background-color:" << (*fill_color) << ";"; + (*f_curpage) << "background-color:" << (*fill_color) << ";"; } else { - f_pages.fs << "background-color:transparent;"; + (*f_curpage) << "background-color:transparent;"; } if(style_function) { - style_function(style_function_data, f_pages.fs); + style_function(style_function_data, (*f_curpage)); } - f_pages.fs << "bottom:" << round(y) << "px;" + (*f_curpage) << "bottom:" << round(y) << "px;" << "left:" << round(x) << "px;" << "width:" << round(w * scale) << "px;" << "height:" << round(h * scale) << "px;"; - f_pages.fs << "\">
"; + (*f_curpage) << "\">"; } diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index d94e732..7918abe 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -112,12 +112,12 @@ void HTMLRenderer::process(PDFDoc *doc) { string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i); auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str()); - f_pages.fs.open((char*)page_fn, ofstream::binary); - if(!f_pages.fs) + f_curpage = new ofstream((char*)page_fn, ofstream::binary); + if(!(*f_curpage)) throw string("Cannot open ") + (char*)page_fn + " for writing"; - set_stream_flags(f_pages.fs); + set_stream_flags((*f_curpage)); - page_filenames.push_back(filled_template_filename); + cur_page_filename = filled_template_filename; } if(param.process_nontext) @@ -139,7 +139,8 @@ void HTMLRenderer::process(PDFDoc *doc) if(param.split_pages) { - f_pages.fs.close(); + delete f_curpage; + f_curpage = nullptr; } } if(page_count >= 0) @@ -180,7 +181,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) long long wid = all_manager.width.install(pageWidth); long long hid = all_manager.height.install(pageHeight); - f_pages.fs + (*f_curpage) << "
"; + /* + * When split_pages is on, f_curpage points to the current page file + * and we want to output empty frames in f_pages.fs + */ + if(param.split_pages) + { + f_pages.fs + << "
" + << "
"; + } + if(param.process_nontext) { - f_pages.fs << "\"\""; + (*f_curpage) << "\"/>"; } reset_state(); @@ -216,7 +237,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) void HTMLRenderer::endPage() { // dump all text - html_text_page.dump_text(f_pages.fs); + html_text_page.dump_text(*f_curpage); html_text_page.dump_css(f_css.fs); html_text_page.clear(); @@ -224,26 +245,31 @@ void HTMLRenderer::endPage() { cur_doc->processLinks(this, pageNum); // close box - f_pages.fs << "
"; + (*f_curpage) << "
"; // dump info for js // TODO: create a function for this // BE CAREFUL WITH ESCAPES - f_pages.fs << "
0) f_pages.fs << ","; - f_pages.fs << round(default_ctm[i]); + if(i > 0) (*f_curpage) << ","; + (*f_curpage) << round(default_ctm[i]); } - f_pages.fs << "]"; + (*f_curpage) << "]"; - f_pages.fs << "}'>
"; + (*f_curpage) << "}'>
"; // close page - f_pages.fs << "" << endl; + (*f_curpage) << "" << endl; + + if(param.split_pages) + { + f_pages.fs << "" << endl; + } } void HTMLRenderer::pre_process(PDFDoc * doc) @@ -324,9 +350,6 @@ void HTMLRenderer::pre_process(PDFDoc * doc) set_stream_flags(f_outline.fs); } - // if split-pages is specified, open & close the file in the process loop - // if not, open the file here: - if(!param.split_pages) { /* * we have to keep the html file for pages into a temporary place @@ -343,6 +366,15 @@ void HTMLRenderer::pre_process(PDFDoc * doc) throw string("Cannot open ") + (char*)fn + " for writing"; set_stream_flags(f_pages.fs); } + + if(param.split_pages) + { + f_curpage = nullptr; + } + else + { + f_curpage = &f_pages.fs; + } } void HTMLRenderer::post_process(void) @@ -436,25 +468,11 @@ void HTMLRenderer::post_process(void) } else if (line == "$pages") { - if(!param.split_pages) - { - ifstream fin(f_pages.path, ifstream::binary); - if(!fin) - throw "Cannot open pages for reading"; - output << fin.rdbuf(); - output.clear(); // output will set fail bit if fin is empty - } - } - else if (line == "$page_urls") - { - for(auto iter = page_filenames.begin(); iter != page_filenames.end(); ++iter) - { - if(iter != page_filenames.begin()) - output << ","; - output << "'"; - outputJSON(output, *iter); - output << "'"; - } + ifstream fin(f_pages.path, ifstream::binary); + if(!fin) + throw "Cannot open pages for reading"; + output << fin.rdbuf(); + output.clear(); // output will set fail bit if fin is empty } else { diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index c185d61..76446a4 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -195,17 +195,17 @@ void HTMLRenderer::processLink(AnnotLink * al) if(!dest_str.empty()) { - f_pages.fs << ""; + (*f_curpage) << ">"; } - f_pages.fs << "
getStyle(); switch(style) { case AnnotBorder::borderSolid: - f_pages.fs << "border-style:solid;"; + (*f_curpage) << "border-style:solid;"; break; case AnnotBorder::borderDashed: - f_pages.fs << "border-style:dashed;"; + (*f_curpage) << "border-style:dashed;"; break; case AnnotBorder::borderBeveled: - f_pages.fs << "border-style:outset;"; + (*f_curpage) << "border-style:outset;"; break; case AnnotBorder::borderInset: - f_pages.fs << "border-style:inset;"; + (*f_curpage) << "border-style:inset;"; break; case AnnotBorder::borderUnderlined: - f_pages.fs << "border-style:none;border-bottom-style:solid;"; + (*f_curpage) << "border-style:none;border-bottom-style:solid;"; break; default: cerr << "Warning:Unknown annotation border style: " << style << endl; - f_pages.fs << "border-style:solid;"; + (*f_curpage) << "border-style:solid;"; } @@ -274,36 +274,36 @@ void HTMLRenderer::processLink(AnnotLink * al) r = g = b = 0; } - f_pages.fs << "border-color:rgb(" + (*f_curpage) << "border-color:rgb(" << dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex << ");"; } else { - f_pages.fs << "border-style:none;"; + (*f_curpage) << "border-style:none;"; } } else { - f_pages.fs << "border-style:none;"; + (*f_curpage) << "border-style:none;"; } tm_transform(default_ctm, x, y); - f_pages.fs << "position:absolute;" + (*f_curpage) << "position:absolute;" << "left:" << round(x) << "px;" << "bottom:" << round(y) << "px;" << "width:" << round(w) << "px;" << "height:" << round(h) << "px;"; // fix for IE - f_pages.fs << "background-color:rgba(255,255,255,0.000001);"; + (*f_curpage) << "background-color:rgba(255,255,255,0.000001);"; - f_pages.fs << "\">
"; + (*f_curpage) << "\">"; if(dest_str != "") { - f_pages.fs << "
"; + (*f_curpage) << ""; } }