From 8c09bb056cbc0362a32ff222a70a5bf06cb39eeb Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sat, 26 Jan 2013 14:23:28 +0800 Subject: [PATCH 01/45] changelog --- ChangeLog | 9 +++++++-- TODO | 1 - test/test.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8a89b88..494c28c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,13 @@ -Latest v0.6 +Latest v0.7 +v0.6 +2013.01.26 + +* new option --no-drm [John Hewson] +* Travis CI integration [John Hewson] * Add a class for 'left' * Fixed a bug of hashing/finding GfxRGB -* new option -v, --version [Thanks to John Hewson] +* new option -v, --version [John Hewson] * Render Type 3 fonts as image * New parameter: --use-cropbox * Progress indicator diff --git a/TODO b/TODO index 9c0e03e..0608877 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,4 @@ word space/offset before the first letter (calendar pdf) -add class for "left" == Future: == diff --git a/test/test.py b/test/test.py index 79a09f8..79ab79d 100755 --- a/test/test.py +++ b/test/test.py @@ -13,7 +13,7 @@ with open('out.html','w') as outf: if not f.lower().endswith('.pdf'): continue print f - if os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0: + if os.system('pdf2htmlEX -l 7 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0: print "error on ", f sys.exit(-1) From 9efb9fce3fd320ac052cd13eab5be400787b4841 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sat, 26 Jan 2013 14:25:25 +0800 Subject: [PATCH 02/45] bump version --- CMakeLists.txt | 2 +- pdf2htmlEX.1.in | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3fa6c52..c97b1b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR) include_directories(${CMAKE_SOURCE_DIR}/src) -set(PDF2HTMLEX_VERSION "0.6") +set(PDF2HTMLEX_VERSION "0.7") set(ARCHIVE_NAME 
pdf2htmlex-${PDF2HTMLEX_VERSION}) add_custom_target(dist COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 3850590..bbc7e16 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -1,4 +1,4 @@ -.TH pdf2htmlEX 1 "Aug 31, 2012" "pdf2htmlEX 0.1" +.TH pdf2htmlEX 1 "Aug 31, 2012" "pdf2htmlEX @PDF2HTMLEX_VERSION@" .SH NAME .PP .nf @@ -164,7 +164,7 @@ Convert file.pdf into out/file.html and leave font/image files separated. .SH COPYRIGHT .PP -Copyright 2012 Lu Wang +Copyright 2012,2013 Lu Wang pdf2htmlEX is GPLv2 & GPLv3 dual licensed From 639c7e1495122a4a1f71e0234f59b1ba0dbc6d01 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 01:51:30 +0800 Subject: [PATCH 03/45] debian --- build_for_ppa.py | 2 +- debian/changelog | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/build_for_ppa.py b/build_for_ppa.py index ac3d4b1..90ac1e7 100755 --- a/build_for_ppa.py +++ b/build_for_ppa.py @@ -5,7 +5,7 @@ Dirty script for building package for PPA by WangLu 2011.01.13 -modified by pdf2htmlEX +modified for pdf2htmlEX 2012.08.28 """ diff --git a/debian/changelog b/debian/changelog index 6228965..78274ae 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,12 @@ +pdf2htmlex (0.7-1~git201301261427r2595c-0ubuntu1) quantal; urgency=low + + * New version, see Changelog for changelog + + -- WANG Lu Sat, 26 Jan 2013 14:27:18 +0800 + pdf2htmlex (0.6-1~git201212182148rd76af-0ubuntu1) quantal; urgency=low * fix dependency of poppler for quantal - * -- WANG Lu Tue, 18 Dec 2012 21:48:35 +0800 From 4d94c1d072e6abd68f386d55ffc2317e237bf27e Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 18:16:01 +0800 Subject: [PATCH 04/45] fix build with git version of poppler --- share/manifest | 7 ++++++- src/HTMLRenderer/general.cc | 10 ++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/share/manifest b/share/manifest index b1d3af7..6be8300 100644 --- a/share/manifest +++ 
b/share/manifest @@ -21,9 +21,13 @@ """ - +# base CSS styles @base.css + +# PDF specific CSS styles $css + +# necessary Javascript codes @jquery.js @pdf2htmlEX.js @@ -38,6 +42,7 @@ new pdf2htmlEX.Viewer('pdf-main');
""" +# PDF pages $pages """ diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 1aa2d21..7d35f6c 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -13,6 +13,8 @@ #include #include +#include + #include "HTMLRenderer.h" #include "TextLineBuffer.h" #include "pdf2htmlEX-config.h" @@ -35,10 +37,6 @@ using std::abs; using std::cerr; using std::endl; -static void dummy(void *, enum ErrorCategory, int pos, char *) -{ -} - HTMLRenderer::HTMLRenderer(const Param * param) :OutputDev() ,line_opened(false) @@ -49,8 +47,8 @@ HTMLRenderer::HTMLRenderer(const Param * param) { if(!(param->debug)) { - //disable error function of poppler - setErrorCallback(&dummy, nullptr); + //disable error messages of poppler + globalParams->setErrQuiet(gTrue); } ffw_init(param->debug); From 014ef7ecc2435da9723d19ce45a330d3cfddc8e1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 18:31:02 +0800 Subject: [PATCH 05/45] expose get_linkdest_str --- src/HTMLRenderer/HTMLRenderer.h | 6 ++++- src/HTMLRenderer/general.cc | 1 + src/HTMLRenderer/link.cc | 39 ++++++++++++++++++--------------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 04b190a..59f17b3 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -210,12 +210,14 @@ class HTMLRenderer : public OutputDev void pre_process(PDFDoc * doc); void post_process(); - // set flags void set_stream_flags (std::ostream & out); std::string dump_embedded_font (GfxFont * font, long long fn_id); void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false); + // convert a LinkDest to a string that our Javascript code can understand + std::string get_linkdest_str(int & pageno, LinkDest * dest); + //////////////////////////////////////////////////// // manage styles //////////////////////////////////////////////////// @@ -300,6 +302,8 @@ 
class HTMLRenderer : public OutputDev XRef * xref; PDFDoc * cur_doc; + Catalog * cur_catalog; + double default_ctm[6]; // page info diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 7d35f6c..f588f1b 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -69,6 +69,7 @@ HTMLRenderer::~HTMLRenderer() void HTMLRenderer::process(PDFDoc *doc) { cur_doc = doc; + cur_catalog = doc->getCatalog(); xref = doc->getXRef(); pre_process(doc); diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index fa78b9e..d64d513 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -30,8 +30,24 @@ using std::endl; * The detailed rectangle area of the link destination * Will be parsed and performed by Javascript */ -static string get_dest_detail_str(int pageno, LinkDest * dest) +string HTMLRenderer::get_linkdest_str(int & pageno, LinkDest * dest) { + pageno = 0; + if(dest->isPageRef()) + { + auto pageref = dest->getPageRef(); + pageno = cur_catalog->findPage(pageref.num, pageref.gen); + } + else + { + pageno = dest->getPageNum(); + } + + if(pageno <= 0) + { + return ""; + } + ostringstream sout; // dec sout << "[" << pageno; @@ -125,34 +141,21 @@ void HTMLRenderer::processLink(AnnotLink * al) { case actionGoTo: { - auto catalog = cur_doc->getCatalog(); auto * real_action = dynamic_cast(action); LinkDest * dest = nullptr; if(auto _ = real_action->getDest()) dest = _->copy(); else if (auto _ = real_action->getNamedDest()) - dest = catalog->findDest(_); + dest = cur_catalog->findDest(_); if(dest) { int pageno = 0; - if(dest->isPageRef()) - { - auto pageref = dest->getPageRef(); - pageno = catalog->findPage(pageref.num, pageref.gen); - } - else - { - pageno = dest->getPageNum(); - } - + dest_detail_str = get_linkdest_str(pageno, dest); if(pageno > 0) { dest_str = (char*)str_fmt("#p%x", pageno); - dest_detail_str = get_dest_detail_str(pageno, dest); } - delete dest; - } } break; @@ -178,11 +181,11 @@ void 
HTMLRenderer::processLink(AnnotLink * al) } } - if(dest_str != "") + if(!dest_str.empty()) { html_fout << ""; From d38697d890bf0d6e918fac8c62749c0b1a141330 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 18:46:44 +0800 Subject: [PATCH 06/45] refactor file objects --- share/manifest | 7 ++++ src/HTMLRenderer/HTMLRenderer.h | 7 +++- src/HTMLRenderer/TextLineBuffer.cc | 2 +- src/HTMLRenderer/draw.cc | 26 ++++++------- src/HTMLRenderer/export.cc | 58 ++++++++++++++-------------- src/HTMLRenderer/general.cc | 62 +++++++++++++++--------------- src/HTMLRenderer/link.cc | 38 +++++++++--------- 7 files changed, 105 insertions(+), 95 deletions(-) diff --git a/share/manifest b/share/manifest index 6be8300..abffd73 100644 --- a/share/manifest +++ b/share/manifest @@ -39,6 +39,13 @@ new pdf2htmlEX.Viewer('pdf-main'); +
+""" + +$outlines + +""" +
""" diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 59f17b3..09e6ec9 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -428,8 +428,11 @@ class HTMLRenderer : public OutputDev std::map left_map; const Param * param; - std::ofstream html_fout, css_fout; - std::string html_path, css_path; + + struct { + std::ofstream fs; + std::string path; + } f_pages, f_css, f_outlines; static const std::string MANIFEST_FILENAME; }; diff --git a/src/HTMLRenderer/TextLineBuffer.cc b/src/HTMLRenderer/TextLineBuffer.cc index 4c17afe..7fefea5 100644 --- a/src/HTMLRenderer/TextLineBuffer.cc +++ b/src/HTMLRenderer/TextLineBuffer.cc @@ -83,7 +83,7 @@ void HTMLRenderer::TextLineBuffer::flush(void) max_ascent = max(max_ascent, s.ascent * s.draw_font_size); } - ostream & out = renderer->html_fout; + ostream & out = renderer->f_pages.fs; out << "
0) html_fout << ' '; + if(i > 0) f_pages.fs << ' '; double lw = line_width_array[i] * scale; - html_fout << round(lw); - if(is_positive(lw)) html_fout << "px"; + f_pages.fs << round(lw); + if(is_positive(lw)) f_pages.fs << "px"; } - html_fout << ";"; + f_pages.fs << ";"; } else { - html_fout << "border:none;"; + f_pages.fs << "border:none;"; } if(fill_color) { - html_fout << "background-color:" << (*fill_color) << ";"; + f_pages.fs << "background-color:" << (*fill_color) << ";"; } else { - html_fout << "background-color:transparent;"; + f_pages.fs << "background-color:transparent;"; } if(style_function) { - style_function(style_function_data, html_fout); + style_function(style_function_data, f_pages.fs); } - html_fout << "bottom:" << round(y) << "px;" + f_pages.fs << "bottom:" << round(y) << "px;" << "left:" << round(x) << "px;" << "width:" << round(w * scale) << "px;" << "height:" << round(h * scale) << "px;"; - html_fout << "\">
"; + f_pages.fs << "\">
"; } diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index ef69417..148f741 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -20,7 +20,7 @@ namespace pdf2htmlEX { void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font) { - css_fout << "@font-face{" + f_css.fs << "@font-face{" << "font-family:f" << info.id << ";" << "src:url("; @@ -32,15 +32,15 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff ifstream fin(path, ifstream::binary); if(!fin) throw "Cannot locate font file: " + path; - css_fout << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'"; + f_css.fs << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'"; } else { - css_fout << (char*)fn; + f_css.fs << (char*)fn; } } - css_fout << ")" + f_css.fs << ")" << "format(\"" << fontfileformat << "\");" << "}" // end of @font-face << ".f" << info.id << "{" @@ -66,45 +66,45 @@ static string general_font_family(GfxFont * font) // TODO: this function is called when some font is unable to process, may use the name there as a hint void HTMLRenderer::export_remote_default_font(long long fn_id) { - css_fout << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; + f_css.fs << ".f" << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; } void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) { - css_fout << ".f" << info.id << "{"; - css_fout << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";"; + f_css.fs << ".f" << info.id << "{"; + f_css.fs << "font-family:" << ((cssfont == "") ? 
(original_font_name + "," + general_font_family(font)) : cssfont) << ";"; string fn = original_font_name; for(auto iter = fn.begin(); iter != fn.end(); ++iter) *iter = tolower(*iter); if(font->isBold() || (fn.find("bold") != string::npos)) - css_fout << "font-weight:bold;"; + f_css.fs << "font-weight:bold;"; else - css_fout << "font-weight:normal;"; + f_css.fs << "font-weight:normal;"; if(fn.find("oblique") != string::npos) - css_fout << "font-style:oblique;"; + f_css.fs << "font-style:oblique;"; else if(font->isItalic() || (fn.find("italic") != string::npos)) - css_fout << "font-style:italic;"; + f_css.fs << "font-style:italic;"; else - css_fout << "font-style:normal;"; + f_css.fs << "font-style:normal;"; - css_fout << "line-height:" << round(info.ascent - info.descent) << ";"; + f_css.fs << "line-height:" << round(info.ascent - info.descent) << ";"; - css_fout << "visibility:visible;"; + f_css.fs << "visibility:visible;"; - css_fout << "}" << endl; + f_css.fs << "}" << endl; } void HTMLRenderer::export_font_size (long long fs_id, double font_size) { - css_fout << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl; + f_css.fs << ".s" << fs_id << "{font-size:" << round(font_size) << "px;}" << endl; } void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) { - css_fout << ".t" << tm_id << "{"; + f_css.fs << ".t" << tm_id << "{"; // always ignore tm[4] and tm[5] because // we have already shifted the origin @@ -114,7 +114,7 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) { auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"}; for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) - css_fout << *iter << "transform:none;"; + f_css.fs << *iter << "transform:none;"; } else { @@ -122,53 +122,53 @@ void HTMLRenderer::export_transform_matrix (long long tm_id, const double * tm) for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) { // PDF use a different coordinate 
system from Web - css_fout << *iter << "transform:matrix(" + f_css.fs << *iter << "transform:matrix(" << round(tm[0]) << ',' << round(-tm[1]) << ',' << round(-tm[2]) << ',' << round(tm[3]) << ','; - css_fout << "0,0);"; + f_css.fs << "0,0);"; } } - css_fout << "}" << endl; + f_css.fs << "}" << endl; } void HTMLRenderer::export_letter_space (long long ls_id, double letter_space) { - css_fout << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl; + f_css.fs << ".l" << ls_id << "{letter-spacing:" << round(letter_space) << "px;}" << endl; } void HTMLRenderer::export_word_space (long long ws_id, double word_space) { - css_fout << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl; + f_css.fs << ".w" << ws_id << "{word-spacing:" << round(word_space) << "px;}" << endl; } void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) { - css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl; + f_css.fs << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl; } void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) { if(ws_width > 0) - css_fout << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl; + f_css.fs << "._" << ws_id << "{display:inline-block;width:" << round(ws_width) << "px;}" << endl; else - css_fout << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl; + f_css.fs << "._" << ws_id << "{display:inline;margin-left:" << round(ws_width) << "px;}" << endl; } void HTMLRenderer::export_rise (long long rise_id, double rise) { - css_fout << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl; + f_css.fs << ".r" << rise_id << "{top:" << round(-rise) << "px;}" << endl; } void HTMLRenderer::export_height (long long height_id, double height) { - css_fout << ".h" << height_id << "{height:" << round(height) << "px;}" << endl; + f_css.fs << ".h" << height_id << "{height:" << round(height) << "px;}" << endl; } 
void HTMLRenderer::export_left (long long left_id, double left) { - css_fout << ".L" << left_id << "{left:" << round(left) << "px;}" << endl; + f_css.fs << ".L" << left_id << "{left:" << round(left) << "px;}" << endl; } } diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index f588f1b..801345f 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -89,10 +89,10 @@ void HTMLRenderer::process(PDFDoc *doc) if(param->split_pages) { auto page_fn = str_fmt("%s/%s%d.page", param->dest_dir.c_str(), param->output_filename.c_str(), i); - html_fout.open((char*)page_fn, ofstream::binary); - if(!html_fout) + f_pages.fs.open((char*)page_fn, ofstream::binary); + if(!f_pages.fs) throw string("Cannot open ") + (char*)page_fn + " for writing"; - set_stream_flags(html_fout); + set_stream_flags(f_pages.fs); } if(param->process_nontext) @@ -113,7 +113,7 @@ void HTMLRenderer::process(PDFDoc *doc) if(param->split_pages) { - html_fout.close(); + f_pages.fs.close(); } } if(page_count >= 0) @@ -141,7 +141,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) assert((!line_opened) && "Open line in startPage detected!"); - html_fout + f_pages.fs << "
" @@ -150,7 +150,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) if(param->process_nontext) { - html_fout << "background-image:url("; + f_pages.fs << "background-image:url("; { if(param->single_html) @@ -159,18 +159,18 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) ifstream fin((char*)path, ifstream::binary); if(!fin) throw string("Cannot read background image ") + (char*)path; - html_fout << "'data:image/png;base64," << base64stream(fin) << "'"; + f_pages.fs << "'data:image/png;base64," << base64stream(fin) << "'"; } else { - html_fout << str_fmt("p%x.png", pageNum); + f_pages.fs << str_fmt("p%x.png", pageNum); } } - html_fout << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;"; + f_pages.fs << ");background-position:0 0;background-size:" << pageWidth << "px " << pageHeight << "px;background-repeat:no-repeat;"; } - html_fout << "\">"; + f_pages.fs << "\">"; draw_text_scale = 1.0; cur_font_info = install_font(nullptr); @@ -205,26 +205,26 @@ void HTMLRenderer::endPage() { cur_doc->processLinks(this, pageNum); // close box - html_fout << "
"; + f_pages.fs << "
"; // dump info for js // TODO: create a function for this // BE CAREFUL WITH ESCAPES - html_fout << "
0) html_fout << ","; - html_fout << round(default_ctm[i]); + if(i > 0) f_pages.fs << ","; + f_pages.fs << round(default_ctm[i]); } - html_fout << "]"; + f_pages.fs << "]"; - html_fout << "}'>
"; + f_pages.fs << "}'>"; // close page - html_fout << "" << endl; + f_pages.fs << "" << endl; } void HTMLRenderer::pre_process(PDFDoc * doc) @@ -289,11 +289,11 @@ void HTMLRenderer::pre_process(PDFDoc * doc) if(param->single_html && (!param->split_pages)) tmp_files.add((char*)fn); - css_path = (char*)fn, - css_fout.open(css_path, ofstream::binary); - if(!css_fout) + f_css.path = (char*)fn, + f_css.fs.open(f_css.path, ofstream::binary); + if(!f_css.fs) throw string("Cannot open ") + (char*)fn + " for writing"; - set_stream_flags(css_fout); + set_stream_flags(f_css.fs); } // if split-pages is specified, open & close the file in the process loop @@ -310,21 +310,21 @@ void HTMLRenderer::pre_process(PDFDoc * doc) auto fn = str_fmt("%s/__pages", param->tmp_dir.c_str()); tmp_files.add((char*)fn); - html_path = (char*)fn; - html_fout.open(html_path, ofstream::binary); - if(!html_fout) + f_pages.path = (char*)fn; + f_pages.fs.open(f_pages.path, ofstream::binary); + if(!f_pages.fs) throw string("Cannot open ") + (char*)fn + " for writing"; - set_stream_flags(html_fout); + set_stream_flags(f_pages.fs); } } void HTMLRenderer::post_process() { // close files - html_fout.close(); - css_fout.close(); + f_pages.fs.close(); + f_css.fs.close(); - //only when split-page, do we have some work left to do + //only when split-page == 0, do we have some work left to do if(param->split_pages) return; @@ -372,11 +372,11 @@ void HTMLRenderer::post_process() { if(line == "$css") { - embed_file(output, css_path, ".css", false); + embed_file(output, f_css.path, ".css", false); } else if (line == "$pages") { - ifstream fin(html_path, ifstream::binary); + ifstream fin(f_pages.path, ifstream::binary); if(!fin) throw "Cannot open read the pages"; output << fin.rdbuf(); diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index d64d513..fb76998 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -183,15 +183,15 @@ void HTMLRenderer::processLink(AnnotLink * al) 
if(!dest_str.empty()) { - html_fout << "
"; + f_pages.fs << ">"; } - html_fout << "
getStyle(); switch(style) { case AnnotBorder::borderSolid: - html_fout << "border-style:solid;"; + f_pages.fs << "border-style:solid;"; break; case AnnotBorder::borderDashed: - html_fout << "border-style:dashed;"; + f_pages.fs << "border-style:dashed;"; break; case AnnotBorder::borderBeveled: - html_fout << "border-style:outset;"; + f_pages.fs << "border-style:outset;"; break; case AnnotBorder::borderInset: - html_fout << "border-style:inset;"; + f_pages.fs << "border-style:inset;"; break; case AnnotBorder::borderUnderlined: - html_fout << "border-style:none;border-bottom-style:solid;"; + f_pages.fs << "border-style:none;border-bottom-style:solid;"; break; default: cerr << "Warning:Unknown annotation border style: " << style << endl; - html_fout << "border-style:solid;"; + f_pages.fs << "border-style:solid;"; } @@ -260,36 +260,36 @@ void HTMLRenderer::processLink(AnnotLink * al) r = g = b = 0; } - html_fout << "border-color:rgb(" + f_pages.fs << "border-color:rgb(" << dec << (int)dblToByte(r) << "," << (int)dblToByte(g) << "," << (int)dblToByte(b) << hex << ");"; } else { - html_fout << "border-style:none;"; + f_pages.fs << "border-style:none;"; } } else { - html_fout << "border-style:none;"; + f_pages.fs << "border-style:none;"; } tm_transform(default_ctm, x, y); - html_fout << "position:absolute;" + f_pages.fs << "position:absolute;" << "left:" << round(x) << "px;" << "bottom:" << round(y) << "px;" << "width:" << round(w) << "px;" << "height:" << round(h) << "px;"; // fix for IE - html_fout << "background-color:rgba(255,255,255,0.000001);"; + f_pages.fs << "background-color:rgba(255,255,255,0.000001);"; - html_fout << "\">
"; + f_pages.fs << "\">"; if(dest_str != "") { - html_fout << "
"; + f_pages.fs << ""; } } From b7ffd969e57cf217ee14c1880a1d3a1949ae2e44 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 19:45:40 +0800 Subject: [PATCH 07/45] fix build with git version of poppler --- src/HTMLRenderer/HTMLRenderer.h | 2 ++ src/HTMLRenderer/general.cc | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 09e6ec9..4b162c0 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -147,7 +147,9 @@ class HTMLRenderer : public OutputDev virtual void setDefaultCTM(double *ctm); // Start a page. + // UGLY: These 2 versions are for different versions of poppler virtual void startPage(int pageNum, GfxState *state); + virtual void startPage(int pageNum, GfxState *state, XRef * xref); // End a page. virtual void endPage(); diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 801345f..932cc1e 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -134,6 +134,11 @@ void HTMLRenderer::setDefaultCTM(double *ctm) } void HTMLRenderer::startPage(int pageNum, GfxState *state) +{ + startPage(pageNum, state, nullptr); +} + +void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) { this->pageNum = pageNum; this->pageWidth = state->getPageWidth(); From e0c859188e336d541cd0ac0cee4a0246fd241159 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 20:00:20 +0800 Subject: [PATCH 08/45] new parameter for outline --- share/manifest | 2 +- src/HTMLRenderer/HTMLRenderer.h | 2 +- src/HTMLRenderer/general.cc | 33 +++++++++++++++++++++++++++++++-- src/Param.h | 1 + src/pdf2htmlEX.cc | 20 ++++++++++++++++++-- 5 files changed, 52 insertions(+), 6 deletions(-) diff --git a/share/manifest b/share/manifest index abffd73..962e4fe 100644 --- a/share/manifest +++ b/share/manifest @@ -42,7 +42,7 @@ new pdf2htmlEX.Viewer('pdf-main');
""" -$outlines +$outline """
diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 4b162c0..087cae9 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -434,7 +434,7 @@ class HTMLRenderer : public OutputDev struct { std::ofstream fs; std::string path; - } f_pages, f_css, f_outlines; + } f_outline, f_pages, f_css; static const std::string MANIFEST_FILENAME; }; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 932cc1e..a81ed09 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -294,20 +294,41 @@ void HTMLRenderer::pre_process(PDFDoc * doc) if(param->single_html && (!param->split_pages)) tmp_files.add((char*)fn); - f_css.path = (char*)fn, + f_css.path = (char*)fn; f_css.fs.open(f_css.path, ofstream::binary); if(!f_css.fs) throw string("Cannot open ") + (char*)fn + " for writing"; set_stream_flags(f_css.fs); } + { + /* + * The logic for outline is similar to css + */ + + auto fn = (param->single_html && (!param->split_pages)) + ? 
str_fmt("%s/__outline", param->tmp_dir.c_str()) + : str_fmt("%s/%s", param->dest_dir.c_str(), param->outline_filename.c_str()); + + if(param->single_html && (!param->split_pages)) + tmp_files.add((char*)fn); + + f_outline.path = (char*)fn; + f_outline.fs.open(f_outline.path, ofstream::binary); + if(!f_outline.fs) + throw string("Cannot open") + (char*)fn + " for writing"; + + // might not be necessary + set_stream_flags(f_outline.fs); + } + // if split-pages is specified, open & close the file in the process loop // if not, open the file here: if(!param->split_pages) { /* * If single-html - * we have to keep the html file (for page) into a temporary place + * we have to keep the html file for pages into a temporary place * because we'll have to embed css before it * * Otherwise just generate it @@ -326,6 +347,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc) void HTMLRenderer::post_process() { // close files + f_outline.fs.close(); f_pages.fs.close(); f_css.fs.close(); @@ -379,6 +401,13 @@ void HTMLRenderer::post_process() { embed_file(output, f_css.path, ".css", false); } + else if (line == "$outline") + { + ifstream fin(f_outline.path, ifstream::binary); + if(!fin) + throw "Cannot open read the pages"; + output << fin.rdbuf(); + } else if (line == "$pages") { ifstream fin(f_pages.path, ifstream::binary); diff --git a/src/Param.h b/src/Param.h index 2d22396..2a49ddd 100644 --- a/src/Param.h +++ b/src/Param.h @@ -63,6 +63,7 @@ struct Param * Output */ std::string css_filename; + std::string outline_filename; /* * Debug diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 162ffea..b337871 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -101,6 +101,7 @@ void parse_options (int argc, char **argv) .add("font-format", &param.font_format, "opentype", "format for extracted font files") .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hintting fonts.(overrides --auto-hint)") .add("css-filename", &param.css_filename, "", "Specify the file name
of the generated css file") + .add("outline-filename", &param.outline_filename, "", "Specify the file name of the generated outline file") .add("debug", &param.debug, 0, "output debug information") .add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing") @@ -202,7 +203,7 @@ int main(int argc, char **argv) param.first_page = min(max(param.first_page, 1), doc->getNumPages()); param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); - if(param.output_filename == "") + if(param.output_filename.empty()) { const string s = get_filename(param.input_filename); @@ -223,7 +224,7 @@ int main(int argc, char **argv) } } - if(param.css_filename == "") + if(param.css_filename.empty()) { const string s = get_filename(param.input_filename); @@ -237,6 +238,21 @@ int main(int argc, char **argv) param.css_filename = s + ".css"; } } + if(param.outline_filename.empty()) + { + const string s = get_filename(param.input_filename); + + if(get_suffix(param.input_filename) == ".pdf") + { + param.outline_filename = s.substr(0, s.size() - 4) + ".outline"; + } + else + { + if(!param.split_pages) + param.outline_filename = s + ".outline"; + } + + } HTMLRenderer * htmlOut = new HTMLRenderer(&param); htmlOut->process(doc); From a9bc242c1c669ca0d8348a498042ebb8e0adf62e Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 21:01:02 +0800 Subject: [PATCH 09/45] process outline --- CMakeLists.txt | 1 + share/base.css | 10 ++++- share/pdf2htmlEX.js | 5 ++- src/HTMLRenderer/HTMLRenderer.h | 7 +++- src/HTMLRenderer/general.cc | 7 ++++ src/HTMLRenderer/link.cc | 34 ++++++++++------ src/HTMLRenderer/outline.cc | 72 +++++++++++++++++++++++++++++++++ src/util/unicode.h | 3 ++ 8 files changed, 122 insertions(+), 17 deletions(-) create mode 100644 src/HTMLRenderer/outline.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index c97b1b1..0a6839d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -154,6 +154,7 @@ add_executable(pdf2htmlEX src/HTMLRenderer/TextLineBuffer.h
src/HTMLRenderer/TextLineBuffer.cc src/HTMLRenderer/link.cc + src/HTMLRenderer/outline.cc src/HTMLRenderer/state.cc src/HTMLRenderer/text.cc src/BackgroundRenderer/BackgroundRenderer.h diff --git a/share/base.css b/share/base.css index dec7559..9492e14 100644 --- a/share/base.css +++ b/share/base.css @@ -1,10 +1,18 @@ /* Base CSS */ /* Copyright 2012 Lu Wang */ -#pdf-main { /* PDF container */ +#pdf-outline { /* PDF Outline */ position:absolute; top:0; left:0; bottom:0; + width:200px; + overflow:auto; +} +#pdf-main { /* PDF container */ + position:absolute; + top:0; + left:200px; + bottom:0; right:0; overflow:auto; background-color:grey; diff --git a/share/pdf2htmlEX.js b/share/pdf2htmlEX.js index 59a1ee4..cd850d8 100644 --- a/share/pdf2htmlEX.js +++ b/share/pdf2htmlEX.js @@ -129,7 +129,8 @@ var pdf2htmlEX = (function(){ //this.zoom_fixer(); - this.container.on('click', '.a', this, this.annot_link_handler); + // used by outline/annot_link etc + this.container.on('click', '.a', this, this.link_handler); this.render(); }, @@ -231,7 +232,7 @@ var pdf2htmlEX = (function(){ return this.pages[(new Page(obj.closest('.p')[0])).n]; }, - annot_link_handler : function (e) { + link_handler : function (e) { var _ = e.data; var t = $(e.currentTarget); var cur_page = _.get_containing_page(t); diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 087cae9..532eedb 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -212,13 +212,16 @@ class HTMLRenderer : public OutputDev void pre_process(PDFDoc * doc); void post_process(); + void process_outline(); + void process_outline_items(GooList * items); + void set_stream_flags (std::ostream & out); std::string dump_embedded_font (GfxFont * font, long long fn_id); void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false); - // convert a LinkDest to a string that our Javascript code can understand - std::string 
get_linkdest_str(int & pageno, LinkDest * dest); + // convert a LinkAction to a string that our Javascript code can understand + std::string get_linkaction_str(LinkAction *, std::string & detail); //////////////////////////////////////////////////// // manage styles diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index a81ed09..574e9b5 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -74,6 +74,9 @@ void HTMLRenderer::process(PDFDoc *doc) pre_process(doc); + /////////////////// + // Process pages + BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { @@ -120,6 +123,10 @@ void HTMLRenderer::process(PDFDoc *doc) cerr << "Working: " << page_count << "/" << page_count; cerr << endl; + //////////////////////// + // Process Outline + process_outline(); + post_process(); if(bg_renderer) diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index fb76998..ec35f26 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -29,14 +29,16 @@ using std::endl; /* * The detailed rectangle area of the link destination * Will be parsed and performed by Javascript + * The string will be put into a HTML attribute, surrounded by single quotes + * So pay attention to the characters used here */ -string HTMLRenderer::get_linkdest_str(int & pageno, LinkDest * dest) +static string get_linkdest_detail_str(LinkDest * dest, Catalog * catalog, int & pageno) { pageno = 0; if(dest->isPageRef()) { auto pageref = dest->getPageRef(); - pageno = cur_catalog->findPage(pageref.num, pageref.gen); + pageno = catalog->findPage(pageref.num, pageref.gen); } else { @@ -124,16 +126,11 @@ string HTMLRenderer::get_linkdest_str(int & pageno, LinkDest * dest) return sout.str(); } - -/* - * Based on pdftohtml from poppler - * TODO: CSS for link rectangles - * TODO: share rectangle draw with css-draw - */ -void HTMLRenderer::processLink(AnnotLink * al) + +string HTMLRenderer::get_linkaction_str(LinkAction * action, 
string & detail) { - std::string dest_str, dest_detail_str; - auto action = al->getAction(); + string dest_str; + detail = ""; if(action) { auto kind = action->getKind(); @@ -150,7 +147,7 @@ void HTMLRenderer::processLink(AnnotLink * al) if(dest) { int pageno = 0; - dest_detail_str = get_linkdest_str(pageno, dest); + detail = get_linkdest_detail_str(dest, cur_catalog, pageno); if(pageno > 0) { dest_str = (char*)str_fmt("#p%x", pageno); @@ -181,6 +178,19 @@ void HTMLRenderer::processLink(AnnotLink * al) } } + return dest_str; +} + +/* + * Based on pdftohtml from poppler + * TODO: CSS for link rectangles + * TODO: share rectangle draw with css-draw + */ +void HTMLRenderer::processLink(AnnotLink * al) +{ + string dest_detail_str; + string dest_str = get_linkaction_str(al->getAction(), dest_detail_str); + if(!dest_str.empty()) { f_pages.fs << " + +#include +#include + +#include "HTMLRenderer.h" +#include "util/namespace.h" +#include "util/unicode.h" + +namespace pdf2htmlEX { + +using std::ostream; + +void HTMLRenderer::process_outline_items(GooList * items) +{ + if((!items) || (items->getLength() == 0)) + return; + + f_outline.fs << ""; +} + +void HTMLRenderer::process_outline() +{ + Outline * outline = cur_doc->getOutline(); + if(!outline) + return; + + process_outline_items(outline->getItems()); +} + +}// namespace pdf2htmlEX diff --git a/src/util/unicode.h b/src/util/unicode.h index 9cc9dc6..6b527da 100644 --- a/src/util/unicode.h +++ b/src/util/unicode.h @@ -33,6 +33,9 @@ Unicode unicode_from_font (CharCode code, GfxFont * font); */ Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font); +/* + * Escape necessary characters, and map Unicode to UTF-8 + */ void outputUnicodes(std::ostream & out, const Unicode * u, int uLen); From f3b6c2c889db09008778bf82290e291aac3ba218 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 21:38:21 +0800 Subject: [PATCH 10/45] tweak css for outline --- share/base.css | 51 
+++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/share/base.css b/share/base.css index 9492e14..1c7767f 100644 --- a/share/base.css +++ b/share/base.css @@ -5,8 +5,33 @@ top:0; left:0; bottom:0; - width:200px; + width:193px; overflow:auto; + margin:0px; + padding:0 0 0 7px; + background-color:#707070; +} +#pdf-outline ul { + margin-left:13px; + margin-right:3px; + padding-left:3px; +} +#pdf-outline li { + list-style-type:disc; + list-style-position:outside; +} +#pdf-outline a { + font-size:13px; + color:#e8e8e8; +} +#pdf-outline a:visited { + color:#e8e8e8; +} +#pdf-outline a:hover{ + color:#e8e8e8; +} +#pdf-outline a:active{ + color:#e8e8e8; } #pdf-main { /* PDF container */ position:absolute; @@ -15,21 +40,21 @@ bottom:0; right:0; overflow:auto; - background-color:grey; + background-color:#808080; /* margin & border-width have to be 0, * otherwise pdf2htmlEX may not calculate the coordinates correctly */ margin:0; border-width:0; } -.d { /* page decoration */ +#pdf-main .d { /* page decoration */ position:relative; margin: 13px auto; border-width: 0; box-shadow: 1px 1px 3px 1px #333; overflow: hidden; } -.p { /* page */ +#pdf-main .p { /* page */ position:absolute; top:0; left:0; @@ -43,7 +68,7 @@ margin:0; border-width:0; } -.b { /* content of a page */ +#pdf-main .b { /* content of a page */ position:absolute; border-width:0; top:0; @@ -58,7 +83,7 @@ -webkit-transform-origin:0% 0%; -o-transform-origin:0% 0%; } -.l { /* text line */ +#pdf-main .l { /* text line */ position:absolute; white-space:pre; font-size:1px; @@ -68,28 +93,28 @@ -webkit-transform-origin:0% 100%; -o-transform-origin:0% 100%; } -span { +#pdf-main span { position:relative; vertical-align: baseline; /* _ for spaces may need display:inline, which will override this */ display:inline-block; } -._ { /* text shift */ +#pdf-main ._ { /* text shift */ color:transparent; z-index:-1; } -::selection{ +#pdf-main ::selection{ background: 
rgba(127,255,255,1); } -::-moz-selection{ +#pdf-main ::-moz-selection{ background: rgba(127,255,255,1); } -.j { /* info for Javascript */ +#pdf-main .j { /* info for Javascript */ display:none; } -.a { +#pdf-main .a { } -.Cd { /* css drawing */ +#pdf-main .Cd { /* css drawing */ position:absolute; transform-origin:0% 100%; -ms-transform-origin:0% 100%; From 272fb57755cd7f77c8b1c3bbe4c74f6b87b0b4e1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 21:56:22 +0800 Subject: [PATCH 11/45] fix file copy for empty input --- src/HTMLRenderer/general.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 574e9b5..a3bfae2 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -414,6 +414,7 @@ void HTMLRenderer::post_process() if(!fin) throw "Cannot open read the pages"; output << fin.rdbuf(); + output.clear(); // output will set fail big if fin is empty } else if (line == "$pages") { @@ -421,6 +422,7 @@ void HTMLRenderer::post_process() if(!fin) throw "Cannot open read the pages"; output << fin.rdbuf(); + output.clear(); // output will set fail big if fin is empty } else { @@ -458,8 +460,9 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string & if(!fin) throw string("Cannot open file ") + path + " for embedding"; out << iter->second.first << endl - << fin.rdbuf() - << iter->second.second << endl; + << fin.rdbuf(); + out.clear(); // out will set fail big if fin is empty + out << iter->second.second << endl; } else { @@ -477,6 +480,7 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string & if(!out) throw string("Cannot open file ") + path + " for embedding"; out << fin.rdbuf(); + out.clear(); // out will set fail big if fin is empty } } } From f16e346e652a85f3cf10946e3128970eab101094 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 21:59:06 +0800 Subject: [PATCH 12/45] readme --- README.md | 
3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dac1653..c1f5bcd 100644 --- a/README.md +++ b/README.md @@ -43,11 +43,12 @@ Readers can also be benefitted - Color - Transformation * Links +* Outline * [EXPERIMENTAL] Path drawing with CSS - Orthogonal lines - Rectangles - Linear gradients -* Not fully supported, and rendered as images +* Not fully supported (Rendered as images) - Type 3 fonts - Non-text object From 821ae624d284712ccd6a5b01ecc4b1a24fc609db Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 22:11:42 +0800 Subject: [PATCH 13/45] auto hide sidebar if empty --- share/manifest | 2 +- share/pdf2htmlEX.js | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/share/manifest b/share/manifest index 962e4fe..c21a58e 100644 --- a/share/manifest +++ b/share/manifest @@ -33,7 +33,7 @@ $css """ diff --git a/share/pdf2htmlEX.js b/share/pdf2htmlEX.js index cd850d8..5f9f339 100644 --- a/share/pdf2htmlEX.js +++ b/share/pdf2htmlEX.js @@ -94,8 +94,9 @@ var pdf2htmlEX = (function(){ } }); - pdf2htmlEX.Viewer = function(container_id) { + pdf2htmlEX.Viewer = function(container_id, outline_id) { this.container_id = container_id; + this.outline_id = outline_id; this.init_before_loading_content(); var _ = this; @@ -115,6 +116,17 @@ var pdf2htmlEX = (function(){ init_after_loading_content : function() { this.container = $('#'+this.container_id); + // hide sidebar if there is no outline items + { + // need a better design + // e.g. class for sidebar on/off & selector rule for pdf-main + var sidebar = $('#'+this.outline_id); + if(sidebar.children().length == 0) { + sidebar.hide(); + this.container.offset({left:0}); + } + } + var new_pages = new Array(); var pl= $('.p', this.container); /* don't use for(..in..) */ From c7383400acfd03355e0db70ddf0c10f9c12be693 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 22:32:12 +0800 Subject: [PATCH 14/45] .. 
--- debian/changelog | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/debian/changelog b/debian/changelog index 78274ae..17613da 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pdf2htmlex (0.7-1~git201301282229r2595c-0ubuntu1) quantal; urgency=low + + * Process PDF Outline + + -- WANG Lu Mon, 28 Jan 2013 22:29:35 +0800 + pdf2htmlex (0.7-1~git201301261427r2595c-0ubuntu1) quantal; urgency=low * New version, see Changelog for changelog From f45e650e7d06cdee181fbe6d99dd14f9649c8c01 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 28 Jan 2013 23:58:00 +0800 Subject: [PATCH 15/45] fix link jump from outline --- share/pdf2htmlEX.js | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/share/pdf2htmlEX.js b/share/pdf2htmlEX.js index 5f9f339..d3edd1a 100644 --- a/share/pdf2htmlEX.js +++ b/share/pdf2htmlEX.js @@ -30,7 +30,7 @@ var pdf2htmlEX = (function(){ ,ctm[1] * pos[0] + ctm[3] * pos[1] + ctm[5]]; }; var Page = function(page, container) { - if(page == undefined) return undefined; + if(page == undefined) return; this.p = $(page); this.n = parseInt(this.p.attr('data-page-no'), 16); @@ -241,18 +241,24 @@ var pdf2htmlEX = (function(){ get_containing_page : function(obj) { /* get the page obj containing obj */ - return this.pages[(new Page(obj.closest('.p')[0])).n]; + var p = obj.closest('.p')[0]; + return p && this.pages[(new Page(p).n]; }, link_handler : function (e) { var _ = e.data; var t = $(e.currentTarget); - var cur_page = _.get_containing_page(t); - if(cur_page == undefined) return; - var cur_pos = cur_page.position(); - //get the coordinates in default user system - cur_pos = transform(cur_page.ictm, [cur_pos[0], cur_page.height()-cur_pos[1]]); + var cur_pos = [0,0]; + + // cur_page might be undefined, e.g. 
from Outline + var cur_page = _.get_containing_page(t); + if(cur_page != undefined) + { + cur_pos = cur_page.position(); + //get the coordinates in default user system + cur_pos = transform(cur_page.ictm, [cur_pos[0], cur_page.height()-cur_pos[1]]); + } var detail_str = t.attr('data-dest-detail'); if(detail_str == undefined) return; @@ -294,9 +300,6 @@ var pdf2htmlEX = (function(){ upside_down = false; ok = true; break; - pos = [0,0]; - ok = true; - break; default: ok = false; break; From e09a4cd78e36a3b06a22ee12610beb0b16480a30 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 00:38:07 +0800 Subject: [PATCH 16/45] better sidebar on/off class; precise jump from outline --- share/base.css | 9 ++++++++- share/manifest | 2 +- share/pdf2htmlEX.js | 17 +++++++---------- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/share/base.css b/share/base.css index 1c7767f..9f08f31 100644 --- a/share/base.css +++ b/share/base.css @@ -10,6 +10,10 @@ margin:0px; padding:0 0 0 7px; background-color:#707070; + display:none; +} +#pdf-outline.opened { + display:block; } #pdf-outline ul { margin-left:13px; @@ -36,7 +40,7 @@ #pdf-main { /* PDF container */ position:absolute; top:0; - left:200px; + left:0px; bottom:0; right:0; overflow:auto; @@ -47,6 +51,9 @@ margin:0; border-width:0; } +#pdf-outline.opened + #pdf-main { + left:200px; +} #pdf-main .d { /* page decoration */ position:relative; margin: 13px auto; diff --git a/share/manifest b/share/manifest index c21a58e..6afd97d 100644 --- a/share/manifest +++ b/share/manifest @@ -39,7 +39,7 @@ new pdf2htmlEX.Viewer('pdf-main', 'pdf-outline'); -
+
""" $outline diff --git a/share/pdf2htmlEX.js b/share/pdf2htmlEX.js index d3edd1a..84e0eb2 100644 --- a/share/pdf2htmlEX.js +++ b/share/pdf2htmlEX.js @@ -114,17 +114,12 @@ var pdf2htmlEX = (function(){ }, init_after_loading_content : function() { + this.outline = $('#'+this.outline_id); this.container = $('#'+this.container_id); - // hide sidebar if there is no outline items - { - // need a better design - // e.g. class for sidebar on/off & selector rule for pdf-main - var sidebar = $('#'+this.outline_id); - if(sidebar.children().length == 0) { - sidebar.hide(); - this.container.offset({left:0}); - } + // need a better design + if(this.outline.children().length == 0) { + this.outline.toggleClass('opened'); } var new_pages = new Array(); @@ -142,7 +137,9 @@ var pdf2htmlEX = (function(){ //this.zoom_fixer(); // used by outline/annot_link etc + // note that one is for the class 'a' and the other is for the tag 'a' this.container.on('click', '.a', this, this.link_handler); + this.outline.on('click', 'a', this, this.link_handler); this.render(); }, @@ -242,7 +239,7 @@ var pdf2htmlEX = (function(){ get_containing_page : function(obj) { /* get the page obj containing obj */ var p = obj.closest('.p')[0]; - return p && this.pages[(new Page(p).n]; + return p && this.pages[(new Page(p)).n]; }, link_handler : function (e) { From 40343ebb2f32733dbcdbbc457f76d6446e100c0a Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 00:49:11 +0800 Subject: [PATCH 17/45] manpage --- pdf2htmlEX.1.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index bbc7e16..a04a18b 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -1,4 +1,4 @@ -.TH pdf2htmlEX 1 "Aug 31, 2012" "pdf2htmlEX @PDF2HTMLEX_VERSION@" +.TH pdf2htmlEX 1 "pdf2htmlEX @PDF2HTMLEX_VERSION@" .SH NAME .PP .nf @@ -70,6 +70,10 @@ Whether to process non-text objects (as images) Whether to embed everything into one HTML file. 
If switched off, there will be several files generated along with the HTML file including files for fonts, css, images. + +Note that the outline will always be embedded into the main HTML file no matter if this switch is on or not. +And only when this switch is off will there be a separate .outline file contains the outline. +You need to modify the manifest if you do not want outline embedded. .TP .B --split-pages <0|1> (Default: 0) If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0 From c00a83723cfd0ea25e0bfae2b91936238fc3a500 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 02:35:34 +0800 Subject: [PATCH 18/45] fix css, don't increase specificity --- TODO | 2 -- share/base.css | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/TODO b/TODO index 0608877..9c3771a 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,3 @@ -word space/offset before the first letter (calendar pdf) - == Future: == Too difficult/complicated to implement: diff --git a/share/base.css b/share/base.css index 9f08f31..caa1e61 100644 --- a/share/base.css +++ b/share/base.css @@ -54,14 +54,14 @@ #pdf-outline.opened + #pdf-main { left:200px; } -#pdf-main .d { /* page decoration */ +.d { /* page decoration */ position:relative; margin: 13px auto; border-width: 0; box-shadow: 1px 1px 3px 1px #333; overflow: hidden; } -#pdf-main .p { /* page */ +.p { /* page */ position:absolute; top:0; left:0; @@ -75,7 +75,7 @@ margin:0; border-width:0; } -#pdf-main .b { /* content of a page */ +.b { /* content of a page */ position:absolute; border-width:0; top:0; @@ -90,7 +90,7 @@ -webkit-transform-origin:0% 0%; -o-transform-origin:0% 0%; } -#pdf-main .l { /* text line */ +.l { /* text line */ position:absolute; white-space:pre; font-size:1px; @@ -100,28 +100,28 @@ -webkit-transform-origin:0% 100%; -o-transform-origin:0% 100%; } -#pdf-main span { +span { position:relative; vertical-align: 
baseline; /* _ for spaces may need display:inline, which will override this */ display:inline-block; } -#pdf-main ._ { /* text shift */ +._ { /* text shift */ color:transparent; z-index:-1; } -#pdf-main ::selection{ +::selection{ background: rgba(127,255,255,1); } -#pdf-main ::-moz-selection{ +::-moz-selection{ background: rgba(127,255,255,1); } -#pdf-main .j { /* info for Javascript */ +.j { /* info for Javascript */ display:none; } -#pdf-main .a { +.a { } -#pdf-main .Cd { /* css drawing */ +.Cd { /* css drawing */ position:absolute; transform-origin:0% 100%; -ms-transform-origin:0% 100%; From f3b31362e1352160e6efa679b63d9c6c40073e2e Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 02:42:39 +0800 Subject: [PATCH 19/45] .. --- share/base.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/share/base.css b/share/base.css index caa1e61..d56463c 100644 --- a/share/base.css +++ b/share/base.css @@ -54,6 +54,10 @@ #pdf-outline.opened + #pdf-main { left:200px; } +/* + * The followings are base classes, which are meant to be override by PDF specific classes + * So do not increase the specificity + */ .d { /* page decoration */ position:relative; margin: 13px auto; From ccc4866b2e2c47ef26332786f2e237f1ed7cec0a Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 02:47:51 +0800 Subject: [PATCH 20/45] manpage --- pdf2htmlEX.1.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index a04a18b..baff339 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -76,9 +76,11 @@ And only when this switch is off will there be a separate .outline file contains You need to modify the manifest if you do not want outline embedded. .TP .B --split-pages <0|1> (Default: 0) -If turned on, each page is saved in a separated files, also the generated css file will be store separatedly as if single-html=0 +If turned on, pages will be stored into separated files named as 0.page, 1.page, ... 
-The output files will be named as 0.page, 1.page, ... +Also the css and outline will be stored into separated files, and the will be no .html generated. + +This switch is useful if you want pages to be loaded separately & dynamically -- in which case you need to compose the page yourself, and a supporting backend might be necessary. .TP .B --embed-base-font <0|1> (Default: 1) Whether to embed base 14 fonts. From 952a3409f3843aedef558b549cd21b884a9b2c38 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:16:38 +0000 Subject: [PATCH 21/45] cleaned up usage beahviour --- src/pdf2htmlEX.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index b337871..94d41db 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -36,9 +36,7 @@ ArgParser argparser; void show_usage_and_exit(const char * dummy = nullptr) { - cerr << "Usage: pdf2htmlEX [Options] []" << endl; - cerr << endl; - cerr << "Options:" << endl; + cerr << "Usage: pdf2htmlEX [options] []" << endl; argparser.show_usage(cerr); cerr << endl; cerr << "Run 'man pdf2htmlEX' for detailed information" << endl; @@ -141,8 +139,7 @@ int main(int argc, char **argv) parse_options(argc, argv); if (param.input_filename == "") { - cerr << "Missing input filename" << endl; - exit(EXIT_FAILURE); + show_usage_and_exit(); } //prepare the directories From 40e9f8983320b5949e845490e16b8f53ba27443d Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:17:57 +0000 Subject: [PATCH 22/45] don't need every program telling people how to use unix --- src/pdf2htmlEX.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 94d41db..18a1d6c 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -38,9 +38,6 @@ void show_usage_and_exit(const char * dummy = nullptr) { cerr << "Usage: pdf2htmlEX [options] []" << endl; argparser.show_usage(cerr); - cerr << endl; - cerr << "Run 'man pdf2htmlEX' for detailed 
information" << endl; - cerr << endl; exit(EXIT_FAILURE); } From 35d9668e90e60a09d1a46cf77165d9aa301abe7e Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:26:07 +0000 Subject: [PATCH 23/45] pdftohtml/pdftocairo/etc style usage defaults --- src/util/ArgParser.cc | 2 +- src/util/ArgParser.h | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/util/ArgParser.cc b/src/util/ArgParser.cc index 04381e6..de3b270 100644 --- a/src/util/ArgParser.cc +++ b/src/util/ArgParser.cc @@ -164,6 +164,6 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio } } -const int ArgParser::arg_col_width = 40; +const int ArgParser::arg_col_width = 31; } // namespace pdf2htmlEX diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index 432ec59..24a7db9 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -162,12 +162,6 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg) { sout << " "; - if(!dont_show_default) - { - sout << " (="; - dump_value(sout, default_value); - sout << ")"; - } } std::string s = sout.str(); @@ -175,8 +169,17 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const for(int i = s.size(); i < arg_col_width; ++i) out << ' '; - - out << " " << description << std::endl; + + out << " " << description; + + if(need_arg && !dont_show_default) + { + out << " (default is "; + dump_value(out, default_value); + out << ")"; + } + + out << std::endl; } } // namespace ArgParser From 81ee37e4ae0274a1e0ec2a18d3802e4659453461 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:30:04 +0000 Subject: [PATCH 24/45] show argument types, like pdftohtml/pdftocairo/etc --- src/util/ArgParser.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index 24a7db9..d89dc35 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -39,6 +39,23 @@ void 
dump_value(std::ostream & out, const T & v) extern void dump_value(std::ostream & out, const std::string & v); +// type names helper +template +struct type_name { + static char const* value() { return "unknown"; } +}; + +template<> struct type_name { + static char const* value() { return "int"; } +}; + +template<> struct type_name { + static char const* value() { return "fp"; } +}; + +template<> struct type_name { + static char const* value() { return "string"; } +}; class ArgParser { @@ -161,7 +178,7 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg) { - sout << " "; + sout << " <" << type_name::value() << ">"; } std::string s = sout.str(); From d250ccf829a429f49bc6a70d31d2374011903d2c Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:45:12 +0000 Subject: [PATCH 25/45] cleaned-up usage descriptions --- src/pdf2htmlEX.cc | 60 +++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 18a1d6c..602a164 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -54,8 +54,8 @@ void show_version_and_exit(const char * dummy = nullptr) void parse_options (int argc, char **argv) { argparser - .add("help,h", "show all options", &show_usage_and_exit) - .add("version,v", "show copyright and version info", &show_version_and_exit) + .add("help,h", "print usage information", &show_usage_and_exit) + .add("version,v", "print copyright and version info", &show_version_and_exit) .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", nullptr, true) .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", nullptr, true) @@ -64,43 +64,43 @@ void parse_options (int argc, char **argv) .add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") - .add("first-page,f", &param.first_page, 1, "first page to 
process") - .add("last-page,l", &param.last_page, numeric_limits::max(), "last page to process") + .add("first-page,f", &param.first_page, 1, "first page to convert") + .add("last-page,l", &param.last_page, numeric_limits::max(), "last page to convert") .add("zoom", &param.zoom, 0, "zoom ratio", nullptr, true) - .add("fit-width", &param.fit_width, 0, "fit width to pixels", nullptr, true) - .add("fit-height", &param.fit_height, 0, "fit height to pixels", nullptr, true) - .add("hdpi", &param.h_dpi, 144.0, "horizontal DPI for non-text") - .add("vdpi", &param.v_dpi, 144.0, "vertical DPI for non-text") + .add("fit-width", &param.fit_width, 0, "fit width to pixels", nullptr, true) + .add("fit-height", &param.fit_height, 0, "fit height to pixels", nullptr, true) + .add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") + .add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI") .add("use-cropbox", &param.use_cropbox, 0, "use CropBox instead of MediaBox") - .add("process-nontext", &param.process_nontext, 1, "process nontext objects") - .add("single-html", &param.single_html, 1, "combine everything into one single HTML file") - .add("split-pages", &param.split_pages, 0, "split pages into separated files") - .add("embed-base-font", &param.embed_base_font, 0, "embed local matched font for base 14 fonts in the PDF file") - .add("embed-external-font", &param.embed_external_font, 0, "embed local matched font for external fonts in the PDF file") - .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, for example 'fi' -> 'f''i'") + .add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text") + .add("single-html", &param.single_html, 1, "generate a single HTML file") + .add("split-pages", &param.split_pages, 0, "split pages into separate files") + .add("embed-base-font", &param.embed_base_font, 0, "embed local match for standard 14 fonts") + .add("embed-external-font", &param.embed_external_font, 0, "embed local match for external fonts") + .add("decompose-ligature", 
&param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") - .add("heps", &param.h_eps, 1.0, "max tolerated horizontal offset (in pixels)") - .add("veps", &param.v_eps, 1.0, "max tolerated vertical offset (in pixels)") - .add("space-threshold", &param.space_threshold, (1.0/8), "distance no thiner than (threshold * em) will be considered as a space character") - .add("font-size-multiplier", &param.font_size_multiplier, 4.0, "setting a value greater than 1 would increase the rendering accuracy") - .add("auto-hint", &param.auto_hint, 0, "Whether to generate hints for fonts") - .add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") + .add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels") + .add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels") + .add("space-threshold", &param.space_threshold, (1.0/8), "word break threshold (threshold * em)") + .add("font-size-multiplier", &param.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy") + .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") + .add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") - .add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") - .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "squeeze wide glyphs instead of truncating") + .add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") + .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") .add("remove-unused-glyph", &param.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") - .add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files") - .add("font-format", &param.font_format, "opentype", "format 
for extracted font files") - .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hintting fonts.(overrides --auto-hint)") - .add("css-filename", &param.css_filename, "", "Specify the file name of the generated css file") - .add("outline-filename", &param.outline_filename, "", "Specify the file name of the generated outline file") + .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") + .add("font-format", &param.font_format, "opentype", "CSS @font-face format for embedded fonts") + .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") + .add("css-filename", &param.css_filename, "", "filename of the generated css file") + .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") - .add("debug", &param.debug, 0, "output debug information") - .add("clean-tmp", &param.clean_tmp, 1, "clean temporary files after processing") - .add("css-draw", &param.css_draw, 0, "[Experimental and Unsupported] CSS Drawing") + .add("debug", &param.debug, 0, "print debugging information") + .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") + .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") .add("", &param.input_filename, "", "") .add("", &param.output_filename, "", "") ; From c80a732ea50e068443134495b99026d3865adb06 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 22:54:42 +0000 Subject: [PATCH 26/45] re-ordered usage descriptions to be more like pdftocairo,etc --- src/pdf2htmlEX.cc | 64 ++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 28 deletions(-) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 602a164..a5be7af 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -54,53 +54,61 @@ void show_version_and_exit(const char * dummy = nullptr) void parse_options (int argc, char **argv) { argparser - .add("help,h", "print usage information", 
&show_usage_and_exit) - .add("version,v", "print copyright and version info", &show_version_and_exit) - - .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", nullptr, true) - .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", nullptr, true) - .add("no-drm", &param.no_drm, 0, "override document DRM settings") - - .add("dest-dir", &param.dest_dir, ".", "specify destination directory") - .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") - + // pages .add("first-page,f", &param.first_page, 1, "first page to convert") .add("last-page,l", &param.last_page, numeric_limits::max(), "last page to convert") - + + // dimensions .add("zoom", &param.zoom, 0, "zoom ratio", nullptr, true) .add("fit-width", &param.fit_width, 0, "fit width to pixels", nullptr, true) .add("fit-height", &param.fit_height, 0, "fit height to pixels", nullptr, true) + .add("use-cropbox", &param.use_cropbox, 0, "use CropBox instead of MediaBox") .add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI") - .add("use-cropbox", &param.use_cropbox, 0, "use CropBox instead of MediaBox") - - .add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text") + + // output files .add("single-html", &param.single_html, 1, "generate a single HTML file") .add("split-pages", &param.split_pages, 0, "split pages into separate files") + .add("dest-dir", &param.dest_dir, ".", "specify destination directory") + .add("css-filename", &param.css_filename, "", "filename of the generated css file") + .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") + + // embedded fonts .add("embed-base-font", &param.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-external-font", &param.embed_external_font, 0, "embed local match for external fonts") + .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files 
(.ttf,.otf,.woff,.svg)") + .add("font-format", &param.font_format, "opentype", "CSS @font-face format for embedded fonts") .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") - + .add("remove-unused-glyph", &param.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") + .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") + .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") + .add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") + .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") + + // text .add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels") .add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels") .add("space-threshold", &param.space_threshold, (1.0/8), "word break threshold (threshold * em)") .add("font-size-multiplier", &param.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy") - .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") - .add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") .add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets") - .add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them") - .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") - .add("remove-unused-glyph", &param.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") - - .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") - .add("font-format", &param.font_format, "opentype", "CSS @font-face format for embedded fonts") - .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides 
--auto-hint)") - .add("css-filename", &param.css_filename, "", "filename of the generated css file") - .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") - - .add("debug", &param.debug, 0, "print debugging information") + .add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)") + + // encryption + .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", nullptr, true) + .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", nullptr, true) + .add("no-drm", &param.no_drm, 0, "override document DRM settings") + + // misc. .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") + .add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text") + .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") + .add("debug", &param.debug, 0, "print debugging information") + + // meta + .add("version,v", "print copyright and version info", &show_version_and_exit) + .add("help,h", "print usage information", &show_usage_and_exit) + .add("", &param.input_filename, "", "") .add("", &param.output_filename, "", "") ; From 4cddb4dbb3ed6403e0fd6febec0658e05d8c1344 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Mon, 28 Jan 2013 23:11:29 +0000 Subject: [PATCH 27/45] re-ordered Param.h to match new usage order --- src/Param.h | 78 ++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 43 deletions(-) diff --git a/src/Param.h b/src/Param.h index 2a49ddd..f883a67 100644 --- a/src/Param.h +++ b/src/Param.h @@ -15,64 +15,56 @@ namespace pdf2htmlEX { struct Param { - // PDF stuff - std::string owner_password, user_password; - std::string input_filename, output_filename; - int no_drm; - - // path - std::string dest_dir, tmp_dir, data_dir; - - // normal parameters + // pages int first_page, last_page; - 
+ // dimensions double zoom; double fit_width, fit_height; - double h_dpi, v_dpi; int use_cropbox; - - int process_nontext; + double h_dpi, v_dpi; + + // output files int single_html; int split_pages; + std::string dest_dir; + std::string css_filename; + std::string outline_filename; + + // embedded fonts int embed_base_font; int embed_external_font; + std::string font_suffix, font_format; int decompose_ligature; - - // Advanced tweak - /* - * Position & Size - */ + int remove_unused_glyph; + int auto_hint; + std::string external_hint_tool; + int stretch_narrow_glyph; + int squeeze_wide_glyph; + + // text double h_eps, v_eps; double space_threshold; double font_size_multiplier; - - /* - * Font - */ - int auto_hint; - int tounicode; int space_as_offset; - int stretch_narrow_glyph; - int squeeze_wide_glyph; - int remove_unused_glyph; - - std::string font_suffix, font_format; - std::string external_hint_tool; - - /* - * Output - */ - std::string css_filename; - std::string outline_filename; - - /* - * Debug - */ - int debug; + int tounicode; + + // encryption + std::string owner_password, user_password; + int no_drm; + + // misc. 
int clean_tmp; - - // experimental + int process_nontext; + std::string data_dir; int css_draw; + int debug; + + // non-optional + std::string input_filename, output_filename; + + // not a paramater + std::string tmp_dir; }; } // namespace pdf2htmlEX From 6c16aedb634a8358371143db94e1f8a2c30a2bf2 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 00:04:32 +0000 Subject: [PATCH 28/45] re-ordered manpage to match new usage order and grouping --- pdf2htmlEX.1.in | 182 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 128 insertions(+), 54 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index baff339..38af18e 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -22,49 +22,42 @@ Fonts are extracted form PDF and then embedded into HTML (Type 3 fonts are not s Other objects are rendered as images and also embedded. .SH OPTIONS + + .TP -.B --help -Show all options -.TP -.B -v, --version -Show copyright and version -.TP -.B -o, --owner-password -Specify owner password -.TP -.B -u, --user-password -Specify user password -.TP -.B --no-drm <0|1> (Default: 0) -Override document DRM settings -.TP -.B --dest-dir (Default: .) -Specify destination folder -.TP -.B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) -Specify the folder holding the manifest and other files +.B Pages + .TP .B -f, --first-page (Default: 1) Specify the first page to process + .TP .B -l, --last-page (Default: last page) Specify the last page to process + + .TP +.B Dimensions + .B --zoom , --fit-width , --fit-height --zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels. If multiple values are specified, the minimum one will be used. If none is specified, pages will be rendered as 72DPI. -.TP -.B --hdpi , --vdpi (Default: 144) -Specify the horizontal and vertical DPI for images + .TP .B --use-cropbox <0|1> (Default: 0) Use CropBox instead of MediaBox for output. 
+ .TP -.B --process-nontext <0|1> (Default: 1) -Whether to process non-text objects (as images) +.B --hdpi , --vdpi (Default: 144) +Specify the horizontal and vertical DPI for images + + +.TP +.B Output Files + .TP .B --single-html <0|1> (Default: 1) Whether to embed everything into one HTML file. @@ -74,6 +67,7 @@ If switched off, there will be several files generated along with the HTML file Note that the outline will always be embedded into the main HTML file no matter if this switch is on or not. And only when this switch is off will there be a separate .outline file contains the outline. You need to modify the manifest if you do not want outline embedded. + .TP .B --split-pages <0|1> (Default: 0) If turned on, pages will be stored into separated files named as 0.page, 1.page, ... @@ -81,6 +75,27 @@ If turned on, pages will be stored into separated files named as .html generated. This switch is useful if you want pages to be loaded separately & dynamically -- in which case you need to compose the page yourself, and a supporting backend might be necessary. + +.TP +.B --dest-dir (Default: .) +Specify destination folder + +.TP +.B --css-filename (Default: ) +Specify the filename of the generated css file, if not embedded. + +If it's empty, the file name will be determined automatically. + +.TP +.B --outline-filename (Default: ) +Specify the filename of the generated outline file, if not embedded. + +If it's empty, the file name will be determined automatically. + + +.TP +.B Embedded Fonts + .TP .B --embed-base-font <0|1> (Default: 1) Whether to embed base 14 fonts. @@ -88,20 +103,57 @@ Whether to embed base 14 fonts. There are several base font defined in PDF standards, which are supposed to be provided by the PDF reader. If this switch is on, local matched font will be used and embedded; otherwise only font names are exported such that web browsers may try to find proper fonts themselves. 
+ .TP .B --embed-external-font <0|1> (Default: 0) Similar as above but for non-base fonts. + +.TP +.B --font-suffix (Default: .ttf), --font-format (Default: truetype) +Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. + .TP .B --decompose-ligature <0|1> (Default: 0) Decompose ligatures. For example 'fi' -> 'f''i'. + +.TP +.B --remove-unused-glyph <0|1> (Default: 1) +If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size. + +.TP +.B --auto-hint <0|1> (Default: 0) +If set to 1, hints will be generated for the fonts using fontforge. + +This may be preceded by --external-hint-tool. + +.TP +.B --external-hint-tool (Default: ) +If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint. + +The tool will be called as ' ', where suffix will be the same as specified for --font-suffix. + +.TP +.B --stretch-narrow-glyph <0|1> (Default: 0) +If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs + +.TP +.B --squeeze-wide-glyph <0|1> (Default: 1) +If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. + + +.TP +.B Text + .TP .B --heps , --veps (Default: 1) Specify the maximum tolerable horizontal/vertical offset (in pixels). pdf2htmlEX would try to optimize the generated HTML file moving Text within this distance. + .TP .B --space-threshold (Default: 1.0/6) pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size. + .TP .B --font-size-multiplier (Default: 4.0) Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering. 
@@ -109,11 +161,13 @@ Many web browsers limit the minimum font size, and many would round the given fo Specify a ratio greater than 1 would resolve this issue, however it might freeze some browsers. For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here. -.TP -.B --auto-hint <0|1> (Default: 0) -If set to 1, hints will be generated for the fonts using fontforge. -This may be preceded by --external-hint-tool. +.TP +.B --space-as-offset <0|1> (Default: 0) +Treat space characters as offsets, which may increase the size of the output. + +Turn it on if space characters are not displayed correctly, or you want to remove positional spaces. + .TP .B --tounicode <-1|0|1> (Default: 0) A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong. @@ -123,40 +177,60 @@ If this value is set to 1, the ToUnicode Map is always applied, if provided in P If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste. If set to 0, pdf2htmlEX would try its best to balance the two methods above. -.TP -.B --space-as-offset <0|1> (Default: 0) -Treat space characters as offsets, which may increase the size of the output. -Turn it on if space characters are not displayed correctly, or you want to remove positional spaces. -.TP -.B --stretch-narrow-glyph <0|1> (Default: 0) -If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs -.TP -.B --squeeze-wide-glyph <0|1> (Default: 1) -If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. 
-.TP -.B --remove-unused-glyph <0|1> (Default: 1) -If set to 1, remove unused glyphs in embedded fonts in order to reduce the file size. -.TP -.B --font-suffix (Default: .ttf), --font-format (Default: truetype) -Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. -.TP -.B --external-hint-tool (Default: ) -If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint. -The tool will be called as ' ', where suffix will be the same as specified for --font-suffix. .TP -.B --css-filename (Default: ) -Specify the filename of the generated css file, if not embedded. +.B Encryption -If it's empty, the file name will be determined automatically. .TP -.B --debug <0|1> (Default: 0) -Show debug information. +.B -o, --owner-password +Specify owner password + +.TP +.B -u, --user-password +Specify user password + +.TP +.B --no-drm <0|1> (Default: 0) +Override document DRM settings + + +.TP +.B Misc. + .TP .B --clean-tmp <0|1> (Default: 1) If switched off, intermediate files won't be cleaned in the end. +.TP +.B --process-nontext <0|1> (Default: 1) +Whether to process non-text objects (as images) + +.TP +.B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) +Specify the folder holding the manifest and other files + +.TP +.B --css-draw <0|1> (Default: 0) +Experimental and unsupported CSS drawing + +.TP +.B --debug <0|1> (Default: 0) +Print debug information. 
+ + +.TP +.B Meta + +.TP +.B -v, --version +Print copyright and version info + +.TP +.B --help +Print usage information + + .SH EXAMPLE .TP .B pdf2htmlEX /path/to/file.pdf From 444f5a759f2f47c2fa9c50864fba58589492579d Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 17:40:09 +0800 Subject: [PATCH 29/45] credit authors --- AUTHORS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 AUTHORS diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..0a4053e --- /dev/null +++ b/AUTHORS @@ -0,0 +1,14 @@ +Deepak +filodej +hasufell +Herbert Jones +Hongliang Tian +John Hewson +Lu Wang + +Packagers: +Arthur Titeica +Deepak Thukral +Jamie Ly +Lu Wang + From e79c2884bbee8cbcd793c7992c7a1a65797b3971 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 10:38:39 +0000 Subject: [PATCH 30/45] usage: embedded fonts -> fonts --- pdf2htmlEX.1.in | 2 +- src/Param.h | 2 +- src/pdf2htmlEX.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 38af18e..630ddb6 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -94,7 +94,7 @@ If it's empty, the file name will be determined automatically. 
.TP -.B Embedded Fonts +.B Fonts .TP .B --embed-base-font <0|1> (Default: 1) diff --git a/src/Param.h b/src/Param.h index f883a67..fdabc8c 100644 --- a/src/Param.h +++ b/src/Param.h @@ -31,7 +31,7 @@ struct Param std::string css_filename; std::string outline_filename; - // embedded fonts + // fonts int embed_base_font; int embed_external_font; std::string font_suffix, font_format; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index a5be7af..ba3eaec 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -73,7 +73,7 @@ void parse_options (int argc, char **argv) .add("css-filename", &param.css_filename, "", "filename of the generated css file") .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") - // embedded fonts + // fonts .add("embed-base-font", &param.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-external-font", &param.embed_external_font, 0, "embed local match for external fonts") .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") From efbd9c8c1effb3f7d71d6b79a548642c9be8f247 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 10:39:25 +0000 Subject: [PATCH 31/45] usage: default is -> default: --- src/util/ArgParser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index d89dc35..2f68991 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -191,7 +191,7 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg && !dont_show_default) { - out << " (default is "; + out << " (default: "; dump_value(out, default_value); out << ")"; } From 6fda07bec3fb2c111abf866372c61477831887bc Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 12:07:51 +0000 Subject: [PATCH 32/45] automatically infer @font-face src format --- pdf2htmlEX.1.in | 4 +- src/HTMLRenderer/HTMLRenderer.h | 2 +- src/HTMLRenderer/export.cc | 82 
+++++++++++++++++++++------------ src/HTMLRenderer/install.cc | 6 +-- src/Param.h | 2 +- src/pdf2htmlEX.cc | 8 +++- 6 files changed, 66 insertions(+), 38 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 630ddb6..57dc1ee 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -109,8 +109,8 @@ If this switch is on, local matched font will be used and embedded; otherwise on Similar as above but for non-base fonts. .TP -.B --font-suffix (Default: .ttf), --font-format (Default: truetype) -Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. +.B --font-suffix (Default: .ttf) +Specify the suffix of fonts extracted from the PDF file. .TP .B --decompose-ligature <0|1> (Default: 0) diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 532eedb..f94e013 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -248,7 +248,7 @@ class HTMLRenderer : public OutputDev * remote font: to be retrieved from the web server * local font: to be substituted with a local (client side) font */ - void export_remote_font(const FontInfo & info, const std::string & suffix, const std::string & fontfileformat, GfxFont * font); + void export_remote_font(const FontInfo & info, const std::string & suffix, GfxFont * font); void export_remote_default_font(long long fn_id); void export_local_font(const FontInfo & info, GfxFont * font, const std::string & original_font_name, const std::string & cssfont); diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index 148f741..a83709b 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -18,40 +18,62 @@ namespace pdf2htmlEX { -void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, const string & fontfileformat, GfxFont * font) -{ - f_css.fs << "@font-face{" - << "font-family:f" << info.id << ";" - << "src:url("; - + void HTMLRenderer::export_remote_font(const FontInfo & info, 
const string & suffix, GfxFont * font) { - auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); - if(param->single_html) - { - auto path = param->tmp_dir + "/" + (char*)fn; - ifstream fin(path, ifstream::binary); - if(!fin) - throw "Cannot locate font file: " + path; - f_css.fs << "'data:font/" + fontfileformat + ";base64," << base64stream(fin) << "'"; + string mime_type, format; + if (suffix == ".ttf") { + format = "truetype"; + mime_type = "application/x-font-ttf"; } - else - { - f_css.fs << (char*)fn; + else if (suffix == ".otf") { + format = "opentype"; + mime_type = "application/x-font-otf"; + } + else if (suffix == ".woff") { + format = "woff"; + mime_type = "application/font-woff"; + } + else if (suffix == ".eot") { + format = "embedded-opentype"; + mime_type = "application/vnd.ms-fontobject"; + } + else if (suffix == ".svg") { + format = "svg"; + mime_type = "image/svg+xml"; } - } - f_css.fs << ")" - << "format(\"" << fontfileformat << "\");" - << "}" // end of @font-face - << ".f" << info.id << "{" - << "font-family:f" << info.id << ";" - << "line-height:" << round(info.ascent - info.descent) << ";" - << "font-style:normal;" - << "font-weight:normal;" - << "visibility:visible;" - << "}" // end of .f - << endl; -} + f_css.fs << "@font-face{" + << "font-family:f" << info.id << ";" + << "src:url("; + + { + auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); + if(param->single_html) + { + auto path = param->tmp_dir + "/" + (char*)fn; + ifstream fin(path, ifstream::binary); + if(!fin) + throw "Cannot locate font file: " + path; + f_css.fs << "'data:font/" + mime_type + ";base64," << base64stream(fin) << "'"; + } + else + { + f_css.fs << (char*)fn; + } + } + + f_css.fs << ")" + << "format(\"" << format << "\");" + << "}" // end of @font-face + << ".f" << info.id << "{" + << "font-family:f" << info.id << ";" + << "line-height:" << round(info.ascent - info.descent) << ";" + << "font-style:normal;" + << "font-weight:normal;" + << "visibility:visible;" + << "}" 
// end of .f + << endl; + } static string general_font_family(GfxFont * font) { diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 4dac8d2..1ec80ca 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -110,7 +110,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info) if(path != "") { embed_font(path, font, info); - export_remote_font(info, param->font_suffix, param->font_format, font); + export_remote_font(info, param->font_suffix, font); } else { @@ -129,7 +129,7 @@ void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, Font if(localfontloc != nullptr) { embed_font(localfontloc->path->getCString(), font, info); - export_remote_font(info, param->font_suffix, param->font_format, font); + export_remote_font(info, param->font_suffix, font); delete localfontloc; return; } @@ -186,7 +186,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info) if(localfontloc != nullptr) { embed_font(string(localfontloc->path->getCString()), font, info); - export_remote_font(info, param->font_suffix, param->font_format, font); + export_remote_font(info, param->font_suffix, font); delete localfontloc; return; } diff --git a/src/Param.h b/src/Param.h index fdabc8c..e8d6b90 100644 --- a/src/Param.h +++ b/src/Param.h @@ -34,7 +34,7 @@ struct Param // fonts int embed_base_font; int embed_external_font; - std::string font_suffix, font_format; + std::string font_suffix; int decompose_ligature; int remove_unused_glyph; int auto_hint; diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index ba3eaec..20d0cee 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -53,6 +53,8 @@ void show_version_and_exit(const char * dummy = nullptr) void parse_options (int argc, char **argv) { + string deprecated_string; + argparser // pages .add("first-page,f", ¶m.first_page, 1, "first page to convert") @@ -77,7 +79,6 @@ void parse_options (int argc, char **argv) .add("embed-base-font", 
&param.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-external-font", &param.embed_external_font, 0, "embed local match for external fonts") .add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") - .add("font-format", &param.font_format, "opentype", "CSS @font-face format for embedded fonts") .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") .add("remove-unused-glyph", &param.remove_unused_glyph, 1, "remove unused glyphs in embedded fonts") .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") @@ -111,6 +112,11 @@ void parse_options (int argc, char **argv) .add("", &param.input_filename, "", "") .add("", &param.output_filename, "", "") + + // deprecated + .add("font-format", &deprecated_string, "", "", [] (const char*) { + cerr << "warning: --font-format is deprecated, @font-face format is inferred from --font-suffix" << endl; + }) ; try From b2d28e2490c3c032983454f7817584d1eae64c18 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 13:38:34 +0000 Subject: [PATCH 33/45] fixed formatting --- src/HTMLRenderer/export.cc | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index a83709b..ff76fc6 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -43,8 +43,8 @@ namespace pdf2htmlEX { } f_css.fs << "@font-face{" - << "font-family:f" << info.id << ";" - << "src:url("; + << "font-family:f" << info.id << ";" + << "src:url("; { auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); @@ -63,23 +63,23 @@ namespace pdf2htmlEX { } f_css.fs << ")" - << "format(\"" << format << "\");" - << "}" // end of @font-face - << ".f" << info.id << "{" - << "font-family:f" << info.id << ";" - << "line-height:" << round(info.ascent - info.descent) << ";" - << "font-style:normal;" - << "font-weight:normal;" - << 
"visibility:visible;" - << "}" // end of .f - << endl; + << "format(\"" << format << "\");" + << "}" // end of @font-face + << ".f" << info.id << "{" + << "font-family:f" << info.id << ";" + << "line-height:" << round(info.ascent - info.descent) << ";" + << "font-style:normal;" + << "font-weight:normal;" + << "visibility:visible;" + << "}" // end of .f + << endl; } static string general_font_family(GfxFont * font) { - if(font -> isFixedWidth()) + if(font->isFixedWidth()) return "monospace"; - else if (font -> isSerif()) + else if (font->isSerif()) return "serif"; else return "sans-serif"; From 9efa67681b31ea49a8c7b7735b8858d357db6814 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 22:23:59 +0800 Subject: [PATCH 34/45] outline is closed by default, and will be opened by pdf2htmlEX.js if not empty --- share/manifest | 30 ++++++++++++++++++++++-------- share/pdf2htmlEX.js | 4 ++-- src/HTMLRenderer/general.cc | 4 +++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/share/manifest b/share/manifest index 6afd97d..7227a8c 100644 --- a/share/manifest +++ b/share/manifest @@ -1,4 +1,4 @@ -# manifest +# pdf2htmlEX manifest # by WangLu # 2012.09.12 # @@ -21,39 +21,53 @@ """ + # base CSS styles @base.css - # PDF specific CSS styles $css - # necessary Javascript codes @jquery.js @pdf2htmlEX.js +# entry point of pdf2htmlEX """ +""" +""" -
""" +# The container of outline +# By default this is hidden, pdf2htmlEX.js will add the 'opened' class if it is not empty +# You can add a class 'opened' here if you want it always opened or you don't use pdf2htmlEX.js +# e.g. +#
+""" +
+""" $outline - """
+""" + +# The container of PDF pages +# check base.css for an example and requirements of its CSS styles +"""
""" - -# PDF pages $pages - """
+""" + + +""" """ diff --git a/share/pdf2htmlEX.js b/share/pdf2htmlEX.js index 84e0eb2..b50e82d 100644 --- a/share/pdf2htmlEX.js +++ b/share/pdf2htmlEX.js @@ -118,8 +118,8 @@ var pdf2htmlEX = (function(){ this.container = $('#'+this.container_id); // need a better design - if(this.outline.children().length == 0) { - this.outline.toggleClass('opened'); + if(this.outline.children().length > 0) { + this.outline.addClass('opened'); } var new_pages = new Array(); diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index a3bfae2..e67237d 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -392,7 +392,9 @@ void HTMLRenderer::post_process() continue; } - if(line.empty() || line[0] == '#') + if(line.empty() + || (line.find_first_not_of(' ') == string::npos) + || line[0] == '#') continue; From 3ad0564b2573e201b47601eb3a8c3fc9b054fbaf Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 22:31:34 +0800 Subject: [PATCH 35/45] changelog --- ChangeLog | 4 ++++ debian/changelog | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/ChangeLog b/ChangeLog index 494c28c..3a89c38 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ Latest v0.7 +* Process outline +* Fix build with poppler +* Many code cleaning jobs [John Hewson] + v0.6 2013.01.26 diff --git a/debian/changelog b/debian/changelog index 17613da..cedab76 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +pdf2htmlex (0.7-1~git201301292229r2595c-0ubuntu1) quantal; urgency=low + + * Fixed a CSS issue + + -- WANG Lu Tue, 29 Jan 2013 22:29:21 +0800 + pdf2htmlex (0.7-1~git201301282229r2595c-0ubuntu1) quantal; urgency=low * Process PDF Outline From 2e91696b3312dcfd23459d0ad1df1f4f198ffe53 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 22:52:32 +0800 Subject: [PATCH 36/45] improve manpage; show data-dir in -v --- pdf2htmlEX.1.in | 11 ++++++++++- src/pdf2htmlEX.cc | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git 
a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 630ddb6..fd3745c 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -208,7 +208,7 @@ Whether to process non-text objects (as images) .TP .B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) -Specify the folder holding the manifest and other files +Specify the folder holding the manifest and other files (see below for the manifest file)` .TP .B --css-draw <0|1> (Default: 0) @@ -231,6 +231,15 @@ Print copyright and version info Print usage information +.SH MANIFEST and DATA-DIR +When split-pages is 0, the manifest file describes how the final html page should be generated. + +By default, pdf2htmlEX will use the manifest in the default data-dir (run `pdf2htmlEX -v` to check), which gives a simple demo of its syntax. + +You can modify the default one, or you can create a new one and specify the correct data-dir in the command line. + +When single-html is 1, all files referred by the manifest must be located in the data-dir. + .SH EXAMPLE .TP .B pdf2htmlEX /path/to/file.pdf diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index ba3eaec..1bfe6cb 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -48,6 +48,7 @@ void show_version_and_exit(const char * dummy = nullptr) cerr << "Libraries: "; cerr << "poppler " << POPPLER_VERSION << ", "; cerr << "libfontforge " << ffw_get_version() << endl; + cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; exit(EXIT_SUCCESS); } From 57517f79f7163cab5bdfdd7cbf6cc9ea4d8c14d2 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Tue, 29 Jan 2013 23:26:41 +0800 Subject: [PATCH 37/45] tweak manpage --- pdf2htmlEX.1.in | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index fd3745c..9df3065 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -23,9 +23,7 @@ Other objects are rendered as images and also embedded. 
.SH OPTIONS - -.TP -.B Pages +.SS Pages .TP .B -f, --first-page (Default: 1) @@ -35,9 +33,7 @@ Specify the first page to process .B -l, --last-page (Default: last page) Specify the last page to process - -.TP -.B Dimensions +.SS Dimensions .B --zoom , --fit-width , --fit-height --zoom specifies the zoom factor directly; --fit-width/height specifies the maximum width/height of a page, the values are in pixels. @@ -55,8 +51,7 @@ Use CropBox instead of MediaBox for output. Specify the horizontal and vertical DPI for images -.TP -.B Output Files +.SS Output Files .TP .B --single-html <0|1> (Default: 1) @@ -92,9 +87,7 @@ Specify the filename of the generated outline file, if not embedded. If it's empty, the file name will be determined automatically. - -.TP -.B Fonts +.SS Fonts .TP .B --embed-base-font <0|1> (Default: 1) @@ -140,9 +133,7 @@ If set to 1, glyphs narrower than described in PDF will be stretched; otherwise .B --squeeze-wide-glyph <0|1> (Default: 1) If set to 1, glyphs wider than described in PDF will be squeezed; otherwise it will be truncated. - -.TP -.B Text +.SS Text .TP .B --heps , --veps (Default: 1) @@ -178,9 +169,7 @@ If set to -1, a customized map is used such that rendering will be correct in HT If set to 0, pdf2htmlEX would try its best to balance the two methods above. - -.TP -.B Encryption +.SS PDF Protection .TP .B -o, --owner-password @@ -194,9 +183,7 @@ Specify user password .B --no-drm <0|1> (Default: 0) Override document DRM settings - -.TP -.B Misc. +.SS Misc. .TP .B --clean-tmp <0|1> (Default: 1) @@ -218,9 +205,7 @@ Experimental and unsupported CSS drawing .B --debug <0|1> (Default: 0) Print debug information. - -.TP -.B Meta +.SS Meta .TP .B -v, --version @@ -230,7 +215,6 @@ Print copyright and version info .B --help Print usage information - .SH MANIFEST and DATA-DIR When split-pages is 0, the manifest file describes how the final html page should be generated. 
From b9763d10727c4b7e5c082fc55dc2e2903cdb7912 Mon Sep 17 00:00:00 2001 From: John Hewson Date: Tue, 29 Jan 2013 18:13:48 +0000 Subject: [PATCH 38/45] cosmetic changes --- src/HTMLRenderer/export.cc | 113 +++++++++++++++++++------------------ 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index ff76fc6..c9e1516 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -18,62 +18,67 @@ namespace pdf2htmlEX { - void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font) +void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font) +{ + string mime_type, format; + if(suffix == ".ttf") { - string mime_type, format; - if (suffix == ".ttf") { - format = "truetype"; - mime_type = "application/x-font-ttf"; - } - else if (suffix == ".otf") { - format = "opentype"; - mime_type = "application/x-font-otf"; - } - else if (suffix == ".woff") { - format = "woff"; - mime_type = "application/font-woff"; - } - else if (suffix == ".eot") { - format = "embedded-opentype"; - mime_type = "application/vnd.ms-fontobject"; - } - else if (suffix == ".svg") { - format = "svg"; - mime_type = "image/svg+xml"; - } - - f_css.fs << "@font-face{" - << "font-family:f" << info.id << ";" - << "src:url("; - - { - auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); - if(param->single_html) - { - auto path = param->tmp_dir + "/" + (char*)fn; - ifstream fin(path, ifstream::binary); - if(!fin) - throw "Cannot locate font file: " + path; - f_css.fs << "'data:font/" + mime_type + ";base64," << base64stream(fin) << "'"; - } - else - { - f_css.fs << (char*)fn; - } - } - - f_css.fs << ")" - << "format(\"" << format << "\");" - << "}" // end of @font-face - << ".f" << info.id << "{" - << "font-family:f" << info.id << ";" - << "line-height:" << round(info.ascent - info.descent) << ";" - << "font-style:normal;" - << "font-weight:normal;" - << 
"visibility:visible;" - << "}" // end of .f - << endl; + format = "truetype"; + mime_type = "application/x-font-ttf"; } + else if(suffix == ".otf") + { + format = "opentype"; + mime_type = "application/x-font-otf"; + } + else if(suffix == ".woff") + { + format = "woff"; + mime_type = "application/font-woff"; + } + else if(suffix == ".eot") + { + format = "embedded-opentype"; + mime_type = "application/vnd.ms-fontobject"; + } + else if(suffix == ".svg") + { + format = "svg"; + mime_type = "image/svg+xml"; + } + + f_css.fs << "@font-face{" + << "font-family:f" << info.id << ";" + << "src:url("; + + { + auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); + if(param->single_html) + { + auto path = param->tmp_dir + "/" + (char*)fn; + ifstream fin(path, ifstream::binary); + if(!fin) + throw "Cannot locate font file: " + path; + f_css.fs << "'data:font/" + mime_type + ";base64," << base64stream(fin) << "'"; + } + else + { + f_css.fs << (char*)fn; + } + } + + f_css.fs << ")" + << "format(\"" << format << "\");" + << "}" // end of @font-face + << ".f" << info.id << "{" + << "font-family:f" << info.id << ";" + << "line-height:" << round(info.ascent - info.descent) << ";" + << "font-style:normal;" + << "font-weight:normal;" + << "visibility:visible;" + << "}" // end of .f + << endl; +} static string general_font_family(GfxFont * font) { From b2ae01a2fa4d987c256df6d008613d30c487abde Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 02:48:05 +0800 Subject: [PATCH 39/45] make type_name a member function --- src/util/ArgParser.cc | 4 ++++ src/util/ArgParser.h | 34 ++++++++++++++-------------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/util/ArgParser.cc b/src/util/ArgParser.cc index de3b270..433b579 100644 --- a/src/util/ArgParser.cc +++ b/src/util/ArgParser.cc @@ -146,6 +146,10 @@ void ArgParser::show_usage(ostream & out) const } } +template<> const char * ArgParser::get_type_name (void) { return "int"; } +template<> const char * 
ArgParser::get_type_name (void) { return "fp"; } +template<> const char * ArgParser::get_type_name (void) { return "string"; } + ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * description, bool need_arg) : shortname(0), name(name), description(description), need_arg(need_arg) { diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index 2f68991..d5aafde 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -39,24 +39,6 @@ void dump_value(std::ostream & out, const T & v) extern void dump_value(std::ostream & out, const std::string & v); -// type names helper -template -struct type_name { - static char const* value() { return "unknown"; } -}; - -template<> struct type_name { - static char const* value() { return "int"; } -}; - -template<> struct type_name { - static char const* value() { return "fp"; } -}; - -template<> struct type_name { - static char const* value() { return "string"; } -}; - class ArgParser { public: @@ -71,13 +53,20 @@ class ArgParser ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr); + /* + * location == nullptr means no argument is needed + */ template - ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false); + ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false); void parse(int argc, char ** argv) const; void show_usage(std::ostream & out) const; private: + // type names helper + template + static const char * get_type_name(void) { return "unknown"; } + class ArgEntryBase { public: @@ -127,6 +116,11 @@ ArgParser & ArgParser::add(const char * optname, T * location, const Tv & defaul return *this; } +// Known types +template<> const char * ArgParser::get_type_name (void); +template<> const char * ArgParser::get_type_name 
(void); +template<> const char * ArgParser::get_type_name (void); + template ArgParser::ArgEntry::ArgEntry(const char * name, T * location, const Tv & default_value, ArgParserCallBack callback, const char * description, bool dont_show_default) : ArgEntryBase(name, description, (location != nullptr)) @@ -178,7 +172,7 @@ void ArgParser::ArgEntry::show_usage(std::ostream & out) const if(need_arg) { - sout << " <" << type_name::value() << ">"; + sout << " <" << get_type_name() << ">"; } std::string s = sout.str(); From a7bba1bf486b812270a4ea8c45d79ff1984bb52d Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 03:22:11 +0800 Subject: [PATCH 40/45] clean ArgParser --- src/util/ArgParser.cc | 2 +- src/util/ArgParser.h | 25 +++++++++++++++---------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/util/ArgParser.cc b/src/util/ArgParser.cc index 433b579..9b66a20 100644 --- a/src/util/ArgParser.cc +++ b/src/util/ArgParser.cc @@ -163,7 +163,7 @@ ArgParser::ArgEntryBase::ArgEntryBase(const char * name, const char * descriptio } else { - cerr << "Warning: argument '" << this->name << "' may not be parsed correctly" << endl; + cerr << "Warning: argument '" << this->name << "' cannnot be parsed as a short option" << endl; } } } diff --git a/src/util/ArgParser.h b/src/util/ArgParser.h index d5aafde..a6c58c4 100644 --- a/src/util/ArgParser.h +++ b/src/util/ArgParser.h @@ -47,15 +47,14 @@ class ArgParser typedef void (*ArgParserCallBack) (const char * arg); /* - * optname: name of the argment, should be provided as --optname - * description: if description is "", the argument won't be shown in show_usage() + * The 1st is for arg without arguments (i.e. flags), and the 2nd is for general args. + * optname: + * - if not nullptr, it should be the name of the arg, should be in the format of "[,]", e.g. 
"help,h" + * - if nullptr, it denotes an optional arg, and description will be ignored + * description: + * - if description is nullptr or "", the argument won't be shown in show_usage() */ - ArgParser & add(const char * optname, const char * description, ArgParserCallBack callback = nullptr); - - /* - * location == nullptr means no argument is needed - */ template ArgParser & add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback = nullptr, bool dont_show_default = false); @@ -70,6 +69,7 @@ class ArgParser class ArgEntryBase { public: + /* name or description cannot be nullptr */ ArgEntryBase(const char * name, const char * description, bool need_arg); virtual ~ArgEntryBase() { } char shortname; @@ -107,11 +107,16 @@ class ArgParser template ArgParser & ArgParser::add(const char * optname, T * location, const Tv & default_value, const char * description, ArgParserCallBack callback, bool dont_show_default) { - // use "" in case nullptr is provided + // ArgEntry does not accept nullptr as optname nor description if((!optname) || (!optname[0])) + { + // when optname is nullptr or "", it's optional, and description is dropped optional_arg_entries.push_back(new ArgEntry("", location, default_value, callback, "", dont_show_default)); + } else - arg_entries.push_back(new ArgEntry(optname, location, default_value, callback, description, dont_show_default)); + { + arg_entries.push_back(new ArgEntry(optname, location, default_value, callback, (description ? 
description : ""), dont_show_default)); + } return *this; } @@ -152,7 +157,7 @@ void ArgParser::ArgEntry::parse(const char * arg) const template void ArgParser::ArgEntry::show_usage(std::ostream & out) const { - if(description == "") + if(description.empty()) return; std::ostringstream sout; From 71f1cb3b8ae1ed315932300cd9a04efa8e467c8c Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 03:40:17 +0800 Subject: [PATCH 41/45] clean unuseful warning messages in ArgParser --- src/util/ArgParser.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/ArgParser.cc b/src/util/ArgParser.cc index 9b66a20..0edc25f 100644 --- a/src/util/ArgParser.cc +++ b/src/util/ArgParser.cc @@ -76,7 +76,7 @@ void ArgParser::parse(int argc, char ** argv) const int v = p->shortname; if(!(opt_map.insert(make_pair(v, p)).second)) { - cerr << "Warning: duplicated shortname '" << v << "' used by -" << (char)(p->shortname) << " and -" << (char)(opt_map[p->shortname]->shortname) << endl; + cerr << "Warning: duplicated shortname: " << v << endl; } } @@ -93,7 +93,7 @@ void ArgParser::parse(int argc, char ** argv) const } if(!(opt_map.insert(make_pair(v, p)).second)) { - cerr << "Warning: duplicated shortname '" << v << "' used by --" << (p->name) << " and --" << (opt_map[p->shortname]->name) << endl; + cerr << "Warning: duplicated long name: " << (p->name) << endl; } } } From 2b70f5b2a1ca9154273d98fa518db47370c1af65 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 04:06:48 +0800 Subject: [PATCH 42/45] fix float point number comparison in install_* --- src/HTMLRenderer/install.cc | 8 ++++---- src/util/math.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 1ec80ca..a3f7818 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -281,7 +281,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt { // ws_width is 
already mulitpled by draw_scale auto iter = whitespace_map.lower_bound(ws_width - param->h_eps); - if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) < param->h_eps)) + if((iter != whitespace_map.end()) && (abs(iter->first - ws_width) <= param->h_eps)) { actual_width = iter->first; return iter->second; @@ -297,7 +297,7 @@ long long HTMLRenderer::install_whitespace(double ws_width, double & actual_widt long long HTMLRenderer::install_rise(double rise) { auto iter = rise_map.lower_bound(rise - param->v_eps); - if((iter != rise_map.end()) && (abs(iter->first - rise) < param->v_eps)) + if((iter != rise_map.end()) && (abs(iter->first - rise) <= param->v_eps)) { return iter->second; } @@ -311,7 +311,7 @@ long long HTMLRenderer::install_rise(double rise) long long HTMLRenderer::install_height(double height) { auto iter = height_map.lower_bound(height - EPS); - if((iter != height_map.end()) && (abs(iter->first - height) < EPS)) + if((iter != height_map.end()) && (abs(iter->first - height) <= EPS)) { return iter->second; } @@ -324,7 +324,7 @@ long long HTMLRenderer::install_height(double height) long long HTMLRenderer::install_left(double left) { auto iter = left_map.lower_bound(left - param->h_eps); - if((iter != left_map.end()) && (abs(iter->first - left) < param->h_eps)) + if((iter != left_map.end()) && (abs(iter->first - left) <= param->h_eps)) { return iter->second; } diff --git a/src/util/math.h b/src/util/math.h index 9c9f5db..2966090 100644 --- a/src/util/math.h +++ b/src/util/math.h @@ -15,7 +15,7 @@ namespace pdf2htmlEX { static inline double round(double x) { return (std::abs(x) > EPS) ? 
x : 0.0; } -static inline bool equal(double x, double y) { return std::abs(x-y) < EPS; } +static inline bool equal(double x, double y) { return std::abs(x-y) <= EPS; } static inline bool is_positive(double x) { return x > EPS; } static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6) { From 6d0344999841c26c13a7527792e497a87fdd0c22 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 12:15:22 +0800 Subject: [PATCH 43/45] update readme --- README.md | 41 ++++++++++++-------------------------- src/HTMLRenderer/export.cc | 7 +++++++ 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index c1f5bcd..db2b965 100644 --- a/README.md +++ b/README.md @@ -54,32 +54,15 @@ Readers can also be benefitted ## Get started -### Ubuntu - -[PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex), which is not so up-to-date. - -### ArchLinux - -[AUR Package](https://aur.archlinux.org/packages.php?ID=62426), special thanks to Arthur Titeica - -### Gentoo - -Install through Overlay gentoo-zh, mrueg or sunrise, thanks to the packagers. - -### Mac - -[Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb), special thanks to Jamie Ly - -[Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport), special thanks to Deepak Thukral - -### Windows - -The code may be built with Cygwin. - -Or with MinGW with some modifications. - -More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura +### Install + +Thanks to all packagers! + * [Ubuntu PPA](https://launchpad.net/~coolwanglu/+archive/pdf2htmlex) by Lu Wang , not always up-to-date. + * [ArchLinux AUR](https://aur.archlinux.org/packages.php?ID=62426) by Arthur Titeica + * [Gentoo Overlay](http://gpo.zugaina.org/app-text/pdf2htmlex), gentoo-zh, mrueg or sunrise, by respective packagers. 
+ * [Homebrew Formula](https://github.com/jamiely/homebrew/blob/pdf2htmlex/Library/Formula/pdf2htmlex.rb) by Jamie Ly + * [Macports (local repo)](https://github.com/iapain/pdf2htmlEX-macport) by Deepak Thukral ### Build from source @@ -97,6 +80,10 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u * git version is recommended to avoid annoying compilation issues * [Optional] **ttfautohint** * run pdf2htmlEX with **--external-hint-tool=ttfautohint** to enable it +* [For Windows] + * Cygwin + * or MinGW, with some modifications to pdf2htmlEX. See [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura + #### Compiling @@ -107,9 +94,7 @@ More info can be found on [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u ## Usage pdf2htmlEX /path/to/foobar.pdf - pdf2htmlEX --help - man pdf2htmlEX ## FAQ @@ -152,7 +137,7 @@ pdf2htmlEX is inspired by the following projects: ## Contact * Mailing list - * Please read [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored. + * Please read `man pdf2htmlEX` and [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored. * Please use the **latest master branch**. 
* Lu Wang diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index c9e1516..c5e2f7b 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -18,6 +18,9 @@ namespace pdf2htmlEX { +using std::cerr; +using std::endl; + void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font) { string mime_type, format; @@ -46,6 +49,10 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff format = "svg"; mime_type = "image/svg+xml"; } + else + { + cerr << "Warning: unknown font suffix: " << suffix << endl; + } f_css.fs << "@font-face{" << "font-family:f" << info.id << ";" From 38b8c0c7f83c6f05a63f77e44f5d70f8bb4c1211 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 12:18:43 +0800 Subject: [PATCH 44/45] readme --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index db2b965..e74f401 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ Thanks to all packagers! * run pdf2htmlEX with **--external-hint-tool=ttfautohint** to enable it * [For Windows] * Cygwin - * or MinGW, with some modifications to pdf2htmlEX. See [the pdf2htmlEX page in TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura + * or MinGW, with some modifications to pdf2htmlEX. See [pdf2htmlEX on TeX Wiki](http://oku.edu.mie-u.ac.jp/~okumura/texwiki/?pdf2htmlEX) (in Japanese), special thanks to Haruhiko Okumura #### Compiling @@ -133,7 +133,6 @@ pdf2htmlEX is inspired by the following projects: * Crocodoc * Google Doc - ## Contact * Mailing list @@ -144,7 +143,6 @@ pdf2htmlEX is inspired by the following projects: * Please use the mailing list above unless for personal enquiries. * Accepting messages in **Chinese**, **English** or **Japanese**. 
- ### Special Thanks * Hongliang Tian From 28409d7bf30bb2789434330f6645fa813bbc2262 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 30 Jan 2013 12:21:57 +0800 Subject: [PATCH 45/45] readme --- README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index e74f401..f9716d2 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,16 @@ GPLv2 & GPLv3 Dual licensed ### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html) +## Contact + +* Mailing list + * Please read `man pdf2htmlEX` and [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored. + * Please use the **latest master branch**. + +* Lu Wang + * Please use the mailing list above unless for personal enquiries. + * Accepting messages in **Chinese**, **English** or **Japanese**. + ## Acknowledge pdf2htmlEX is made possible thanks to the following projects: @@ -133,16 +143,6 @@ pdf2htmlEX is inspired by the following projects: * Crocodoc * Google Doc -## Contact - -* Mailing list - * Please read `man pdf2htmlEX` and [**FAQ**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) before sending emails. Or your message might be ignored. - * Please use the **latest master branch**. - -* Lu Wang - * Please use the mailing list above unless for personal enquiries. - * Accepting messages in **Chinese**, **English** or **Japanese**. - ### Special Thanks * Hongliang Tian