From 1ac17806967d1bef9b10dcd43450ddb02da3141d Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 17 Aug 2012 15:25:10 +0800 Subject: [PATCH 1/4] add a function detecting illegal unicode in html --- src/util.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/util.h b/src/util.h index 92c9c5e..1e7a740 100644 --- a/src/util.h +++ b/src/util.h @@ -43,6 +43,28 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } +/* + * http://en.wikipedia.org/wiki/HTML_decimal_character_rendering + */ +static inline bool isLegalUnicode(Unicode u) +{ + /* + if((u == 9) || (u == 10) || (u == 13)) + return true; + */ + + if(u <= 31) + return false; + + if((u >= 127) && (u <= 159)) + return false; + + if((u >= 0xd800) && (u <= 0xdfff)) + return false; + + return true; +} + static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen) { for(int i = 0; i < uLen; ++i) From e621ae6716d342ffb57f03f831475223f3a1e461 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 17 Aug 2012 18:13:21 +0800 Subject: [PATCH 2/4] move data files to share/ --- CMakeLists.txt | 4 ++-- {lib => share}/all.css | 0 {lib => share}/head.html | 0 {lib => share}/neck.html | 0 {lib => share}/tail.html | 0 src/HTMLRenderer/general.cc | 14 +++++++------- src/config.h.in | 2 +- src/pdf2htmlEX.cc | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) rename {lib => share}/all.css (100%) rename {lib => share}/head.html (100%) rename {lib => share}/neck.html (100%) rename {lib => share}/tail.html (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 713a52e..2472991 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,5 +39,5 @@ add_executable(pdf2htmlEX target_link_libraries(pdf2htmlEX poppler boost_program_options boost_filesystem boost_system) install (TARGETS pdf2htmlEX DESTINATION bin) -file (GLOB libfiles lib/*) -install (FILES ${libfiles} DESTINATION lib/pdf2htmlEX) +file (GLOB datafiles share/*) +install (FILES ${datafiles} DESTINATION share/pdf2htmlEX) diff --git a/lib/all.css b/share/all.css similarity index 100% rename from lib/all.css rename to share/all.css diff --git a/lib/head.html b/share/head.html similarity index 100% rename from lib/head.html rename to share/head.html diff --git a/lib/neck.html b/share/neck.html similarity index 100% rename from lib/neck.html rename to share/neck.html diff --git a/lib/tail.html b/share/tail.html similarity index 100% rename from lib/tail.html rename to share/tail.html diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 422538c..0b27149 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -112,19 +112,19 @@ void HTMLRenderer::pre_process() html_fout.open(dest_dir / param->output_filename, ofstream::binary); allcss_fout.open(dest_dir / CSS_FILENAME, ofstream::binary); - html_fout << ifstream(PDF2HTMLEX_LIB_PATH / HEAD_HTML_FILENAME, ifstream::binary).rdbuf(); + html_fout << ifstream(PDF2HTMLEX_DATA_PATH / HEAD_HTML_FILENAME, ifstream::binary).rdbuf(); html_fout << "" << endl; - html_fout << ifstream(PDF2HTMLEX_LIB_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf(); + html_fout << ifstream(PDF2HTMLEX_DATA_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf(); } - allcss_fout << ifstream(PDF2HTMLEX_LIB_PATH / CSS_FILENAME, ifstream::binary).rdbuf(); + allcss_fout << ifstream(PDF2HTMLEX_DATA_PATH / CSS_FILENAME, ifstream::binary).rdbuf(); } void HTMLRenderer::post_process() { if(!param->single_html) { - html_fout << ifstream(PDF2HTMLEX_LIB_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf(); + html_fout << ifstream(PDF2HTMLEX_DATA_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf(); } html_fout.close(); @@ -195,17 +195,17 @@ void HTMLRenderer::process_single_html() { ofstream out (dest_dir / param->output_filename, ofstream::binary); - out << ifstream(PDF2HTMLEX_LIB_PATH / HEAD_HTML_FILENAME , ifstream::binary).rdbuf(); + out << ifstream(PDF2HTMLEX_DATA_PATH / HEAD_HTML_FILENAME , ifstream::binary).rdbuf(); out << "" << endl; - out << ifstream(PDF2HTMLEX_LIB_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf(); + out << ifstream(PDF2HTMLEX_DATA_PATH / NECK_HTML_FILENAME, ifstream::binary).rdbuf(); out << ifstream(tmp_dir / (param->output_filename + ".part"), ifstream::binary).rdbuf(); - out << ifstream(PDF2HTMLEX_LIB_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf(); + out << ifstream(PDF2HTMLEX_DATA_PATH / TAIL_HTML_FILENAME, ifstream::binary).rdbuf(); } void HTMLRenderer::add_tmp_file(const string & fn) diff --git a/src/config.h.in b/src/config.h.in index d02324a..e637de3 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -14,7 +14,7 @@ static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@"; static const std::string PDF2HTMLEX_PREFIX = "@CMAKE_INSTALL_PREFIX@"; -static const boost::filesystem::path PDF2HTMLEX_LIB_PATH = boost::filesystem::path(PDF2HTMLEX_PREFIX) / "lib" / "pdf2htmlEX"; +static const boost::filesystem::path PDF2HTMLEX_DATA_PATH = boost::filesystem::path(PDF2HTMLEX_PREFIX) / "share" / "pdf2htmlEX"; #endif //CONFIG_H__ diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index fe74c91..20f1ced 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -172,7 +172,7 @@ int main(int argc, char **argv) //prepare the directories for(const auto & p : {param.dest_dir, param.tmp_dir}) { - if(equivalent(PDF2HTMLEX_LIB_PATH, p)) + if(equivalent(PDF2HTMLEX_DATA_PATH, p)) { cerr << "The specified directory \"" << p << "\" is the library path for pdf2htmlEX. Please use another one." << endl; return -1; From 885430425240732feaf6914d714badacceda3ef1 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 17 Aug 2012 21:45:16 +0800 Subject: [PATCH 3/4] update README --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 15fff21..f67792f 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,9 @@ pdf2html**EX** [**View Demo**](http://coolwanglu.github.com/pdf2htmlEX/demo/demo.html) +**WINDOWS USERS: Please make sure ClearType is turned on** +(Control Panel -> Display -> Appearance -> Effects -> "Use the following method to smooth edges of screen fonts" -> ClearType) + Introduction ----------------------------- pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies, aims to provide an accuracy rendering, while keeping optimized for Web display. From f89db9208b86722f0b0a818e2789c39873f7f8db Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Sat, 18 Aug 2012 15:46:07 +0800 Subject: [PATCH 4/4] update README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f67792f..086fd2c 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ pdf2html**EX** [**View Demo**](http://coolwanglu.github.com/pdf2htmlEX/demo/demo.html) -**WINDOWS USERS: Please make sure ClearType is turned on** +**WINDOWS XP USERS: Please make sure ClearType is turned on** + (Control Panel -> Display -> Appearance -> Effects -> "Use the following method to smooth edges of screen fonts" -> ClearType) Introduction