From 1ee8518096a2d0b41b5f7b828ad44e0419acbb07 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Mon, 27 Aug 2012 15:56:16 +0800 Subject: [PATCH] seems better font re-encoding --- CMakeLists.txt | 4 ++-- share/all.css | 1 - src/BackgroundRenderer.cc | 1 + src/HTMLRenderer/install.cc | 31 +++++++++++++++--------------- src/HTMLRenderer/state.cc | 4 ++-- src/HTMLRenderer/text.cc | 2 +- src/util.h | 38 ++++++++++++++++++++++++++++++++----- 7 files changed, 55 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2472991..e69b29b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ set(PDF2HTMLEX_VERSION "0.1") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") configure_file (src/config.h.in src/config.h) diff --git a/share/all.css b/share/all.css index 7a4a191..7bf8eba 100644 --- a/share/all.css +++ b/share/all.css @@ -19,7 +19,6 @@ } #pdf-main .l { position:absolute; - height:0; white-space:pre; font-size:1px; transform-origin:0% 100%; diff --git a/src/BackgroundRenderer.cc b/src/BackgroundRenderer.cc index af84cf9..aad7f99 100644 --- a/src/BackgroundRenderer.cc +++ b/src/BackgroundRenderer.cc @@ -18,5 +18,6 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen) { + SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 27a7b43..7c78d71 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -42,6 +42,7 @@ long long HTMLRenderer::install_font(GfxFont * font) if(param->debug) { cerr << "Install font: (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") -> " << format("f%|1$x|")%new_fn_id << endl; + cerr << "Ascent: " << (font->getAscent()) << " Descent: " << (font->getDescent()) << endl; } if(font->getType() == fontType3) { @@ -133,19 +134,6 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix, int code2GID_len = 0; int maxcode = 0; - // if we cannot map to unicode through ctu, map the char to private Unicode values - auto map_to_unicode = [&ctu](int c)->Unicode - { - Unicode *u; - int n = 0; - if(ctu) - { - n = ctu->mapToUnicode(c, &u); - } - - return check_unicode(u, n, c); - }; - if(!font->isCIDFont()) { maxcode = 0xff; @@ -201,8 +189,21 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix, for(int i = 0; i <= maxcode; ++i) { map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i); - map_fout << format(" 0x%|1$X|") % map_to_unicode(i); - map_fout << format(" # 0x%|1$X|") % i << endl; + + Unicode u, *pu; + int n = 0; + if(ctu) + n = ctu->mapToUnicode(i, &pu); + + u = check_unicode(pu, n, i, font); + + map_fout << format(" 0x%|1$X|") % u; + map_fout << format(" # 0x%|1$X|") % i; + + for(int j = 0; j < n; ++j) + map_fout << format(" 0x%|1$X|") % pu[j]; + + map_fout << endl; } script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index f4b489f..773aa24 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -352,8 +352,8 @@ void HTMLRenderer::prepare_line(GfxState * state) double x,y; // in user space state->transform(state->getCurX(), state->getCurY(), &x, &y); - html_fout << format("
") - % y % x % cur_tm_id; + html_fout << format("
") + % x % y % cur_tm_id; //resync position draw_ty = cur_ty; diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 020765f..b29deae 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -196,7 +196,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) ++nSpaces; } - Unicode uu = check_unicode(u, uLen, code); + Unicode uu = check_unicode(u, uLen, code, font); outputUnicodes(html_fout, &uu, 1); dx += dx1; diff --git a/src/util.h b/src/util.h index 1b5c7a2..89bcf13 100644 --- a/src/util.h +++ b/src/util.h @@ -15,8 +15,10 @@ #include #include +#include #include #include +#include #include "Consts.h" @@ -65,12 +67,38 @@ static inline bool isLegalUnicode(Unicode u) return true; } -static inline Unicode check_unicode(Unicode * u, int len, CharCode code) +/* + * We have to use a single Unicode value to reencode fonts + * if we got multi-unicode values, it might be expanded ligature, try to restore it + * if we cannot figure it out at the end, use a private mapping + */ +static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font) { - if((len == 0) || (len > 1) || (!isLegalUnicode(*u))) - return (Unicode)(code + 0xE000); - else - return *u; + Unicode private_mapping = (Unicode)(code + 0xE000); + + if(len == 0) + return private_mapping; + + if(len == 1) + { + if(isLegalUnicode(*u)) + return *u; + } + + if(!font->isCIDFont()) + { + char * cname = dynamic_cast(font)->getCharName(code); + // may be untranslated ligature + if(cname) + { + Unicode ou = globalParams->mapNameToUnicode(cname); + + if(isLegalUnicode(ou)) + return ou; + } + } + + return private_mapping; } static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)