1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

seems better font re-encoding

This commit is contained in:
Lu Wang 2012-08-27 15:56:16 +08:00
parent c8d942b511
commit 1ee8518096
7 changed files with 55 additions and 26 deletions

View File

@ -14,8 +14,8 @@ set(PDF2HTMLEX_VERSION "0.1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
configure_file (src/config.h.in src/config.h)

View File

@ -19,7 +19,6 @@
}
#pdf-main .l {
position:absolute;
height:0;
white-space:pre;
font-size:1px;
transform-origin:0% 100%;

View File

@ -18,5 +18,6 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}

View File

@ -42,6 +42,7 @@ long long HTMLRenderer::install_font(GfxFont * font)
if(param->debug)
{
cerr << "Install font: (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") -> " << format("f%|1$x|")%new_fn_id << endl;
cerr << "Ascent: " << (font->getAscent()) << " Descent: " << (font->getDescent()) << endl;
}
if(font->getType() == fontType3) {
@ -133,19 +134,6 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
int code2GID_len = 0;
int maxcode = 0;
// if we cannot map to unicode through ctu, map the char to private Unicode values
auto map_to_unicode = [&ctu](int c)->Unicode
{
Unicode *u;
int n = 0;
if(ctu)
{
n = ctu->mapToUnicode(c, &u);
}
return check_unicode(u, n, c);
};
if(!font->isCIDFont())
{
maxcode = 0xff;
@ -201,8 +189,21 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
for(int i = 0; i <= maxcode; ++i)
{
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
map_fout << format(" 0x%|1$X|") % map_to_unicode(i);
map_fout << format(" # 0x%|1$X|") % i << endl;
Unicode u, *pu;
int n = 0;
if(ctu)
n = ctu->mapToUnicode(i, &pu);
u = check_unicode(pu, n, i, font);
map_fout << format(" 0x%|1$X|") % u;
map_fout << format(" # 0x%|1$X|") % i;
for(int j = 0; j < n; ++j)
map_fout << format(" 0x%|1$X|") % pu[j];
map_fout << endl;
}
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;

View File

@ -352,8 +352,8 @@ void HTMLRenderer::prepare_line(GfxState * state)
double x,y; // in user space
state->transform(state->getCurX(), state->getCurY(), &x, &y);
html_fout << format("<div style=\"bottom:%1%px;left:%2%px;\" class=\"l t%|3$x|\"><em></em>")
% y % x % cur_tm_id;
html_fout << format("<div style=\"left:%1%px;bottom:%2%px;\" class=\"l t%|3$x|\"><em></em>")
% x % y % cur_tm_id;
//resync position
draw_ty = cur_ty;

View File

@ -196,7 +196,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
++nSpaces;
}
Unicode uu = check_unicode(u, uLen, code);
Unicode uu = check_unicode(u, uLen, code, font);
outputUnicodes(html_fout, &uu, 1);
dx += dx1;

View File

@ -15,8 +15,10 @@
#include <ostream>
#include <GfxState.h>
#include <GfxFont.h>
#include <CharTypes.h>
#include <UTF8.h>
#include <GlobalParams.h>
#include "Consts.h"
@ -65,12 +67,38 @@ static inline bool isLegalUnicode(Unicode u)
return true;
}
/*
 * Choose the single Unicode value used to re-encode the glyph for `code`.
 *
 * Font re-encoding needs exactly one Unicode value per character code.
 *
 * u    : Unicode values the caller obtained for `code` (only read when len > 0)
 * len  : number of entries in `u`
 * code : character code inside the font
 * font : font the code belongs to; consulted for the glyph name when it is
 *        not a CID font
 *
 * Resolution order:
 *   1. no mapping at all (len <= 0 or u is null)   -> private mapping
 *   2. exactly one value and it is legal           -> that value
 *   3. otherwise (several values, possibly an expanded ligature, or an
 *      illegal value): for non-CID fonts, look up the glyph name and map
 *      it back to a single Unicode value; use it if it is legal
 *   4. everything else                             -> private mapping
 *
 * The private mapping is `code + 0xE000`.
 */
static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
{
    const Unicode private_mapping = static_cast<Unicode>(code + 0xE000);

    // Nothing to inspect: fall back immediately (also avoids touching `u`).
    if((len <= 0) || !u)
        return private_mapping;

    if((len == 1) && isLegalUnicode(*u))
        return *u;

    // Glyph names are only queried for non-CID fonts.
    if(font && !font->isCIDFont())
    {
        // Guard the downcast: calling through a failed dynamic_cast would
        // dereference a null pointer.
        Gfx8BitFont * font8 = dynamic_cast<Gfx8BitFont*>(font);
        if(font8)
        {
            char * cname = font8->getCharName(code);
            // may be untranslated ligature
            if(cname)
            {
                Unicode ou = globalParams->mapNameToUnicode(cname);
                if(isLegalUnicode(ou))
                    return ou;
            }
        }
    }

    return private_mapping;
}
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)