mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
seems better font re-encoding
This commit is contained in:
parent
c8d942b511
commit
1ee8518096
@ -14,8 +14,8 @@ set(PDF2HTMLEX_VERSION "0.1")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
|
||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
|
||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
|
||||
|
||||
configure_file (src/config.h.in src/config.h)
|
||||
|
||||
|
@ -19,7 +19,6 @@
|
||||
}
|
||||
#pdf-main .l {
|
||||
position:absolute;
|
||||
height:0;
|
||||
white-space:pre;
|
||||
font-size:1px;
|
||||
transform-origin:0% 100%;
|
||||
|
@ -18,5 +18,6 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
||||
double originX, double originY,
|
||||
CharCode code, int nBytes, Unicode *u, int uLen)
|
||||
{
|
||||
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,7 @@ long long HTMLRenderer::install_font(GfxFont * font)
|
||||
if(param->debug)
|
||||
{
|
||||
cerr << "Install font: (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") -> " << format("f%|1$x|")%new_fn_id << endl;
|
||||
cerr << "Ascent: " << (font->getAscent()) << " Descent: " << (font->getDescent()) << endl;
|
||||
}
|
||||
|
||||
if(font->getType() == fontType3) {
|
||||
@ -133,19 +134,6 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
int code2GID_len = 0;
|
||||
int maxcode = 0;
|
||||
|
||||
// if we cannot map to unicode through ctu, map the char to private Unicode values
|
||||
auto map_to_unicode = [&ctu](int c)->Unicode
|
||||
{
|
||||
Unicode *u;
|
||||
int n = 0;
|
||||
if(ctu)
|
||||
{
|
||||
n = ctu->mapToUnicode(c, &u);
|
||||
}
|
||||
|
||||
return check_unicode(u, n, c);
|
||||
};
|
||||
|
||||
if(!font->isCIDFont())
|
||||
{
|
||||
maxcode = 0xff;
|
||||
@ -201,8 +189,21 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, const string & suffix,
|
||||
for(int i = 0; i <= maxcode; ++i)
|
||||
{
|
||||
map_fout << format("0x%|1$X|") % ((code2GID && (i < code2GID_len))? code2GID[i] : i);
|
||||
map_fout << format(" 0x%|1$X|") % map_to_unicode(i);
|
||||
map_fout << format(" # 0x%|1$X|") % i << endl;
|
||||
|
||||
Unicode u, *pu;
|
||||
int n = 0;
|
||||
if(ctu)
|
||||
n = ctu->mapToUnicode(i, &pu);
|
||||
|
||||
u = check_unicode(pu, n, i, font);
|
||||
|
||||
map_fout << format(" 0x%|1$X|") % u;
|
||||
map_fout << format(" # 0x%|1$X|") % i;
|
||||
|
||||
for(int j = 0; j < n; ++j)
|
||||
map_fout << format(" 0x%|1$X|") % pu[j];
|
||||
|
||||
map_fout << endl;
|
||||
}
|
||||
|
||||
script_fout << format("LoadEncodingFile(%1%, \"%2%\")") % (tmp_dir / (fn+".encoding")) % fn << endl;
|
||||
|
@ -352,8 +352,8 @@ void HTMLRenderer::prepare_line(GfxState * state)
|
||||
double x,y; // in user space
|
||||
state->transform(state->getCurX(), state->getCurY(), &x, &y);
|
||||
|
||||
html_fout << format("<div style=\"bottom:%1%px;left:%2%px;\" class=\"l t%|3$x|\"><em></em>")
|
||||
% y % x % cur_tm_id;
|
||||
html_fout << format("<div style=\"left:%1%px;bottom:%2%px;\" class=\"l t%|3$x|\"><em></em>")
|
||||
% x % y % cur_tm_id;
|
||||
|
||||
//resync position
|
||||
draw_ty = cur_ty;
|
||||
|
@ -196,7 +196,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
++nSpaces;
|
||||
}
|
||||
|
||||
Unicode uu = check_unicode(u, uLen, code);
|
||||
Unicode uu = check_unicode(u, uLen, code, font);
|
||||
outputUnicodes(html_fout, &uu, 1);
|
||||
|
||||
dx += dx1;
|
||||
|
38
src/util.h
38
src/util.h
@ -15,8 +15,10 @@
|
||||
#include <ostream>
|
||||
|
||||
#include <GfxState.h>
|
||||
#include <GfxFont.h>
|
||||
#include <CharTypes.h>
|
||||
#include <UTF8.h>
|
||||
#include <GlobalParams.h>
|
||||
|
||||
#include "Consts.h"
|
||||
|
||||
@ -65,12 +67,38 @@ static inline bool isLegalUnicode(Unicode u)
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
 * Pick the single Unicode value used to re-encode the glyph of `code`.
 *
 * u, len : Unicode sequence the caller obtained for `code`;
 *          u is not read when len == 0
 * code   : character code inside the font
 * font   : font the code belongs to, used to look up the glyph name
 *
 * Fonts are re-encoded with exactly one Unicode value per character code:
 *  - a single legal value is returned unchanged
 *  - otherwise (several values, or one illegal value) the code may be an
 *    expanded ligature; for non-CID fonts try to restore it from the name
 *    of the glyph
 *  - if nothing can be figured out, use a private mapping (code + 0xE000)
 *
 * NOTE(review): code + 0xE000 stays inside the BMP Private Use Area
 * (U+E000..U+F8FF) only while code <= 0x18FF -- confirm that larger codes
 * cannot reach this function, or that leaving the area is acceptable.
 */
static inline Unicode check_unicode(Unicode * u, int len, CharCode code, GfxFont * font)
{
    Unicode private_mapping = static_cast<Unicode>(code + 0xE000);

    if(len == 0)
        return private_mapping;

    if(len == 1)
    {
        if(isLegalUnicode(*u))
            return *u;
    }

    // Without a font there is no glyph name to consult.
    if(font && !font->isCIDFont())
    {
        // The cast result is checked: a font that is not a Gfx8BitFont must
        // fall through to the private mapping instead of being dereferenced.
        Gfx8BitFont * font8 = dynamic_cast<Gfx8BitFont*>(font);
        if(font8)
        {
            char * cname = font8->getCharName(code);
            // may be untranslated ligature
            if(cname)
            {
                Unicode ou = globalParams->mapNameToUnicode(cname);

                if(isLegalUnicode(ou))
                    return ou;
            }
        }
    }

    return private_mapping;
}
|
||||
|
||||
static inline void outputUnicodes(std::ostream & out, const Unicode * u, int uLen)
|
||||
|
Loading…
Reference in New Issue
Block a user