mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-07-05 17:48:38 +00:00
working on width for CID fonts
This commit is contained in:
parent
cf1c1a2108
commit
1aebc73818
|
@ -18,6 +18,6 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
||||||
double originX, double originY,
|
double originX, double originY,
|
||||||
CharCode code, int nBytes, Unicode *u, int uLen)
|
CharCode code, int nBytes, Unicode *u, int uLen)
|
||||||
{
|
{
|
||||||
//SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -177,209 +177,216 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
||||||
*iter = tolower(*iter);
|
*iter = tolower(*iter);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
* TODO
|
||||||
* if parm->tounicode is 0, try the provided tounicode map first
|
* if parm->tounicode is 0, try the provided tounicode map first
|
||||||
*/
|
*/
|
||||||
info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0));
|
info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode > 0));
|
||||||
info.has_space = false;
|
info.has_space = false;
|
||||||
|
|
||||||
const char * used_map = nullptr;
|
const char * used_map = nullptr;
|
||||||
|
|
||||||
ffw_metric(&info.ascent, &info.descent, &info.em_size);
|
ffw_metric(&info.ascent, &info.descent, &info.em_size);
|
||||||
if(!get_metric_only)
|
|
||||||
|
if(param->debug)
|
||||||
{
|
{
|
||||||
used_map = font_preprocessor.get_code_map(hash_ref(font->getID()));
|
cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
if(get_metric_only)
|
||||||
* Step 1
|
return;
|
||||||
* dump the font file directly from the font descriptor and put the glyphs into the correct slots
|
|
||||||
*
|
used_map = font_preprocessor.get_code_map(hash_ref(font->getID()));
|
||||||
* for 8bit + nonTrueType
|
|
||||||
* re-encoding the font using a PostScript encoding list (glyph id <-> glyph name)
|
/*
|
||||||
*
|
* Step 1
|
||||||
* for 8bit + TrueType
|
* dump the font file directly from the font descriptor and put the glyphs into the correct slots
|
||||||
* sort the glyphs in their original order, and later map GID (instead of char code) to Unicode
|
*
|
||||||
*
|
* for 8bit + nonTrueType
|
||||||
* for CID + nonTrueType
|
* re-encoding the font using a PostScript encoding list (glyph id <-> glyph name)
|
||||||
* Flatten the font
|
*
|
||||||
*
|
* for 8bit + TrueType
|
||||||
* for CID Truetype
|
* sort the glyphs in their original order, and later map GID (instead of char code) to Unicode
|
||||||
* same as 8bitTrueType, except for that we have to check 65536 charcodes
|
*
|
||||||
*/
|
* for CID + nonTrueType
|
||||||
if(!font->isCIDFont())
|
* Flatten the font
|
||||||
|
*
|
||||||
|
* for CID Truetype
|
||||||
|
* same as 8bitTrueType, except for that we have to check 65536 charcodes
|
||||||
|
*/
|
||||||
|
if(!font->isCIDFont())
|
||||||
|
{
|
||||||
|
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
|
||||||
|
maxcode = 0xff;
|
||||||
|
if(is_truetype_suffix(suffix))
|
||||||
{
|
{
|
||||||
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
|
ffw_reencode_glyph_order();
|
||||||
maxcode = 0xff;
|
FoFiTrueType *fftt = nullptr;
|
||||||
if(is_truetype_suffix(suffix))
|
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
|
||||||
{
|
{
|
||||||
ffw_reencode_glyph_order();
|
code2GID = font_8bit->getCodeToGIDMap(fftt);
|
||||||
FoFiTrueType *fftt = nullptr;
|
code2GID_len = 256;
|
||||||
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
|
delete fftt;
|
||||||
{
|
|
||||||
code2GID = font_8bit->getCodeToGIDMap(fftt);
|
|
||||||
code2GID_len = 256;
|
|
||||||
delete fftt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// move the slot such that it's consistent with the encoding seen in PDF
|
|
||||||
unordered_set<string> nameset;
|
|
||||||
bool name_conflict_warned = false;
|
|
||||||
|
|
||||||
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
|
|
||||||
|
|
||||||
for(int i = 0; i < 256; ++i)
|
|
||||||
{
|
|
||||||
if(!used_map[i]) continue;
|
|
||||||
|
|
||||||
auto cn = font_8bit->getCharName(i);
|
|
||||||
if(cn == nullptr)
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(nameset.insert(string(cn)).second)
|
|
||||||
{
|
|
||||||
cur_mapping2[i] = cn;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(!name_conflict_warned)
|
|
||||||
{
|
|
||||||
name_conflict_warned = true;
|
|
||||||
//TODO: may be resolved using advanced font properties?
|
|
||||||
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ffw_reencode_raw2(cur_mapping2, 256, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
font_cid = dynamic_cast<GfxCIDFont*>(font);
|
// move the slot such that it's consistent with the encoding seen in PDF
|
||||||
maxcode = 0xffff;
|
unordered_set<string> nameset;
|
||||||
|
bool name_conflict_warned = false;
|
||||||
|
|
||||||
if(is_truetype_suffix(suffix))
|
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
|
||||||
|
|
||||||
|
for(int i = 0; i < 256; ++i)
|
||||||
{
|
{
|
||||||
ffw_reencode_glyph_order();
|
if(!used_map[i]) continue;
|
||||||
|
|
||||||
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
|
auto cn = font_8bit->getCharName(i);
|
||||||
|
if(cn == nullptr)
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(nameset.insert(string(cn)).second)
|
||||||
|
{
|
||||||
|
cur_mapping2[i] = cn;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(!name_conflict_warned)
|
||||||
|
{
|
||||||
|
name_conflict_warned = true;
|
||||||
|
//TODO: may be resolved using advanced font properties?
|
||||||
|
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// code2GID has been stored for embedded CID fonts
|
ffw_reencode_raw2(cur_mapping2, 256, 0);
|
||||||
code2GID = _font->getCIDToGID();
|
}
|
||||||
code2GID_len = _font->getCIDToGIDLen();
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
font_cid = dynamic_cast<GfxCIDFont*>(font);
|
||||||
|
maxcode = 0xffff;
|
||||||
|
|
||||||
|
if(is_truetype_suffix(suffix))
|
||||||
|
{
|
||||||
|
ffw_reencode_glyph_order();
|
||||||
|
|
||||||
|
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
|
||||||
|
|
||||||
|
// code2GID has been stored for embedded CID fonts
|
||||||
|
code2GID = _font->getCIDToGID();
|
||||||
|
code2GID_len = _font->getCIDToGIDLen();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ffw_cidflatten();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Step 2
|
||||||
|
* map charcode (or GID for CID truetype)
|
||||||
|
* generate a Consortium encoding file and let fontforge handle it.
|
||||||
|
*
|
||||||
|
* - Always map to Unicode for 8bit TrueType fonts and CID fonts
|
||||||
|
*
|
||||||
|
* - For 8bit nonTruetype fonts:
|
||||||
|
* Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Also fill in the width_list, and set widths accordingly
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
{
|
||||||
|
unordered_set<int> codeset;
|
||||||
|
bool name_conflict_warned = false;
|
||||||
|
|
||||||
|
auto ctu = font->getToUnicode();
|
||||||
|
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
|
||||||
|
memset(width_list, -1, 0x10000 * sizeof(*width_list));
|
||||||
|
|
||||||
|
if(code2GID)
|
||||||
|
maxcode = min(maxcode, code2GID_len - 1);
|
||||||
|
|
||||||
|
bool is_truetype = is_truetype_suffix(suffix);
|
||||||
|
int max_key = maxcode;
|
||||||
|
/*
|
||||||
|
* Traverse all possible codes
|
||||||
|
*/
|
||||||
|
for(int i = 0; i <= maxcode; ++i)
|
||||||
|
{
|
||||||
|
if(!used_map[i])
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skip glyphs without names (only for non-ttf fonts)
|
||||||
|
*/
|
||||||
|
if(!is_truetype && (font_8bit != nullptr)
|
||||||
|
&& (font_8bit->getCharName(i) == nullptr))
|
||||||
|
{
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int k = i;
|
||||||
|
if(code2GID)
|
||||||
|
{
|
||||||
|
if((k = code2GID[i]) == 0) continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(k > max_key)
|
||||||
|
max_key = k;
|
||||||
|
|
||||||
|
Unicode u, *pu=&u;
|
||||||
|
if(info.use_tounicode)
|
||||||
|
{
|
||||||
|
int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
|
||||||
|
u = check_unicode(pu, n, i, font);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ffw_cidflatten();
|
u = unicode_from_font(i, font);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
if(u == ' ')
|
||||||
* Step 2
|
info.has_space = true;
|
||||||
* map charcode (or GID for CID truetype)
|
|
||||||
* generate a Consortium encoding file and let fontforge handle it.
|
|
||||||
*
|
|
||||||
* - Always map to Unicode for 8bit TrueType fonts and CID fonts
|
|
||||||
*
|
|
||||||
* - For 8bit nonTruetype fonts:
|
|
||||||
* Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* Also fill in the width_list, and set widths accordingly
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
if(codeset.insert(u).second)
|
||||||
{
|
|
||||||
unordered_set<int> codeset;
|
|
||||||
bool name_conflict_warned = false;
|
|
||||||
|
|
||||||
auto ctu = font->getToUnicode();
|
|
||||||
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
|
|
||||||
memset(width_list, -1, 0x1000 * sizeof(*width_list));
|
|
||||||
|
|
||||||
if(code2GID)
|
|
||||||
maxcode = min(maxcode, code2GID_len - 1);
|
|
||||||
|
|
||||||
bool is_truetype = is_truetype_suffix(suffix);
|
|
||||||
int max_key = maxcode;
|
|
||||||
/*
|
|
||||||
* Traverse all possible codes
|
|
||||||
*/
|
|
||||||
for(int i = 0; i <= maxcode; ++i)
|
|
||||||
{
|
{
|
||||||
if(!used_map[i])
|
cur_mapping[k] = u;
|
||||||
continue;
|
}
|
||||||
|
else
|
||||||
/*
|
{
|
||||||
* Skip glyphs without names (only for non-ttf fonts)
|
if(!name_conflict_warned)
|
||||||
*/
|
|
||||||
if(!is_truetype && (font_8bit != nullptr)
|
|
||||||
&& (font_8bit->getCharName(i) == nullptr))
|
|
||||||
{
|
{
|
||||||
continue;
|
name_conflict_warned = true;
|
||||||
}
|
//TODO: may be resolved using advanced font properties?
|
||||||
|
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
||||||
int k = i;
|
|
||||||
if(code2GID)
|
|
||||||
{
|
|
||||||
if((k = code2GID[i]) == 0) continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if(k > max_key)
|
|
||||||
max_key = k;
|
|
||||||
|
|
||||||
Unicode u, *pu=&u;
|
|
||||||
if(info.use_tounicode)
|
|
||||||
{
|
|
||||||
int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
|
|
||||||
u = check_unicode(pu, n, i, font);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
u = unicode_from_font(i, font);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(u == ' ')
|
|
||||||
info.has_space = true;
|
|
||||||
|
|
||||||
if(codeset.insert(u).second)
|
|
||||||
{
|
|
||||||
cur_mapping[k] = u;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(!name_conflict_warned)
|
|
||||||
{
|
|
||||||
name_conflict_warned = true;
|
|
||||||
//TODO: may be resolved using advanced font properties?
|
|
||||||
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(font_8bit)
|
|
||||||
{
|
|
||||||
width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
char buf[2];
|
|
||||||
buf[0] = (i >> 8) & 0xff;
|
|
||||||
buf[1] = (i & 0xff);
|
|
||||||
width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
if(font_8bit)
|
||||||
ffw_set_widths(width_list, max_key + 1);
|
{
|
||||||
|
width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size);
|
||||||
if(ctu)
|
}
|
||||||
ctu->decRefCnt();
|
else
|
||||||
|
{
|
||||||
|
char buf[2];
|
||||||
|
buf[0] = (i >> 8) & 0xff;
|
||||||
|
buf[1] = (i & 0xff);
|
||||||
|
width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
||||||
|
ffw_set_widths(width_list, max_key + 1);
|
||||||
|
|
||||||
|
if(ctu)
|
||||||
|
ctu->decRefCnt();
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -392,11 +399,11 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
||||||
|
|
||||||
ffw_save((char*)fn);
|
ffw_save((char*)fn);
|
||||||
ffw_close();
|
ffw_close();
|
||||||
}
|
|
||||||
|
|
||||||
if(param->debug)
|
ffw_load_font((char*)fn);
|
||||||
{
|
ffw_metric(&info.ascent, &info.descent, &info.em_size);
|
||||||
cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
|
ffw_save((char*)fn);
|
||||||
|
ffw_close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
57
src/ffw.c
57
src/ffw.c
|
@ -107,10 +107,6 @@ void ffw_load_font(const char * filename)
|
||||||
assert(font->fv);
|
assert(font->fv);
|
||||||
|
|
||||||
cur_fv = font->fv;
|
cur_fv = font->fv;
|
||||||
|
|
||||||
SFDefaultOS2Info(&font->pfminfo, font, font->fontname);
|
|
||||||
font->pfminfo.pfmset = 1;
|
|
||||||
font->changed = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ffw_do_reencode(Encoding * encoding, int force)
|
static void ffw_do_reencode(Encoding * encoding, int force)
|
||||||
|
@ -222,16 +218,18 @@ void ffw_close(void)
|
||||||
void ffw_metric(double * ascent, double * descent, int * em_size)
|
void ffw_metric(double * ascent, double * descent, int * em_size)
|
||||||
{
|
{
|
||||||
SplineFont * sf = cur_fv->sf;
|
SplineFont * sf = cur_fv->sf;
|
||||||
|
struct pfminfo * info = &sf->pfminfo;
|
||||||
|
|
||||||
|
SFDefaultOS2Info(info, sf, sf->fontname);
|
||||||
|
info->pfmset = 1;
|
||||||
|
sf->changed = 1;
|
||||||
|
|
||||||
DBounds bb;
|
DBounds bb;
|
||||||
SplineFontFindBounds(sf, &bb);
|
SplineFontFindBounds(sf, &bb);
|
||||||
|
|
||||||
struct pfminfo * info = &sf->pfminfo;
|
|
||||||
|
|
||||||
*em_size = sf->ascent + sf->descent;
|
*em_size = sf->ascent + sf->descent;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
//debug
|
|
||||||
printf("bb %lf %lf\n", bb.maxy, bb.miny);
|
printf("bb %lf %lf\n", bb.maxy, bb.miny);
|
||||||
printf("_ %d %d\n", sf->ascent, sf->descent);
|
printf("_ %d %d\n", sf->ascent, sf->descent);
|
||||||
printf("win %d %d\n", info->os2_winascent, info->os2_windescent);
|
printf("win %d %d\n", info->os2_winascent, info->os2_windescent);
|
||||||
|
@ -254,22 +252,28 @@ void ffw_metric(double * ascent, double * descent, int * em_size)
|
||||||
*ascent = *descent = 0;
|
*ascent = *descent = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
sf->ascent = min(floor(bb.maxy+0.5), em);
|
int a = floor(bb.maxy + 0.5);
|
||||||
|
int d = floor(bb.miny + 0.5);
|
||||||
|
|
||||||
|
if(a < 0) a = 0;
|
||||||
|
if(d > 0) d = 0;
|
||||||
|
|
||||||
|
sf->ascent = min(a, em);
|
||||||
sf->descent = em - bb.maxy;
|
sf->descent = em - bb.maxy;
|
||||||
|
|
||||||
info->os2_winascent = 0;
|
info->os2_winascent = a;
|
||||||
info->os2_typoascent = 0;
|
info->os2_typoascent = a;
|
||||||
info->hhead_ascent = 0;
|
info->hhead_ascent = a;
|
||||||
info->winascent_add = 1;
|
info->winascent_add = 0;
|
||||||
info->typoascent_add = 1;
|
info->typoascent_add = 0;
|
||||||
info->hheadascent_add = 1;
|
info->hheadascent_add = 0;
|
||||||
|
|
||||||
info->os2_windescent = 0;
|
info->os2_windescent = -d;
|
||||||
info->os2_typodescent = 0;
|
info->os2_typodescent = d;
|
||||||
info->hhead_descent = 0;
|
info->hhead_descent = d;
|
||||||
info->windescent_add = 1;
|
info->windescent_add = 0;
|
||||||
info->typodescent_add = 1;
|
info->typodescent_add = 0;
|
||||||
info->hheaddescent_add = 1;
|
info->hheaddescent_add = 0;
|
||||||
|
|
||||||
info->os2_typolinegap = 0;
|
info->os2_typolinegap = 0;
|
||||||
info->linegap = 0;
|
info->linegap = 0;
|
||||||
|
@ -282,14 +286,21 @@ void ffw_set_widths(int * width_list, int mapping_len)
|
||||||
{
|
{
|
||||||
SplineFont * sf = cur_fv->sf;
|
SplineFont * sf = cur_fv->sf;
|
||||||
|
|
||||||
|
if(sf->onlybitmaps
|
||||||
|
&& cur_fv->active_bitmap != NULL
|
||||||
|
&& sf->bitmaps != NULL)
|
||||||
|
{
|
||||||
|
printf("TODO: width vs bitmap\n");
|
||||||
|
}
|
||||||
|
|
||||||
EncMap * map = cur_fv->map;
|
EncMap * map = cur_fv->map;
|
||||||
int i;
|
int i;
|
||||||
int imax = min(mapping_len, map->enccount);
|
int imax = min(mapping_len, map->enccount);
|
||||||
for(i = 0; i < imax; ++i)
|
for(i = 0; i < imax; ++i)
|
||||||
{
|
{
|
||||||
// TODO why need this
|
/*
|
||||||
// when width_list[i] == -1, the code itself should be unused.
|
* Do not mess with it if the glyph is not used.
|
||||||
// but might be reference within ttf etc
|
*/
|
||||||
if(width_list[i] == -1) continue;
|
if(width_list[i] == -1) continue;
|
||||||
|
|
||||||
int j = map->map[i];
|
int j = map->map[i];
|
||||||
|
|
Loading…
Reference in New Issue
Block a user