1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-03 08:38:39 +00:00

working on width for CID fonts

This commit is contained in:
Lu Wang 2012-09-22 01:19:15 +08:00
parent cf1c1a2108
commit 1aebc73818
3 changed files with 218 additions and 200 deletions

View File

@ -18,6 +18,6 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
//SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}

View File

@ -177,209 +177,216 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
*iter = tolower(*iter);
/*
* TODO
* if param->tounicode is 0, try the provided tounicode map first
*/
info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0));
info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode > 0));
info.has_space = false;
const char * used_map = nullptr;
ffw_metric(&info.ascent, &info.descent, &info.em_size);
if(!get_metric_only)
if(param->debug)
{
used_map = font_preprocessor.get_code_map(hash_ref(font->getID()));
cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
}
/*
* Step 1
* dump the font file directly from the font descriptor and put the glyphs into the correct slots
*
* for 8bit + nonTrueType
* re-encoding the font using a PostScript encoding list (glyph id <-> glyph name)
*
* for 8bit + TrueType
* sort the glyphs in their original order; GIDs (instead of char codes) will later be mapped to Unicode
*
* for CID + nonTrueType
* Flatten the font
*
* for CID Truetype
* same as 8bit TrueType, except that we have to check 65536 charcodes
*/
if(!font->isCIDFont())
if(get_metric_only)
return;
used_map = font_preprocessor.get_code_map(hash_ref(font->getID()));
/*
* Step 1
* dump the font file directly from the font descriptor and put the glyphs into the correct slots
*
* for 8bit + nonTrueType
* re-encoding the font using a PostScript encoding list (glyph id <-> glyph name)
*
* for 8bit + TrueType
* sort the glyphs in their original order; GIDs (instead of char codes) will later be mapped to Unicode
*
* for CID + nonTrueType
* Flatten the font
*
* for CID Truetype
* same as 8bit TrueType, except that we have to check 65536 charcodes
*/
if(!font->isCIDFont())
{
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
maxcode = 0xff;
if(is_truetype_suffix(suffix))
{
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
maxcode = 0xff;
if(is_truetype_suffix(suffix))
ffw_reencode_glyph_order();
FoFiTrueType *fftt = nullptr;
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
{
ffw_reencode_glyph_order();
FoFiTrueType *fftt = nullptr;
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
{
code2GID = font_8bit->getCodeToGIDMap(fftt);
code2GID_len = 256;
delete fftt;
}
}
else
{
// move the slot such that it's consistent with the encoding seen in PDF
unordered_set<string> nameset;
bool name_conflict_warned = false;
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
for(int i = 0; i < 256; ++i)
{
if(!used_map[i]) continue;
auto cn = font_8bit->getCharName(i);
if(cn == nullptr)
{
continue;
}
else
{
if(nameset.insert(string(cn)).second)
{
cur_mapping2[i] = cn;
}
else
{
if(!name_conflict_warned)
{
name_conflict_warned = true;
//TODO: may be resolved using advanced font properties?
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
}
}
}
}
ffw_reencode_raw2(cur_mapping2, 256, 0);
code2GID = font_8bit->getCodeToGIDMap(fftt);
code2GID_len = 256;
delete fftt;
}
}
else
{
font_cid = dynamic_cast<GfxCIDFont*>(font);
maxcode = 0xffff;
// move the slot such that it's consistent with the encoding seen in PDF
unordered_set<string> nameset;
bool name_conflict_warned = false;
if(is_truetype_suffix(suffix))
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
for(int i = 0; i < 256; ++i)
{
ffw_reencode_glyph_order();
if(!used_map[i]) continue;
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
auto cn = font_8bit->getCharName(i);
if(cn == nullptr)
{
continue;
}
else
{
if(nameset.insert(string(cn)).second)
{
cur_mapping2[i] = cn;
}
else
{
if(!name_conflict_warned)
{
name_conflict_warned = true;
//TODO: may be resolved using advanced font properties?
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
}
}
}
}
// code2GID has been stored for embedded CID fonts
code2GID = _font->getCIDToGID();
code2GID_len = _font->getCIDToGIDLen();
ffw_reencode_raw2(cur_mapping2, 256, 0);
}
}
else
{
font_cid = dynamic_cast<GfxCIDFont*>(font);
maxcode = 0xffff;
if(is_truetype_suffix(suffix))
{
ffw_reencode_glyph_order();
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
// code2GID has been stored for embedded CID fonts
code2GID = _font->getCIDToGID();
code2GID_len = _font->getCIDToGIDLen();
}
else
{
ffw_cidflatten();
}
}
/*
* Step 2
* map charcode (or GID for CID truetype)
* generate an Consortium encoding file and let fontforge handle it.
*
* - Always map to Unicode for 8bit TrueType fonts and CID fonts
*
* - For 8bit nonTruetype fonts:
* Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
*
*
* Also fill in the width_list, and set widths accordingly
*/
{
unordered_set<int> codeset;
bool name_conflict_warned = false;
auto ctu = font->getToUnicode();
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
memset(width_list, -1, 0x10000 * sizeof(*width_list));
if(code2GID)
maxcode = min(maxcode, code2GID_len - 1);
bool is_truetype = is_truetype_suffix(suffix);
int max_key = maxcode;
/*
* Traverse all possible codes
*/
for(int i = 0; i <= maxcode; ++i)
{
if(!used_map[i])
continue;
/*
* Skip glyphs without names (only for non-ttf fonts)
*/
if(!is_truetype && (font_8bit != nullptr)
&& (font_8bit->getCharName(i) == nullptr))
{
continue;
}
int k = i;
if(code2GID)
{
if((k = code2GID[i]) == 0) continue;
}
if(k > max_key)
max_key = k;
Unicode u, *pu=&u;
if(info.use_tounicode)
{
int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
u = check_unicode(pu, n, i, font);
}
else
{
ffw_cidflatten();
u = unicode_from_font(i, font);
}
}
/*
* Step 2
* map charcode (or GID for CID truetype)
* generate an Consortium encoding file and let fontforge handle it.
*
* - Always map to Unicode for 8bit TrueType fonts and CID fonts
*
* - For 8bit nonTruetype fonts:
* Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set
*
*
* Also fill in the width_list, and set widths accordingly
*/
if(u == ' ')
info.has_space = true;
{
unordered_set<int> codeset;
bool name_conflict_warned = false;
auto ctu = font->getToUnicode();
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
memset(width_list, -1, 0x1000 * sizeof(*width_list));
if(code2GID)
maxcode = min(maxcode, code2GID_len - 1);
bool is_truetype = is_truetype_suffix(suffix);
int max_key = maxcode;
/*
* Traverse all possible codes
*/
for(int i = 0; i <= maxcode; ++i)
if(codeset.insert(u).second)
{
if(!used_map[i])
continue;
/*
* Skip glyphs without names (only for non-ttf fonts)
*/
if(!is_truetype && (font_8bit != nullptr)
&& (font_8bit->getCharName(i) == nullptr))
cur_mapping[k] = u;
}
else
{
if(!name_conflict_warned)
{
continue;
}
int k = i;
if(code2GID)
{
if((k = code2GID[i]) == 0) continue;
}
if(k > max_key)
max_key = k;
Unicode u, *pu=&u;
if(info.use_tounicode)
{
int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
u = check_unicode(pu, n, i, font);
}
else
{
u = unicode_from_font(i, font);
}
if(u == ' ')
info.has_space = true;
if(codeset.insert(u).second)
{
cur_mapping[k] = u;
}
else
{
if(!name_conflict_warned)
{
name_conflict_warned = true;
//TODO: may be resolved using advanced font properties?
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
}
}
if(font_8bit)
{
width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size);
}
else
{
char buf[2];
buf[0] = (i >> 8) & 0xff;
buf[1] = (i & 0xff);
width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size);
name_conflict_warned = true;
//TODO: may be resolved using advanced font properties?
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
}
}
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
ffw_set_widths(width_list, max_key + 1);
if(ctu)
ctu->decRefCnt();
if(font_8bit)
{
width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size);
}
else
{
char buf[2];
buf[0] = (i >> 8) & 0xff;
buf[1] = (i & 0xff);
width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size);
}
}
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
ffw_set_widths(width_list, max_key + 1);
if(ctu)
ctu->decRefCnt();
}
{
@ -392,11 +399,11 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
ffw_save((char*)fn);
ffw_close();
}
if(param->debug)
{
cerr << "Ascent: " << info.ascent << " Descent: " << info.descent << endl;
ffw_load_font((char*)fn);
ffw_metric(&info.ascent, &info.descent, &info.em_size);
ffw_save((char*)fn);
ffw_close();
}
}

View File

@ -107,10 +107,6 @@ void ffw_load_font(const char * filename)
assert(font->fv);
cur_fv = font->fv;
SFDefaultOS2Info(&font->pfminfo, font, font->fontname);
font->pfminfo.pfmset = 1;
font->changed = 1;
}
static void ffw_do_reencode(Encoding * encoding, int force)
@ -222,16 +218,18 @@ void ffw_close(void)
void ffw_metric(double * ascent, double * descent, int * em_size)
{
SplineFont * sf = cur_fv->sf;
struct pfminfo * info = &sf->pfminfo;
SFDefaultOS2Info(info, sf, sf->fontname);
info->pfmset = 1;
sf->changed = 1;
DBounds bb;
SplineFontFindBounds(sf, &bb);
struct pfminfo * info = &sf->pfminfo;
*em_size = sf->ascent + sf->descent;
/*
//debug
printf("bb %lf %lf\n", bb.maxy, bb.miny);
printf("_ %d %d\n", sf->ascent, sf->descent);
printf("win %d %d\n", info->os2_winascent, info->os2_windescent);
@ -254,22 +252,28 @@ void ffw_metric(double * ascent, double * descent, int * em_size)
*ascent = *descent = 0;
}
sf->ascent = min(floor(bb.maxy+0.5), em);
int a = floor(bb.maxy + 0.5);
int d = floor(bb.miny + 0.5);
if(a < 0) a = 0;
if(d > 0) d = 0;
sf->ascent = min(a, em);
sf->descent = em - bb.maxy;
info->os2_winascent = 0;
info->os2_typoascent = 0;
info->hhead_ascent = 0;
info->winascent_add = 1;
info->typoascent_add = 1;
info->hheadascent_add = 1;
info->os2_winascent = a;
info->os2_typoascent = a;
info->hhead_ascent = a;
info->winascent_add = 0;
info->typoascent_add = 0;
info->hheadascent_add = 0;
info->os2_windescent = 0;
info->os2_typodescent = 0;
info->hhead_descent = 0;
info->windescent_add = 1;
info->typodescent_add = 1;
info->hheaddescent_add = 1;
info->os2_windescent = -d;
info->os2_typodescent = d;
info->hhead_descent = d;
info->windescent_add = 0;
info->typodescent_add = 0;
info->hheaddescent_add = 0;
info->os2_typolinegap = 0;
info->linegap = 0;
@ -282,14 +286,21 @@ void ffw_set_widths(int * width_list, int mapping_len)
{
SplineFont * sf = cur_fv->sf;
if(sf->onlybitmaps
&& cur_fv->active_bitmap != NULL
&& sf->bitmaps != NULL)
{
printf("TODO: width vs bitmap\n");
}
EncMap * map = cur_fv->map;
int i;
int imax = min(mapping_len, map->enccount);
for(i = 0; i < imax; ++i)
{
// TODO why need this
// when width_list[i] == -1, the code itself should be unused.
// but might be reference within ttf etc
/*
* Do not mess with it if the glyph is not used.
*/
if(width_list[i] == -1) continue;
int j = map->map[i];