2012-08-14 08:23:15 +00:00
|
|
|
/*
 * text.cc
 *
 * Handling text and fonts, and related stuff
 *
 * Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
 */
|
|
|
|
|
|
|
|
#include <iostream>
|
2012-08-20 21:48:21 +00:00
|
|
|
#include <algorithm>
|
2012-09-01 18:22:16 +00:00
|
|
|
#include <unordered_set>
|
2012-09-07 17:18:15 +00:00
|
|
|
#include <cctype>
|
2012-09-20 05:24:16 +00:00
|
|
|
#include <cmath>
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-31 07:27:17 +00:00
|
|
|
#include <CharCodeToUnicode.h>
|
|
|
|
#include <fofi/FoFiTrueType.h>
|
2012-08-27 15:09:01 +00:00
|
|
|
|
2012-08-14 08:23:15 +00:00
|
|
|
#include "HTMLRenderer.h"
|
2012-11-29 09:28:05 +00:00
|
|
|
#include "util/ffw.h"
|
|
|
|
#include "util/namespace.h"
|
2012-11-29 09:45:26 +00:00
|
|
|
#include "util/unicode.h"
|
2012-11-29 10:16:05 +00:00
|
|
|
#include "util/path.h"
|
|
|
|
#include "util/math.h"
|
2012-11-29 10:28:07 +00:00
|
|
|
#include "util/misc.h"
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-09-12 15:26:14 +00:00
|
|
|
namespace pdf2htmlEX {
|
|
|
|
|
2012-09-01 18:22:16 +00:00
|
|
|
using std::unordered_set;
|
2012-09-05 16:07:21 +00:00
|
|
|
using std::min;
|
2012-09-06 07:09:47 +00:00
|
|
|
using std::all_of;
|
2012-09-27 12:17:11 +00:00
|
|
|
using std::floor;
|
2012-09-26 13:31:29 +00:00
|
|
|
using std::swap;
|
2012-11-29 10:28:07 +00:00
|
|
|
using std::cerr;
|
|
|
|
using std::endl;
|
2012-08-20 21:48:21 +00:00
|
|
|
|
2012-09-09 16:21:46 +00:00
|
|
|
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
|
|
|
Object obj, obj1, obj2;
|
2012-08-27 15:09:01 +00:00
|
|
|
Object font_obj, font_obj2, fontdesc_obj;
|
|
|
|
string suffix;
|
2012-09-09 16:21:46 +00:00
|
|
|
string filepath;
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
try
|
|
|
|
{
|
|
|
|
// mupdf consulted
|
|
|
|
string subtype;
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
auto * id = font->getID();
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
Object ref_obj;
|
|
|
|
ref_obj.initRef(id->num, id->gen);
|
|
|
|
ref_obj.fetch(xref, &font_obj);
|
|
|
|
ref_obj.free();
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
if(!font_obj.isDict())
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
cerr << "Font object is not a dictionary" << endl;
|
|
|
|
throw 0;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
Dict * dict = font_obj.getDict();
|
|
|
|
if(dict->lookup("DescendantFonts", &font_obj2)->isArray())
|
|
|
|
{
|
|
|
|
if(font_obj2.arrayGetLength() == 0)
|
|
|
|
{
|
|
|
|
cerr << "Warning: empty DescendantFonts array" << endl;
|
|
|
|
}
|
|
|
|
else
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
if(font_obj2.arrayGetLength() > 1)
|
|
|
|
cerr << "TODO: multiple entries in DescendantFonts array" << endl;
|
|
|
|
|
|
|
|
if(font_obj2.arrayGet(0, &obj2)->isDict())
|
|
|
|
{
|
|
|
|
dict = obj2.getDict();
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict())
|
|
|
|
{
|
|
|
|
cerr << "Cannot find FontDescriptor " << endl;
|
|
|
|
throw 0;
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
dict = fontdesc_obj.getDict();
|
|
|
|
|
|
|
|
if(dict->lookup("FontFile3", &obj)->isStream())
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName())
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
subtype = obj1.getName();
|
|
|
|
if(subtype == "Type1C")
|
|
|
|
{
|
|
|
|
suffix = ".cff";
|
|
|
|
}
|
|
|
|
else if (subtype == "CIDFontType0C")
|
|
|
|
{
|
|
|
|
suffix = ".cid";
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
cerr << "Unknown subtype: " << subtype << endl;
|
|
|
|
throw 0;
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
cerr << "Invalid subtype in font descriptor" << endl;
|
|
|
|
throw 0;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
}
|
2012-08-27 15:09:01 +00:00
|
|
|
else if (dict->lookup("FontFile2", &obj)->isStream())
|
|
|
|
{
|
|
|
|
suffix = ".ttf";
|
|
|
|
}
|
|
|
|
else if (dict->lookup("FontFile", &obj)->isStream())
|
|
|
|
{
|
|
|
|
suffix = ".pfa";
|
|
|
|
}
|
2012-08-14 08:23:15 +00:00
|
|
|
else
|
|
|
|
{
|
2012-08-27 15:09:01 +00:00
|
|
|
cerr << "Cannot find FontFile for dump" << endl;
|
|
|
|
throw 0;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
if(suffix == "")
|
|
|
|
{
|
|
|
|
cerr << "Font type unrecognized" << endl;
|
|
|
|
throw 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
obj.streamReset();
|
2012-08-14 08:23:15 +00:00
|
|
|
|
2012-09-12 16:16:34 +00:00
|
|
|
filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str());
|
2012-11-26 21:38:13 +00:00
|
|
|
tmp_files.add(filepath);
|
2012-09-09 16:21:46 +00:00
|
|
|
|
2012-09-09 17:27:32 +00:00
|
|
|
ofstream outf(filepath, ofstream::binary);
|
2012-09-17 12:07:50 +00:00
|
|
|
if(!outf)
|
|
|
|
throw string("Cannot open file ") + filepath + " for writing";
|
2012-08-15 04:27:41 +00:00
|
|
|
|
2012-08-27 15:09:01 +00:00
|
|
|
char buf[1024];
|
|
|
|
int len;
|
|
|
|
while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0)
|
|
|
|
{
|
|
|
|
outf.write(buf, len);
|
|
|
|
}
|
|
|
|
outf.close();
|
|
|
|
obj.streamClose();
|
|
|
|
}
|
|
|
|
catch(int)
|
2012-08-14 08:23:15 +00:00
|
|
|
{
|
2012-09-07 16:38:41 +00:00
|
|
|
cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl;
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
obj2.free();
|
|
|
|
obj1.free();
|
|
|
|
obj.free();
|
|
|
|
|
|
|
|
fontdesc_obj.free();
|
|
|
|
font_obj2.free();
|
|
|
|
font_obj.free();
|
2012-08-27 15:09:01 +00:00
|
|
|
|
2012-08-31 07:27:17 +00:00
|
|
|
return filepath;
|
|
|
|
}
|
|
|
|
|
2012-09-09 16:21:46 +00:00
|
|
|
void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only)
|
2012-08-31 07:27:17 +00:00
|
|
|
{
|
2012-09-27 04:30:11 +00:00
|
|
|
if(param->debug)
|
|
|
|
{
|
|
|
|
cerr << "Embed font: " << filepath << " " << info.id << endl;
|
|
|
|
}
|
|
|
|
|
2012-09-17 12:40:10 +00:00
|
|
|
ffw_load_font(filepath.c_str());
|
2012-10-06 11:20:35 +00:00
|
|
|
ffw_prepare_font();
|
2012-10-05 15:38:17 +00:00
|
|
|
|
|
|
|
if(param->debug)
|
|
|
|
{
|
2012-10-06 11:20:35 +00:00
|
|
|
auto fn = str_fmt("%s/__raw_font_%lld", param->tmp_dir.c_str(), info.id, param->font_suffix.c_str());
|
2012-11-26 21:38:13 +00:00
|
|
|
tmp_files.add((char*)fn);
|
2012-10-06 11:20:35 +00:00
|
|
|
ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
|
2012-10-05 15:38:17 +00:00
|
|
|
}
|
|
|
|
|
2012-08-31 07:27:17 +00:00
|
|
|
int * code2GID = nullptr;
|
|
|
|
int code2GID_len = 0;
|
|
|
|
int maxcode = 0;
|
|
|
|
|
|
|
|
Gfx8BitFont * font_8bit = nullptr;
|
2012-09-20 05:24:16 +00:00
|
|
|
GfxCIDFont * font_cid = nullptr;
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-09 18:38:40 +00:00
|
|
|
string suffix = get_suffix(filepath);
|
|
|
|
for(auto iter = suffix.begin(); iter != suffix.end(); ++iter)
|
|
|
|
*iter = tolower(*iter);
|
|
|
|
|
2012-09-20 15:40:44 +00:00
|
|
|
/*
|
|
|
|
* if parm->tounicode is 0, try the provided tounicode map first
|
|
|
|
*/
|
2012-09-24 08:59:47 +00:00
|
|
|
info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0));
|
2012-10-08 08:26:19 +00:00
|
|
|
bool has_space = false;
|
2012-09-06 17:33:00 +00:00
|
|
|
|
2012-09-17 18:37:30 +00:00
|
|
|
const char * used_map = nullptr;
|
|
|
|
|
2012-09-23 17:55:35 +00:00
|
|
|
info.em_size = ffw_get_em_size();
|
2012-09-21 17:19:15 +00:00
|
|
|
|
|
|
|
if(get_metric_only)
|
2012-09-30 07:43:23 +00:00
|
|
|
{
|
|
|
|
ffw_metric(&info.ascent, &info.descent);
|
|
|
|
ffw_close();
|
2012-09-21 17:19:15 +00:00
|
|
|
return;
|
2012-09-30 07:43:23 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
|
2012-09-22 04:41:53 +00:00
|
|
|
used_map = preprocessor.get_code_map(hash_ref(font->getID()));
|
2012-09-21 17:19:15 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 1
|
2012-09-30 07:43:23 +00:00
|
|
|
* dump the font file directly from the font descriptor and put the glyphs into the correct slots *
|
2012-09-21 17:19:15 +00:00
|
|
|
* for 8bit + nonTrueType
|
2012-10-06 16:51:03 +00:00
|
|
|
* re-encoding the font by glyph names
|
2012-09-21 17:19:15 +00:00
|
|
|
*
|
|
|
|
* for 8bit + TrueType
|
|
|
|
* sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode
|
|
|
|
*
|
|
|
|
* for CID + nonTrueType
|
|
|
|
* Flatten the font
|
|
|
|
*
|
|
|
|
* for CID Truetype
|
|
|
|
* same as 8bitTrueType, except for that we have to check 65536 charcodes
|
|
|
|
*/
|
|
|
|
if(!font->isCIDFont())
|
|
|
|
{
|
|
|
|
font_8bit = dynamic_cast<Gfx8BitFont*>(font);
|
|
|
|
maxcode = 0xff;
|
|
|
|
if(is_truetype_suffix(suffix))
|
2012-08-31 07:27:17 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
ffw_reencode_glyph_order();
|
|
|
|
FoFiTrueType *fftt = nullptr;
|
|
|
|
if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr)
|
2012-08-31 07:50:14 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
code2GID = font_8bit->getCodeToGIDMap(fftt);
|
|
|
|
code2GID_len = 256;
|
|
|
|
delete fftt;
|
2012-08-31 07:50:14 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// move the slot such that it's consistent with the encoding seen in PDF
|
|
|
|
unordered_set<string> nameset;
|
|
|
|
bool name_conflict_warned = false;
|
2012-08-31 07:50:14 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
memset(cur_mapping2, 0, 0x100 * sizeof(char*));
|
2012-09-03 14:32:56 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
for(int i = 0; i < 256; ++i)
|
|
|
|
{
|
|
|
|
if(!used_map[i]) continue;
|
2012-09-06 16:58:23 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
auto cn = font_8bit->getCharName(i);
|
|
|
|
if(cn == nullptr)
|
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if(nameset.insert(string(cn)).second)
|
2012-09-01 18:22:16 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
cur_mapping2[i] = cn;
|
2012-09-01 18:22:16 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
if(!name_conflict_warned)
|
2012-09-01 18:22:16 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
name_conflict_warned = true;
|
|
|
|
//TODO: may be resolved using advanced font properties?
|
|
|
|
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
2012-09-01 18:22:16 +00:00
|
|
|
}
|
|
|
|
}
|
2012-08-31 07:50:14 +00:00
|
|
|
}
|
2012-08-31 07:27:17 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
|
|
|
|
ffw_reencode_raw2(cur_mapping2, 256, 0);
|
2012-08-31 07:27:17 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
font_cid = dynamic_cast<GfxCIDFont*>(font);
|
|
|
|
maxcode = 0xffff;
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(is_truetype_suffix(suffix))
|
|
|
|
{
|
|
|
|
ffw_reencode_glyph_order();
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
GfxCIDFont * _font = dynamic_cast<GfxCIDFont*>(font);
|
2012-08-31 07:50:14 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
// code2GID has been stored for embedded CID fonts
|
|
|
|
code2GID = _font->getCIDToGID();
|
|
|
|
code2GID_len = _font->getCIDToGIDLen();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
ffw_cidflatten();
|
2012-08-31 07:27:17 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
}
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
/*
|
|
|
|
* Step 2
|
2012-10-06 16:51:03 +00:00
|
|
|
* - map charcode (or GID for CID truetype)
|
2012-09-21 17:19:15 +00:00
|
|
|
*
|
2012-10-06 16:51:03 +00:00
|
|
|
* -> Always map to Unicode for 8bit TrueType fonts and CID fonts
|
2012-09-21 17:19:15 +00:00
|
|
|
*
|
2012-10-06 16:51:03 +00:00
|
|
|
* -> For 8bit nonTruetype fonts:
|
|
|
|
* Try to calculate the correct Unicode value from the glyph names, when collision is detected in ToUnicode Map
|
2012-09-21 17:19:15 +00:00
|
|
|
*
|
2012-10-06 16:51:03 +00:00
|
|
|
* - Fill in the width_list, and set widths accordingly
|
|
|
|
* - Remove unused glyphs
|
2012-09-21 17:19:15 +00:00
|
|
|
*/
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-03 12:57:14 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
{
|
|
|
|
unordered_set<int> codeset;
|
|
|
|
bool name_conflict_warned = false;
|
2012-09-08 02:24:04 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
auto ctu = font->getToUnicode();
|
|
|
|
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
|
|
|
|
memset(width_list, -1, 0x10000 * sizeof(*width_list));
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(code2GID)
|
2012-10-02 18:19:40 +00:00
|
|
|
maxcode = min<int>(maxcode, code2GID_len - 1);
|
2012-09-21 17:19:15 +00:00
|
|
|
|
|
|
|
bool is_truetype = is_truetype_suffix(suffix);
|
|
|
|
int max_key = maxcode;
|
|
|
|
/*
|
|
|
|
* Traverse all possible codes
|
|
|
|
*/
|
2012-09-24 08:59:47 +00:00
|
|
|
bool retried = false; // avoid infinite loop
|
2012-09-21 17:19:15 +00:00
|
|
|
for(int i = 0; i <= maxcode; ++i)
|
|
|
|
{
|
|
|
|
if(!used_map[i])
|
|
|
|
continue;
|
2012-09-05 16:07:21 +00:00
|
|
|
|
2012-09-20 05:24:16 +00:00
|
|
|
/*
|
2012-09-21 17:19:15 +00:00
|
|
|
* Skip glyphs without names (only for non-ttf fonts)
|
2012-09-20 05:24:16 +00:00
|
|
|
*/
|
2012-09-21 17:19:15 +00:00
|
|
|
if(!is_truetype && (font_8bit != nullptr)
|
|
|
|
&& (font_8bit->getCharName(i) == nullptr))
|
2012-09-03 12:57:14 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
continue;
|
|
|
|
}
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
int k = i;
|
|
|
|
if(code2GID)
|
|
|
|
{
|
|
|
|
if((k = code2GID[i]) == 0) continue;
|
|
|
|
}
|
2012-09-05 16:07:21 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(k > max_key)
|
|
|
|
max_key = k;
|
2012-09-05 17:01:47 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
Unicode u, *pu=&u;
|
|
|
|
if(info.use_tounicode)
|
|
|
|
{
|
|
|
|
int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0;
|
|
|
|
u = check_unicode(pu, n, i, font);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
u = unicode_from_font(i, font);
|
|
|
|
}
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(u == ' ')
|
2012-10-08 08:26:19 +00:00
|
|
|
has_space = true;
|
2012-09-19 04:24:13 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(codeset.insert(u).second)
|
|
|
|
{
|
|
|
|
cur_mapping[k] = u;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2012-09-24 08:59:47 +00:00
|
|
|
// collision detected
|
|
|
|
if(param->tounicode == 0)
|
|
|
|
{
|
|
|
|
// in auto mode, just drop the tounicode map
|
|
|
|
if(!retried)
|
|
|
|
{
|
2012-11-30 09:33:27 +00:00
|
|
|
cerr << "ToUnicode CMap is not valid and got dropped for font: " << hex << info.id << dec << endl;
|
2012-09-24 08:59:47 +00:00
|
|
|
retried = true;
|
|
|
|
codeset.clear();
|
|
|
|
info.use_tounicode = false;
|
2012-10-06 14:41:06 +00:00
|
|
|
//TODO: constant for the length
|
2012-09-24 08:59:47 +00:00
|
|
|
memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping));
|
|
|
|
memset(width_list, -1, 0x10000 * sizeof(*width_list));
|
|
|
|
i = -1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
if(!name_conflict_warned)
|
2012-09-08 02:24:04 +00:00
|
|
|
{
|
2012-09-21 17:19:15 +00:00
|
|
|
name_conflict_warned = true;
|
|
|
|
//TODO: may be resolved using advanced font properties?
|
|
|
|
cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl;
|
2012-09-08 02:24:04 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
}
|
2012-09-20 05:24:16 +00:00
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(font_8bit)
|
|
|
|
{
|
2012-09-27 12:17:11 +00:00
|
|
|
width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5);
|
2012-09-03 12:57:14 +00:00
|
|
|
}
|
2012-09-21 17:19:15 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
char buf[2];
|
|
|
|
buf[0] = (i >> 8) & 0xff;
|
|
|
|
buf[1] = (i & 0xff);
|
2012-09-27 12:17:11 +00:00
|
|
|
width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
|
2012-09-21 17:19:15 +00:00
|
|
|
}
|
|
|
|
}
|
2012-08-31 07:50:14 +00:00
|
|
|
|
2012-10-05 15:38:17 +00:00
|
|
|
ffw_set_widths(width_list, max_key + 1, param->stretch_narrow_glyph, param->squeeze_wide_glyph, param->remove_unused_glyph);
|
2012-10-06 11:20:35 +00:00
|
|
|
|
2012-09-30 07:43:23 +00:00
|
|
|
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
2012-08-31 07:27:17 +00:00
|
|
|
|
2012-11-30 09:33:27 +00:00
|
|
|
// we need the space character for offsets
|
2012-10-08 08:26:19 +00:00
|
|
|
if(!has_space)
|
|
|
|
{
|
|
|
|
int space_width;
|
|
|
|
if(font_8bit)
|
|
|
|
{
|
|
|
|
space_width = (int)floor(font_8bit->getWidth(' ') * info.em_size + 0.5);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
char buf[2] = {0, ' '};
|
|
|
|
space_width = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
|
|
|
|
}
|
|
|
|
ffw_make_char((int)' ', space_width);
|
|
|
|
}
|
|
|
|
|
2012-09-21 17:19:15 +00:00
|
|
|
if(ctu)
|
|
|
|
ctu->decRefCnt();
|
2012-08-31 07:27:17 +00:00
|
|
|
}
|
|
|
|
|
2012-09-22 08:41:38 +00:00
|
|
|
/*
|
|
|
|
* Step 3
|
|
|
|
*
|
|
|
|
* Generate the font as desired
|
|
|
|
*
|
|
|
|
*/
|
2012-09-26 13:31:29 +00:00
|
|
|
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
2012-11-26 21:38:13 +00:00
|
|
|
tmp_files.add(cur_tmp_fn);
|
2012-09-26 13:31:29 +00:00
|
|
|
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str());
|
2012-11-26 21:38:13 +00:00
|
|
|
tmp_files.add(other_tmp_fn);
|
2012-09-23 12:25:22 +00:00
|
|
|
|
2012-09-26 13:31:29 +00:00
|
|
|
ffw_save(cur_tmp_fn.c_str());
|
2012-10-04 14:21:46 +00:00
|
|
|
|
2012-09-23 12:25:22 +00:00
|
|
|
ffw_close();
|
2012-09-26 13:31:29 +00:00
|
|
|
|
2012-09-23 12:25:22 +00:00
|
|
|
/*
|
2012-09-23 14:53:21 +00:00
|
|
|
* Step 4
|
2012-09-23 18:22:42 +00:00
|
|
|
* Font Hinting
|
2012-09-23 12:25:22 +00:00
|
|
|
*/
|
2012-09-23 18:22:42 +00:00
|
|
|
bool hinted = false;
|
|
|
|
|
|
|
|
// Call external hinting program if specified
|
2012-09-23 12:25:22 +00:00
|
|
|
if(param->external_hint_tool != "")
|
|
|
|
{
|
2012-09-26 13:31:29 +00:00
|
|
|
hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0);
|
2012-09-23 18:22:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Call internal hinting procedure if specified
|
|
|
|
if((!hinted) && (param->auto_hint))
|
|
|
|
{
|
2012-09-26 13:31:29 +00:00
|
|
|
ffw_load_font(cur_tmp_fn.c_str());
|
2012-09-23 18:22:42 +00:00
|
|
|
ffw_auto_hint();
|
2012-09-26 13:31:29 +00:00
|
|
|
ffw_save(other_tmp_fn.c_str());
|
|
|
|
ffw_close();
|
2012-09-23 18:22:42 +00:00
|
|
|
hinted = true;
|
|
|
|
}
|
|
|
|
|
2012-09-26 13:31:29 +00:00
|
|
|
if(hinted)
|
2012-09-23 18:22:42 +00:00
|
|
|
{
|
2012-09-26 13:31:29 +00:00
|
|
|
swap(cur_tmp_fn, other_tmp_fn);
|
2012-09-21 13:02:54 +00:00
|
|
|
}
|
2012-09-23 14:53:21 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Step 5
|
2012-09-26 13:31:29 +00:00
|
|
|
* Generate the font
|
2012-09-23 14:53:21 +00:00
|
|
|
* Reload to retrieve/fix accurate ascent/descent
|
|
|
|
*/
|
2012-09-26 13:31:29 +00:00
|
|
|
string fn = (char*)str_fmt("%s/f%llx%s",
|
|
|
|
(param->single_html ? param->tmp_dir : param->dest_dir).c_str(),
|
|
|
|
info.id, param->font_suffix.c_str());
|
|
|
|
|
|
|
|
if(param->single_html)
|
2012-11-26 21:38:13 +00:00
|
|
|
tmp_files.add(fn);
|
2012-09-26 13:31:29 +00:00
|
|
|
|
|
|
|
ffw_load_font(cur_tmp_fn.c_str());
|
2012-09-23 17:55:35 +00:00
|
|
|
ffw_metric(&info.ascent, &info.descent);
|
2012-09-23 14:53:21 +00:00
|
|
|
ffw_save(fn.c_str());
|
2012-09-30 07:43:23 +00:00
|
|
|
|
2012-09-23 14:53:21 +00:00
|
|
|
ffw_close();
|
2012-08-14 08:23:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Append the characters of the string `s`, drawn with the current font of `state`,
 * to the line buffer, then advance the text position (cur_tx/cur_ty) and the
 * draw position (draw_tx/draw_ty) by the accumulated displacement.
 *
 * Nothing is done for empty strings, when no font is set, when the font's
 * writing mode is non-zero, or when the two low bits of the render mode are both set.
 */
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
    // nothing to draw
    if(s->getLength() == 0)
        return;

    auto font = state->getFont();
    if(font == nullptr)
        return;
    if(font->getWMode())
        return;

    // hidden text
    if((state->getRender() & 3) == 3)
        return;

    // see if the line has to be closed due to state change
    check_state_change(state);
    prepare_text_line(state);

    // Now ready to output: walk through the string, one character at a time
    char * cursor = s->getCString();
    int remaining = s->getLength();

    // accumulated displacement, in text space units
    double sum_dx = 0;
    double sum_dy = 0;
    double dxerr = 0;

    // per-character outputs of getNextChar
    double char_dx, char_dy;
    double origin_x, origin_y;
    int unicode_count;
    CharCode code;
    Unicode * unicodes = nullptr;

    int char_count = 0;
    int space_count = 0;

    while (remaining > 0)
    {
        auto consumed = font->getNextChar(cursor, remaining, &code, &unicodes, &unicode_count,
                &char_dx, &char_dy, &origin_x, &origin_y);

        if(!equal(origin_x, 0) || !equal(origin_y, 0))
        {
            cerr << "TODO: non-zero origins" << endl;
        }

        // only a single-byte ' ' counts as a space
        const bool is_space = (consumed == 1) && (*cursor == ' ');
        if(is_space)
            ++space_count;

        if(is_space && (param->space_as_offset))
        {
            // ignore horiz_scaling, as it's merged in CTM
            line_buf.append_offset((char_dx * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
        }
        else if((param->decompose_ligature) && (unicode_count > 1)
                && all_of(unicodes, unicodes + unicode_count, isLegalUnicode))
        {
            // emit every component of the ligature
            line_buf.append_unicodes(unicodes, unicode_count);
        }
        else
        {
            // a single code point, taken from the ToUnicode data or from the font itself
            Unicode single = cur_font_info->use_tounicode
                ? check_unicode(unicodes, unicode_count, code, font)
                : unicode_from_font(code, font);
            line_buf.append_unicodes(&single, 1);
        }

        sum_dx += char_dx;
        sum_dy += char_dy;
        ++char_count;

        cursor += consumed;
        remaining -= consumed;
    }

    double hs = state->getHorizScaling();

    // horiz_scaling is merged into ctm now,
    // so the coordinate system is ugly
    sum_dx = (sum_dx * cur_font_size + char_count * cur_letter_space + space_count * cur_word_space) * hs;
    sum_dy *= cur_font_size;

    cur_tx += sum_dx;
    cur_ty += sum_dy;

    draw_tx += sum_dx + dxerr * cur_font_size * hs;
    draw_ty += sum_dy;
}
|
2012-09-12 15:26:14 +00:00
|
|
|
|
|
|
|
} // namespace pdf2htmlEX
|