mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
do not reencode external fonts
This commit is contained in:
parent
7f8bd3b5b5
commit
9d69539fe5
3
TODO
3
TODO
@ -1,7 +1,4 @@
|
|||||||
pc1.pdf
|
|
||||||
jyb.pdf
|
|
||||||
cjkmix2*.pdf
|
cjkmix2*.pdf
|
||||||
Sample4.pdf
|
|
||||||
|
|
||||||
- optimization levels
|
- optimization levels
|
||||||
- don't dump image when it is empty
|
- don't dump image when it is empty
|
||||||
|
@ -45,7 +45,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
// mupdf consulted
|
// inspired by mupdf
|
||||||
string subtype;
|
string subtype;
|
||||||
|
|
||||||
auto * id = font->getID();
|
auto * id = font->getID();
|
||||||
@ -448,6 +448,13 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
|
|
||||||
ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph, param.remove_unused_glyph);
|
ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph, param.remove_unused_glyph);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reencoding is not likely to work for external fonts.
|
||||||
|
* Just hope that we can get correct Unicode values, and let the browser choose the correct glyphs
|
||||||
|
*
|
||||||
|
* TODO: maybe add an option here, but anyway according to the PDF spec, this is implementation-dependent
|
||||||
|
*/
|
||||||
|
if(info.is_embeded)
|
||||||
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
|
||||||
|
|
||||||
// In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste
|
// In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste
|
||||||
@ -467,7 +474,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
|
|||||||
ffw_add_empty_char((int32_t)' ', (int)floor(info.space_width * info.em_size + 0.5));
|
ffw_add_empty_char((int32_t)' ', (int)floor(info.space_width * info.em_size + 0.5));
|
||||||
if(param.debug)
|
if(param.debug)
|
||||||
{
|
{
|
||||||
cerr << "Missing space width in font: " << hex << info.id << " set to " << dec << info.space_width << endl;
|
cerr << "Missing space width in font " << hex << info.id << ": set to " << dec << info.space_width << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -556,6 +563,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
|
|||||||
FontInfo & new_font_info = cur_info_iter->second;
|
FontInfo & new_font_info = cur_info_iter->second;
|
||||||
new_font_info.id = new_fn_id;
|
new_font_info.id = new_fn_id;
|
||||||
new_font_info.use_tounicode = true;
|
new_font_info.use_tounicode = true;
|
||||||
|
new_font_info.is_embeded = false;
|
||||||
|
|
||||||
if(font == nullptr)
|
if(font == nullptr)
|
||||||
{
|
{
|
||||||
@ -582,12 +590,12 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
|
|||||||
if(font->getType() == fontType3) {
|
if(font->getType() == fontType3) {
|
||||||
cerr << "Type 3 fonts are unsupported and will be rendered as Image" << endl;
|
cerr << "Type 3 fonts are unsupported and will be rendered as Image" << endl;
|
||||||
export_remote_default_font(new_fn_id);
|
export_remote_default_font(new_fn_id);
|
||||||
return &(cur_info_iter->second);
|
return &new_font_info;
|
||||||
}
|
}
|
||||||
if(font->getWMode()) {
|
if(font->getWMode()) {
|
||||||
cerr << "Writing mode is unsupported and will be rendered as Image" << endl;
|
cerr << "Writing mode is unsupported and will be rendered as Image" << endl;
|
||||||
export_remote_default_font(new_fn_id);
|
export_remote_default_font(new_fn_id);
|
||||||
return &(cur_info_iter->second);
|
return &new_font_info;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(auto * font_loc = font->locateFont(xref, gTrue))
|
if(auto * font_loc = font->locateFont(xref, gTrue))
|
||||||
@ -595,13 +603,16 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
|
|||||||
switch(font_loc -> locType)
|
switch(font_loc -> locType)
|
||||||
{
|
{
|
||||||
case gfxFontLocEmbedded:
|
case gfxFontLocEmbedded:
|
||||||
install_embedded_font(font, cur_info_iter->second);
|
new_font_info.is_embeded = true;
|
||||||
|
install_embedded_font(font, new_font_info);
|
||||||
break;
|
break;
|
||||||
case gfxFontLocExternal:
|
case gfxFontLocExternal:
|
||||||
install_external_font(font, cur_info_iter->second);
|
new_font_info.is_embeded = false;
|
||||||
|
install_external_font(font, new_font_info);
|
||||||
break;
|
break;
|
||||||
case gfxFontLocResident:
|
case gfxFontLocResident:
|
||||||
install_base_font(font, font_loc, cur_info_iter->second);
|
new_font_info.is_embeded = false;
|
||||||
|
install_base_font(font, font_loc, new_font_info);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
cerr << "TODO: other font loc" << endl;
|
cerr << "TODO: other font loc" << endl;
|
||||||
|
@ -17,6 +17,7 @@ struct FontInfo
|
|||||||
double space_width;
|
double space_width;
|
||||||
double ascent, descent;
|
double ascent, descent;
|
||||||
bool is_type3;
|
bool is_type3;
|
||||||
|
bool is_embeded;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HTMLTextState
|
struct HTMLTextState
|
||||||
|
@ -13,7 +13,7 @@ with open('out.html','w') as outf:
|
|||||||
if not f.lower().endswith('.pdf'):
|
if not f.lower().endswith('.pdf'):
|
||||||
continue
|
continue
|
||||||
print f
|
print f
|
||||||
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
||||||
print "error on ", f
|
print "error on ", f
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user