From 9d69539fe558a94a0275c559c413cb8a6737af47 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 3 May 2013 01:47:55 +0800 Subject: [PATCH] do not reencode external fonts --- TODO | 3 --- src/HTMLRenderer/font.cc | 27 +++++++++++++++++++-------- src/HTMLState.h | 1 + test/test.py | 2 +- 4 files changed, 21 insertions(+), 12 deletions(-) diff --git a/TODO b/TODO index 1413a61..37db7fe 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,4 @@ -pc1.pdf -jyb.pdf cjkmix2*.pdf -Sample4.pdf - optimization levels - don't dump image when it is empty diff --git a/src/HTMLRenderer/font.cc b/src/HTMLRenderer/font.cc index 2a075d4..a02a23d 100644 --- a/src/HTMLRenderer/font.cc +++ b/src/HTMLRenderer/font.cc @@ -45,7 +45,7 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) try { - // mupdf consulted + // inspired by mupdf string subtype; auto * id = font->getID(); @@ -448,7 +448,14 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo ffw_set_widths(width_list, max_key + 1, param.stretch_narrow_glyph, param.squeeze_wide_glyph, param.remove_unused_glyph); - ffw_reencode_raw(cur_mapping, max_key + 1, 1); + /* + * Reencoding is not likely to work for external fonts. + * Just hope that we can get correct Unicode values, and let the browser choose the correct glyphs + * + * TODO: maybe add an option here, but anyway according to the PDF spec, this is implementation-dependent + */ + if(info.is_embeded) + ffw_reencode_raw(cur_mapping, max_key + 1, 1); // In some space offsets in HTML, we insert a ' ' there in order to improve text copy&paste // We need to make sure that ' ' is in the font, otherwise it would be very ugly if you select the text @@ -467,7 +474,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo ffw_add_empty_char((int32_t)' ', (int)floor(info.space_width * info.em_size + 0.5)); if(param.debug) { - cerr << "Missing space width in font: " << hex << info.id << " set to " << dec << info.space_width << endl; 
+ cerr << "Missing space width in font " << hex << info.id << ": set to " << dec << info.space_width << endl; } } @@ -556,6 +563,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) FontInfo & new_font_info = cur_info_iter->second; new_font_info.id = new_fn_id; new_font_info.use_tounicode = true; + new_font_info.is_embeded = false; if(font == nullptr) { @@ -582,12 +590,12 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) if(font->getType() == fontType3) { cerr << "Type 3 fonts are unsupported and will be rendered as Image" << endl; export_remote_default_font(new_fn_id); - return &(cur_info_iter->second); + return &new_font_info; } if(font->getWMode()) { cerr << "Writing mode is unsupported and will be rendered as Image" << endl; export_remote_default_font(new_fn_id); - return &(cur_info_iter->second); + return &new_font_info; } if(auto * font_loc = font->locateFont(xref, gTrue)) @@ -595,13 +603,16 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) switch(font_loc -> locType) { case gfxFontLocEmbedded: - install_embedded_font(font, cur_info_iter->second); + new_font_info.is_embeded = true; + install_embedded_font(font, new_font_info); break; case gfxFontLocExternal: - install_external_font(font, cur_info_iter->second); + new_font_info.is_embeded = false; + install_external_font(font, new_font_info); break; case gfxFontLocResident: - install_base_font(font, font_loc, cur_info_iter->second); + new_font_info.is_embeded = false; + install_base_font(font, font_loc, new_font_info); break; default: cerr << "TODO: other font loc" << endl; diff --git a/src/HTMLState.h b/src/HTMLState.h index f5e07e0..bfda4e5 100644 --- a/src/HTMLState.h +++ b/src/HTMLState.h @@ -17,6 +17,7 @@ struct FontInfo double space_width; double ascent, descent; bool is_type3; + bool is_embeded; }; struct HTMLTextState diff --git a/test/test.py b/test/test.py index 7978341..021dfe7 100755 --- a/test/test.py +++ b/test/test.py @@ -13,7 +13,7 @@ with 
open('out.html','w') as outf: if not f.lower().endswith('.pdf'): continue print f - if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0: + if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0: print "error on ", f sys.exit(-1)