1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

add option 'decompose-ligature'

This commit is contained in:
Lu Wang 2012-09-06 15:09:47 +08:00
parent 61bd8d6919
commit 79778e0ff3
5 changed files with 19 additions and 10 deletions

4
TODO
View File

@ -1,13 +1,9 @@
cmake - pkgconfig - fontforge - python
fix glyph width as provided in pdf
option to break ligatures
detect duplicate base fonts when embedding
consider left-shift in optimization
multiple charcode mapped to a same glyph
re-encoded only used glyphs

View File

@ -69,7 +69,10 @@ There are several base font defined in PDF standards, which are supposed to be p
If this switch is on, locally matched fonts will be used and embedded; otherwise only font names are exported, so that web browsers may try to find proper fonts themselves.
.TP
.B --embed-external-font <0|1> (Default: 0)
Similar as above but for non-base fonts
Same as above, but for non-base fonts.
.TP
.B --decompose-ligature <0|1> (Default: 0)
Decompose ligatures. For example 'fi' -> 'f''i'.
.TP
.B --heps <len>, --veps <len> (Default: 1)
Specify the maximum tolerable horizontal/vertical offset (in pixels).
@ -77,7 +80,7 @@ Specify the maximum tolerable horizontal/vertical offset (in pixels).
pdf2htmlEX would try to optimize the generated HTML file by moving text within this distance.
.TP
.B --space-threshold <ratio> (Default: 1.0/6)
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
.TP
.B --font-size-multiplier <ratio> (Default: 10)
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
@ -104,10 +107,10 @@ If switched off, intermediate files won't be cleaned in the end.
Convert file.pdf into file.html
.TP
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
Convert file.pdf and leave all intermediate files
Convert file.pdf and leave all intermediate files.
.TP
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
Convert file.pdf into out/file.html and leave font/image files separated
Convert file.pdf into out/file.html and leave font/image files separated.
.SH COPYRIGHT
.PP

View File

@ -26,6 +26,7 @@
using boost::algorithm::to_lower;
using std::unordered_set;
using std::min;
using std::all_of;
path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
{
@ -422,8 +423,15 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
++nSpaces;
}
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
line_buf.append_unicodes(&uu, 1);
if((param->decompose_ligature) && all_of(u, u+uLen, isLegalUnicode))
{
line_buf.append_unicodes(u, uLen);
}
else
{
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
line_buf.append_unicodes(&uu, 1);
}
dx += dx1;
dy += dy1;

View File

@ -30,6 +30,7 @@ struct Param
int single_html;
int embed_base_font;
int embed_external_font;
int decompose_ligature;
// Advanced tweak
double h_eps, v_eps;

View File

@ -78,6 +78,7 @@ po::variables_map parse_options (int argc, char **argv)
("single-html", po::value<int>(&param.single_html)->default_value(1), "combine everything into one single HTML file")
("embed-base-font", po::value<int>(&param.embed_base_font)->default_value(0), "embed local matched font for base 14 fonts in the PDF file")
("embed-external-font", po::value<int>(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file")
("decompose-ligature", po::value<int>(&param.decompose_ligature)->default_value(0), "decompose ligatures, for example 'fi' -> 'f''i'")
("heps", po::value<double>(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")