mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-21 20:50:07 +00:00
add option 'decompose-ligature'
This commit is contained in:
parent
61bd8d6919
commit
79778e0ff3
4
TODO
4
TODO
@ -1,13 +1,9 @@
|
||||
cmake - pkgconfig - fontforge - python
|
||||
|
||||
fix glyph width as provided in pdf
|
||||
|
||||
option to break ligatures
|
||||
|
||||
detect duplicate base fonts when embedding
|
||||
|
||||
consider left-shift in optimization
|
||||
|
||||
multiple charcode mapped to a same glyph
|
||||
re-encoded only used glyphs
|
||||
|
||||
|
11
pdf2htmlEX.1
11
pdf2htmlEX.1
@ -69,7 +69,10 @@ There are several base font defined in PDF standards, which are supposed to be p
|
||||
If this switch is on, locally matched fonts will be used and embedded; otherwise only font names are exported, so that web browsers may try to find proper fonts themselves.
|
||||
.TP
|
||||
.B --embed-external-font <0|1> (Default: 0)
|
||||
Similar as above but for non-base fonts
|
||||
Same as above, but for non-base fonts.
|
||||
.TP
|
||||
.B --decompose-ligature <0|1> (Default: 0)
|
||||
Decompose ligatures. For example 'fi' -> 'f''i'.
|
||||
.TP
|
||||
.B --heps <len>, --veps <len> (Default: 1)
|
||||
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||
@ -77,7 +80,7 @@ Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||
pdf2htmlEX will try to optimize the generated HTML file by moving text within this distance.
|
||||
.TP
|
||||
.B --space-threshold <ratio> (Default: 1.0/6)
|
||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size
|
||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
||||
.TP
|
||||
.B --font-size-multiplier <ratio> (Default: 10)
|
||||
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
||||
@ -104,10 +107,10 @@ If switched off, intermediate files won't be cleaned in the end.
|
||||
Convert file.pdf into file.html
|
||||
.TP
|
||||
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
|
||||
Convert file.pdf and leave all intermediate files
|
||||
Convert file.pdf and leave all intermediate files.
|
||||
.TP
|
||||
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
|
||||
Convert file.pdf into out/file.html and leave font/image files separated
|
||||
Convert file.pdf into out/file.html and leave font/image files separated.
|
||||
|
||||
.SH COPYRIGHT
|
||||
.PP
|
||||
|
@ -26,6 +26,7 @@
|
||||
using boost::algorithm::to_lower;
|
||||
using std::unordered_set;
|
||||
using std::min;
|
||||
using std::all_of;
|
||||
|
||||
path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
||||
{
|
||||
@ -422,8 +423,15 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
++nSpaces;
|
||||
}
|
||||
|
||||
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
||||
line_buf.append_unicodes(&uu, 1);
|
||||
if((param->decompose_ligature) && all_of(u, u+uLen, isLegalUnicode))
|
||||
{
|
||||
line_buf.append_unicodes(u, uLen);
|
||||
}
|
||||
else
|
||||
{
|
||||
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
||||
line_buf.append_unicodes(&uu, 1);
|
||||
}
|
||||
|
||||
dx += dx1;
|
||||
dy += dy1;
|
||||
|
@ -30,6 +30,7 @@ struct Param
|
||||
int single_html;
|
||||
int embed_base_font;
|
||||
int embed_external_font;
|
||||
int decompose_ligature;
|
||||
|
||||
// Advanced tweak
|
||||
double h_eps, v_eps;
|
||||
|
@ -78,6 +78,7 @@ po::variables_map parse_options (int argc, char **argv)
|
||||
("single-html", po::value<int>(&param.single_html)->default_value(1), "combine everything into one single HTML file")
|
||||
("embed-base-font", po::value<int>(&param.embed_base_font)->default_value(0), "embed local matched font for base 14 fonts in the PDF file")
|
||||
("embed-external-font", po::value<int>(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file")
|
||||
("decompose-ligature", po::value<int>(&param.decompose_ligature)->default_value(0), "decompose ligatures, for example 'fi' -> 'f''i'")
|
||||
|
||||
("heps", po::value<double>(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
|
||||
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
|
||||
|
Loading…
Reference in New Issue
Block a user