mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
add option 'decompose-ligature'
This commit is contained in:
parent
61bd8d6919
commit
79778e0ff3
4
TODO
4
TODO
@ -1,13 +1,9 @@
|
|||||||
cmake - pkgconfig - fontforge - python
|
|
||||||
|
|
||||||
fix glyph width as provided in pdf
|
fix glyph width as provided in pdf
|
||||||
|
|
||||||
option to break ligatures
|
option to break ligatures
|
||||||
|
|
||||||
detect duplicate base fonts when embedding
|
detect duplicate base fonts when embedding
|
||||||
|
|
||||||
consider left-shift in optimization
|
|
||||||
|
|
||||||
multiple charcodes mapped to the same glyph
|
multiple charcodes mapped to the same glyph
|
||||||
re-encoded only used glyphs
|
re-encoded only used glyphs
|
||||||
|
|
||||||
|
11
pdf2htmlEX.1
11
pdf2htmlEX.1
@ -69,7 +69,10 @@ There are several base font defined in PDF standards, which are supposed to be p
|
|||||||
If this switch is on, locally matched fonts will be used and embedded; otherwise only font names are exported, so that web browsers can try to find suitable fonts themselves.
|
If this switch is on, locally matched fonts will be used and embedded; otherwise only font names are exported, so that web browsers can try to find suitable fonts themselves.
|
||||||
.TP
|
.TP
|
||||||
.B --embed-external-font <0|1> (Default: 0)
|
.B --embed-external-font <0|1> (Default: 0)
|
||||||
Similar as above but for non-base fonts
|
Similar as above but for non-base fonts.
|
||||||
|
.TP
|
||||||
|
.B --decompose-ligature <0|1> (Default: 0)
|
||||||
|
Decompose ligatures into their component characters. For example, the single glyph 'ﬁ' becomes the two characters 'f' and 'i'.
|
||||||
.TP
|
.TP
|
||||||
.B --heps <len>, --veps <len> (Default: 1)
|
.B --heps <len>, --veps <len> (Default: 1)
|
||||||
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||||
@ -77,7 +80,7 @@ Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
|||||||
pdf2htmlEX will try to optimize the generated HTML file by moving text within this distance.
|
pdf2htmlEX will try to optimize the generated HTML file by moving text within this distance.
|
||||||
.TP
|
.TP
|
||||||
.B --space-threshold <ratio> (Default: 1.0/6)
|
.B --space-threshold <ratio> (Default: 1.0/6)
|
||||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size
|
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
||||||
.TP
|
.TP
|
||||||
.B --font-size-multiplier <ratio> (Default: 10)
|
.B --font-size-multiplier <ratio> (Default: 10)
|
||||||
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
||||||
@ -104,10 +107,10 @@ If switched off, intermediate files won't be cleaned in the end.
|
|||||||
Convert file.pdf into file.html
|
Convert file.pdf into file.html
|
||||||
.TP
|
.TP
|
||||||
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
|
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
|
||||||
Convert file.pdf and leave all intermediate files
|
Convert file.pdf and leave all intermediate files.
|
||||||
.TP
|
.TP
|
||||||
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
|
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
|
||||||
Convert file.pdf into out/file.html and leave font/image files separated
|
Convert file.pdf into out/file.html and leave font/image files separated.
|
||||||
|
|
||||||
.SH COPYRIGHT
|
.SH COPYRIGHT
|
||||||
.PP
|
.PP
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
using boost::algorithm::to_lower;
|
using boost::algorithm::to_lower;
|
||||||
using std::unordered_set;
|
using std::unordered_set;
|
||||||
using std::min;
|
using std::min;
|
||||||
|
using std::all_of;
|
||||||
|
|
||||||
path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
path HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
|
||||||
{
|
{
|
||||||
@ -422,8 +423,15 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
++nSpaces;
|
++nSpaces;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if((param->decompose_ligature) && all_of(u, u+uLen, isLegalUnicode))
|
||||||
|
{
|
||||||
|
line_buf.append_unicodes(u, uLen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
|
||||||
line_buf.append_unicodes(&uu, 1);
|
line_buf.append_unicodes(&uu, 1);
|
||||||
|
}
|
||||||
|
|
||||||
dx += dx1;
|
dx += dx1;
|
||||||
dy += dy1;
|
dy += dy1;
|
||||||
|
@ -30,6 +30,7 @@ struct Param
|
|||||||
int single_html;
|
int single_html;
|
||||||
int embed_base_font;
|
int embed_base_font;
|
||||||
int embed_external_font;
|
int embed_external_font;
|
||||||
|
int decompose_ligature;
|
||||||
|
|
||||||
// Advanced tweak
|
// Advanced tweak
|
||||||
double h_eps, v_eps;
|
double h_eps, v_eps;
|
||||||
|
@ -78,6 +78,7 @@ po::variables_map parse_options (int argc, char **argv)
|
|||||||
("single-html", po::value<int>(&param.single_html)->default_value(1), "combine everything into one single HTML file")
|
("single-html", po::value<int>(&param.single_html)->default_value(1), "combine everything into one single HTML file")
|
||||||
("embed-base-font", po::value<int>(&param.embed_base_font)->default_value(0), "embed local matched font for base 14 fonts in the PDF file")
|
("embed-base-font", po::value<int>(&param.embed_base_font)->default_value(0), "embed local matched font for base 14 fonts in the PDF file")
|
||||||
("embed-external-font", po::value<int>(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file")
|
("embed-external-font", po::value<int>(&param.embed_external_font)->default_value(0), "embed local matched font for external fonts in the PDF file")
|
||||||
|
("decompose-ligature", po::value<int>(&param.decompose_ligature)->default_value(0), "decompose ligatures, for example 'fi' -> 'f''i'")
|
||||||
|
|
||||||
("heps", po::value<double>(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
|
("heps", po::value<double>(&param.h_eps)->default_value(1.0), "max tolerated horizontal offset (in pixels)")
|
||||||
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
|
("veps", po::value<double>(&param.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
|
||||||
|
Loading…
Reference in New Issue
Block a user