mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
working on new --tounicode
This commit is contained in:
parent
634394d771
commit
0eb3c5af99
19
ChangeLog
Normal file
19
ChangeLog
Normal file
@ -0,0 +1,19 @@
|
||||
Latest
|
||||
|
||||
* Removed dependency on boost::format and boost::algorithm
|
||||
* New option --space-as-offset
|
||||
* A font preprocessor, for solving encoding problems
|
||||
* Better HTML optimization, states are reused
|
||||
* HTML should work when Javascript is disabled
|
||||
|
||||
v0.2
|
||||
2012.09.06
|
||||
|
||||
* Fontforge is now linked with, not called with scripts
|
||||
* Better accuracy of rendering, with a new line model
|
||||
* New option --decompose-ligature
|
||||
|
||||
v0.1
|
||||
2012.08.28
|
||||
|
||||
* The first release
|
10
pdf2htmlEX.1
10
pdf2htmlEX.1
@ -89,10 +89,14 @@ Specify a ratio greater than 1 would resolve this issue.
|
||||
|
||||
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
|
||||
.TP
|
||||
.B --always-apply-tounicode <0|1> (Default: 0)
|
||||
A ToUnicode map may be provided for fonts in PDF which indicates the 'meaning' of the characters.
|
||||
.B --tounicode <-1|0|1> (Default: 0)
|
||||
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However, there is often better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
|
||||
|
||||
However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong. So by default pdf2htmlEX will find the Unicode value directly from the fonts instead of ToUnicode map. This behavior may be changed by turning on this switch.
|
||||
If this value is set to 1, the ToUnicode Map is always applied, if provided in PDF, and characters may not render correctly in HTML if there are collisions.
|
||||
|
||||
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get the correct characters when you select, copy and paste text.
|
||||
|
||||
If set to 0, pdf2htmlEX will try its best to balance the two methods above.
|
||||
.TP
|
||||
.B --space-as-offset <0|1> (Default: 0)
|
||||
Treat space characters as offsets, which may increase the size of the output.
|
||||
|
@ -36,7 +36,7 @@ struct Param
|
||||
double h_eps, v_eps;
|
||||
double space_threshold;
|
||||
double font_size_multiplier;
|
||||
int always_apply_tounicode;
|
||||
int tounicode;
|
||||
int space_as_offset;
|
||||
|
||||
std::string font_suffix, font_format;
|
||||
|
@ -83,7 +83,7 @@ po::variables_map parse_options (int argc, char **argv)
|
||||
("veps", po::value<double>(¶m.v_eps)->default_value(1.0), "max tolerated vertical offset (in pixels)")
|
||||
("space-threshold", po::value<double>(¶m.space_threshold)->default_value(1.0/8), "distance no thiner than (threshold * em) will be considered as a space character")
|
||||
("font-size-multiplier", po::value<double>(¶m.font_size_multiplier)->default_value(10.0), "setting a value greater than 1 would increase the rendering accuracy")
|
||||
("always-apply-tounicode", po::value<int>(¶m.always_apply_tounicode)->default_value(0), "ToUnicode map is ignore for non-TTF fonts unless this switch is on")
|
||||
("tounicode", po::value<int>(¶m.tounicode)->default_value(0), "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled")
|
||||
("space-as-offset", po::value<int>(¶m.space_as_offset)->default_value(0), "treat space characters as offsets")
|
||||
|
||||
("font-suffix", po::value<string>(¶m.font_suffix)->default_value(".ttf"), "suffix for extracted font files")
|
||||
|
Loading…
Reference in New Issue
Block a user