1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

add --optimize-text; make --space-as-offset default

This commit is contained in:
Lu Wang 2013-04-03 09:06:32 +08:00
parent e3eff29dec
commit 2c96e52626
4 changed files with 11 additions and 2 deletions

View File

@ -188,7 +188,7 @@ Specifying a ratio greater than 1 would resolve this issue; however, it might freeze
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
.TP
.B --space-as-offset <0|1> (Default: 0)
.B --space-as-offset <0|1> (Default: 1)
Treat space characters as offsets, which may increase the size of the output.
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
@ -203,6 +203,10 @@ If set to -1, a customized map is used such that rendering will be correct in HT
If set to 0, pdf2htmlEX would try its best to balance the two methods above.
.TP
.B --optimize-text <0|1> (Default: 1)
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
.SS PDF Protection
.TP

View File

@ -237,6 +237,9 @@ void HTMLRenderer::TextLineBuffer::set_state (State & state)
void HTMLRenderer::TextLineBuffer::optimize(void)
{
if(!(renderer->param->optimize_text))
return;
assert(!states.empty());
auto offset_iter = offsets.begin();

View File

@ -51,6 +51,7 @@ struct Param
double font_size_multiplier;
int space_as_offset;
int tounicode;
int optimize_text;
// encryption
std::string owner_password, user_password;

View File

@ -93,8 +93,9 @@ void parse_options (int argc, char **argv)
.add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels")
.add("space-threshold", &param.space_threshold, (1.0/8), "word break threshold (threshold * em)")
.add("font-size-multiplier", &param.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy")
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("space-as-offset", &param.space_as_offset, 1, "treat space characters as offsets")
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
.add("optimize-text", &param.optimize_text, 1, "try to reduce the number of HTML elements used for text")
// encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", nullptr, true)