mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
add an option 'optimize'
This commit is contained in:
parent
bbe9b99b4e
commit
2d558253a5
2
TODO
2
TODO
@ -6,7 +6,7 @@ option to break ligatures
|
|||||||
|
|
||||||
detect duplicate base fonts when embedding
|
detect duplicate base fonts when embedding
|
||||||
|
|
||||||
compress div/span states
|
consider left-shift in optimization
|
||||||
|
|
||||||
multiple charcode mapped to a same glyph
|
multiple charcode mapped to a same glyph
|
||||||
re-encoded only used glyphs
|
re-encoded only used glyphs
|
||||||
|
@ -89,6 +89,9 @@ A ToUnicode map may be provided for fonts in PDF which indicates the 'meaning' o
|
|||||||
|
|
||||||
However often there is better "ToUnicode" info in Type 1 fonts, and sometimes the ToUnicode map provided is wrong. So by default pdf2htmlEX will find the Unicode value directly from the fonts instead of ToUnicode map. This behavior may be changed by turning on this switch.
|
However often there is better "ToUnicode" info in Type 1 fonts, and sometimes the ToUnicode map provided is wrong. So by default pdf2htmlEX will find the Unicode value directly from the fonts instead of ToUnicode map. This behavior may be changed by turning on this switch.
|
||||||
.TP
|
.TP
|
||||||
|
.B --optimize <0|1> (Default: 0)
|
||||||
|
Try to optimize the output HTML file, might be slow.
|
||||||
|
.TP
|
||||||
.B --font-suffix <suffix> (Default: ".ttf"), --font-format <format> (Default: "truetype")
|
.B --font-suffix <suffix> (Default: ".ttf"), --font-format <format> (Default: "truetype")
|
||||||
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
||||||
.TP
|
.TP
|
||||||
|
@ -66,14 +66,14 @@ void HTMLRenderer::LineBuffer::flush(void)
|
|||||||
for(auto & s : states)
|
for(auto & s : states)
|
||||||
s.hash();
|
s.hash();
|
||||||
|
|
||||||
if(states.size() < 3)
|
if((renderer->param->optimize) && (states.size() > 2))
|
||||||
{
|
{
|
||||||
for(size_t i = 0; i < states.size(); ++i)
|
optimize_states();
|
||||||
states[i].depth = i;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
optimize_states();
|
for(size_t i = 0; i < states.size(); ++i)
|
||||||
|
states[i].depth = i;
|
||||||
}
|
}
|
||||||
|
|
||||||
states.resize(states.size() + 1);
|
states.resize(states.size() + 1);
|
||||||
@ -203,6 +203,10 @@ void HTMLRenderer::LineBuffer::optimize_states (void)
|
|||||||
p += (incre--);
|
p += (incre--);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// depth 0
|
||||||
|
for(int i = 0; i < n; ++i)
|
||||||
|
flattened_dp_buffer[i].min_cost = 0;
|
||||||
|
|
||||||
int last_at_this_depth = n;
|
int last_at_this_depth = n;
|
||||||
for(int depth = 1; depth < n; ++depth)
|
for(int depth = 1; depth < n; ++depth)
|
||||||
@ -240,8 +244,8 @@ void HTMLRenderer::LineBuffer::optimize_states (void)
|
|||||||
while(depth > 0)
|
while(depth > 0)
|
||||||
{
|
{
|
||||||
int last_child = dp_buffer[depth][idx].last_child;
|
int last_child = dp_buffer[depth][idx].last_child;
|
||||||
assert(last_child > idx);
|
assert((last_child > idx) && (last_child <= idx + depth));
|
||||||
func(last_child, depth - last_child, tree_depth + 1);
|
func(last_child, idx + depth - last_child, tree_depth + 1);
|
||||||
depth = last_child - idx - 1;
|
depth = last_child - idx - 1;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -310,7 +314,7 @@ int HTMLRenderer::LineBuffer::State::diff(const State & s) const
|
|||||||
/*
|
/*
|
||||||
* A quick check based on hash_value
|
* A quick check based on hash_value
|
||||||
* it could be wrong when there are more than 256 classes,
|
* it could be wrong when there are more than 256 classes,
|
||||||
* in which case the output may not be optimal, but still 'correct'
|
* in which case the output may not be optimal, but still 'correct' in terms of HTML
|
||||||
*/
|
*/
|
||||||
if(hash_value == s.hash_value) return 0;
|
if(hash_value == s.hash_value) return 0;
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ struct Param
|
|||||||
double space_threshold;
|
double space_threshold;
|
||||||
double font_size_multiplier;
|
double font_size_multiplier;
|
||||||
int always_apply_tounicode;
|
int always_apply_tounicode;
|
||||||
|
int optimize;
|
||||||
|
|
||||||
std::string font_suffix, font_format;
|
std::string font_suffix, font_format;
|
||||||
|
|
||||||
|
@ -84,6 +84,7 @@ po::variables_map parse_options (int argc, char **argv)
|
|||||||
("space-threshold", po::value<double>(&param.space_threshold)->default_value(1.0/6), "distance no thiner than (threshold * em) will be considered as a space character")
|
("space-threshold", po::value<double>(&param.space_threshold)->default_value(1.0/6), "distance no thiner than (threshold * em) will be considered as a space character")
|
||||||
("font-size-multiplier", po::value<double>(&param.font_size_multiplier)->default_value(10.0), "setting a value greater than 1 would increase the rendering accuracy")
|
("font-size-multiplier", po::value<double>(&param.font_size_multiplier)->default_value(10.0), "setting a value greater than 1 would increase the rendering accuracy")
|
||||||
("always-apply-tounicode", po::value<int>(&param.always_apply_tounicode)->default_value(0), "ToUnicode map is ignore for non-TTF fonts unless this switch is on")
|
("always-apply-tounicode", po::value<int>(&param.always_apply_tounicode)->default_value(0), "ToUnicode map is ignore for non-TTF fonts unless this switch is on")
|
||||||
|
("optimize", po::value<int>(&param.optimize)->default_value(0), "Optimize HTML, might be very slow")
|
||||||
|
|
||||||
("font-suffix", po::value<string>(&param.font_suffix)->default_value(".ttf"), "suffix for extracted font files")
|
("font-suffix", po::value<string>(&param.font_suffix)->default_value(".ttf"), "suffix for extracted font files")
|
||||||
("font-format", po::value<string>(&param.font_format)->default_value("truetype"), "format for extracted font files")
|
("font-format", po::value<string>(&param.font_format)->default_value("truetype"), "format for extracted font files")
|
||||||
|
Loading…
Reference in New Issue
Block a user