diff --git a/ChangeLog b/ChangeLog index aa4ae51..6d6d3dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,7 @@ Latest v0.5 +* New options: --stretch-narrow-glyph, --squeeze-wide-glyph + v0.4 2012.09.26 diff --git a/README.md b/README.md index a4f5380..63952e5 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ ### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html) +### [**Feature Commissions**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission) are now accepted. + A beautiful demo is worth a thousand words: [**Typography**](http://coolwanglu.github.com/pdf2htmlEX/demo/geneve.html) [Original](https://github.com/raphink/geneve_1564/raw/master/geneve_1564.pdf) diff --git a/TODO b/TODO index fba441a..faf9ff4 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,18 @@ + +about glyph width: + - IE + - stretching ratio might not be correct.. letter 'f' + +draw lines with CSS + +create a glyph for ' ', if there is not in a font + +position history stack (popstate) + +==Wait until someone asks== + try harder finding glyph names (using fontforge) for CID Type 0 rename single-html -> embed-font/image/css ... -create a glyph for ' ', if there is not in a font merge sub/sup into one line bug found in baidu(ubuntu...) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index e84d668..f708532 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -35,7 +35,7 @@ Specify owner password .B -u, --user-password Specify user password .TP -.B --dest-dir (Default: ".") +.B --dest-dir (Default: .) Specify destination folder .TP .B --data-dir (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) @@ -117,15 +117,21 @@ Treat space characters as offsets, which may increase the size of the output. Turn it on if space characters are not displayed correctly, or you want to remove positional spaces. 
.TP -.B --css-filename (Default: "") +.B --stretch-narrow-glyph <0|1> (Default: 0) +If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs. +.TP +.B --squeeze_wide_glyph <0|1> (Default: 0) +If set to 1, glyphs wider than described in PDF will be squeezed; otherwise they will be truncated. +.TP +.B --css-filename (Default: ) Specify the filename of the generated css file, if not embedded. If it's empty, the file name will be determined automatically. .TP -.B --font-suffix (Default: ".ttf"), --font-format (Default: "truetype") +.B --font-suffix (Default: .ttf), --font-format (Default: truetype) Specify the suffix and format of fonts extracted from the PDF file. They should be consistent. .TP -.B --external-hint-tool (Default: "") +.B --external-hint-tool (Default: ) If specified, the tool will be called in order to enhanced hinting for fonts, this will precede --auto-hint. The tool will be called as ' ', where suffix will be the same as specified for --font-suffix. @@ -141,10 +147,10 @@ If switched off, intermediate files won't be cleaned in the end. .B pdf2htmlEX /path/to/file.pdf Convert file.pdf into file.html .TP -.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf +.B pdf2htmlEX --clean-tmp 0 --debug 1 /path/to/file.pdf Convert file.pdf and leave all intermediate files. .TP -.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf +.B pdf2htmlEX --dest-dir out --single-html 0 /path/to/file.pdf Convert file.pdf into out/file.html and leave font/image files separated. 
.SH COPYRIGHT diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 81de4bd..e2bd654 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -11,8 +11,9 @@ #include #include -#include "Param.h" +#include +#include "Param.h" #include "HTMLRenderer.h" #include "namespace.h" #include "util.h" @@ -114,7 +115,7 @@ void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, Font { if(localfontloc != nullptr) { - embed_font(string(localfontloc->path->getCString()), font, info); + embed_font(localfontloc->path->getCString(), font, info); export_remote_font(info, param->font_suffix, param->font_format, font); delete localfontloc; return; diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 2750626..47a24ec 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -25,7 +25,7 @@ namespace pdf2htmlEX { using std::unordered_set; using std::min; using std::all_of; -using std::round; +using std::floor; using std::swap; string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) @@ -189,14 +189,17 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo info.em_size = ffw_get_em_size(); if(get_metric_only) + { + ffw_metric(&info.ascent, &info.descent); + ffw_close(); return; + } used_map = preprocessor.get_code_map(hash_ref(font->getID())); /* * Step 1 - * dump the font file directly from the font descriptor and put the glyphs into the correct slots - * + * dump the font file directly from the font descriptor and put the glyphs into the correct slots * * for 8bit + nonTrueType * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) * @@ -384,19 +387,19 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo if(font_8bit) { - width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size); + width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5); } else { char buf[2]; buf[0] = (i 
>> 8) & 0xff; buf[1] = (i & 0xff); - width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size); + width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5); } } + ffw_set_widths(width_list, max_key + 1, param->stretch_narrow_glyph, param->squeeze_wide_glyph); ffw_reencode_raw(cur_mapping, max_key + 1, 1); - ffw_set_widths(width_list, max_key + 1); if(ctu) ctu->decRefCnt(); @@ -458,6 +461,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo ffw_load_font(cur_tmp_fn.c_str()); ffw_metric(&info.ascent, &info.descent); ffw_save(fn.c_str()); + ffw_close(); } @@ -522,14 +526,22 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) } else { - if((param->decompose_ligature) && all_of(u, u+uLen, isLegalUnicode)) + if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode)) { line_buf.append_unicodes(u, uLen); } else { - Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font)); - line_buf.append_unicodes(&uu, 1); + if(cur_font_info->use_tounicode) + { + Unicode uu = check_unicode(u, uLen, code, font); + line_buf.append_unicodes(&uu, 1); + } + else + { + Unicode uu = unicode_from_font(code, font); + line_buf.append_unicodes(&uu, 1); + } } } diff --git a/src/ffw.c b/src/ffw.c index e52e29e..dc8e913 100644 --- a/src/ffw.c +++ b/src/ffw.c @@ -19,6 +19,8 @@ #include "ffw.h" +static real EPS=1e-6; + static inline int min(int a, int b) { return (a 0) d = 0; + /* sf->ascent = min(a, em); sf->descent = em - bb.maxy; + */ info->os2_winascent = a; info->os2_typoascent = a; @@ -292,8 +296,17 @@ void ffw_metric(double * ascent, double * descent) /* * TODO:bitmap, reference have not been considered in this function */ -void ffw_set_widths(int * width_list, int mapping_len) +void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int squeeze_wide) { + /* + * Disabled, because it causes crashing + + 
memset(cur_fv->selected, 1, cur_fv->map->enccount); + // remove kern + FVRemoveKerns(cur_fv); + FVRemoveVKerns(cur_fv); + */ + SplineFont * sf = cur_fv->sf; if(sf->onlybitmaps @@ -319,6 +332,20 @@ void ffw_set_widths(int * width_list, int mapping_len) SplineChar * sc = sf->glyphs[j]; if(sc == NULL) continue; + DBounds bb; + SplineCharFindBounds(sc, &bb); + + double glyph_width = bb.maxx - bb.minx; + if((glyph_width > EPS) + && (((glyph_width > width_list[i] + EPS) && (squeeze_wide)) + || ((glyph_width < width_list[i] - EPS) && (stretch_narrow)))) + { + real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width; + transform[3] = 1.0; + transform[1] = transform[2] = transform[4] = transform[5] = 0; + FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth); + } + sc->width = width_list[i]; } } diff --git a/src/include/Param.h b/src/include/Param.h index 02548dc..db90188 100644 --- a/src/include/Param.h +++ b/src/include/Param.h @@ -40,9 +40,13 @@ struct Param double h_eps, v_eps; double space_threshold; double font_size_multiplier; + int auto_hint; int tounicode; int space_as_offset; + + int stretch_narrow_glyph; + int squeeze_wide_glyph; std::string css_filename; std::string font_suffix, font_format; diff --git a/src/include/ffw.h b/src/include/ffw.h index 6a1f27a..939f241 100644 --- a/src/include/ffw.h +++ b/src/include/ffw.h @@ -34,7 +34,7 @@ int ffw_get_em_size(void); // fix metrics and get them void ffw_metric(double * ascent, double * descent); -void ffw_set_widths(int * width_list, int mapping_len); +void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int squeeze_wide); void ffw_auto_hint(void); diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 5197026..c712df0 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -41,6 +41,8 @@ void show_usage_and_exit(const char * dummy = nullptr) cerr << "Options:" << endl; argparser.show_usage(cerr); cerr << endl; + cerr << "Run 'man pdf2htmlEX' for detailed 
information" << endl; + cerr << endl; exit(EXIT_FAILURE); } @@ -79,6 +81,8 @@ void parse_options (int argc, char **argv) .add("auto-hint", ¶m.auto_hint, 0, "Whether to generate hints for fonts") .add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") + .add("stretch_narrow_glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") + .add("squeeze_wide_glyph", ¶m.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") .add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file") .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files")