1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-03 08:38:39 +00:00

Merge branch 'devv'

Conflicts:
	README.md
This commit is contained in:
Lu Wang 2012-10-01 18:38:47 +08:00
commit 3b54f1a019
10 changed files with 90 additions and 20 deletions

View File

@ -1,5 +1,7 @@
Latest v0.5
* New options: --stretch-narrow-glyph, --squeeze-wide-glyph
v0.4
2012.09.26

View File

@ -2,6 +2,8 @@
### [**Donate Now**](http://coolwanglu.github.com/pdf2htmlEX/donate.html)
### [**Feature Commissions**](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission) are now accepted.
A beautiful demo is worth a thousand words:
[**Typography**](http://coolwanglu.github.com/pdf2htmlEX/demo/geneve.html) [Original](https://github.com/raphink/geneve_1564/raw/master/geneve_1564.pdf)

14
TODO
View File

@ -1,6 +1,18 @@
about glyph width:
- IE
- stretching ratio might not be correct.. letter 'f'
draw lines with CSS
create a glyph for ' ' if the font does not contain one
position history stack (popstate)
==Wait until someone asks==
try harder finding glyph names (using fontforge) for CID Type 0
rename single-html -> embed-font/image/css ...
create a glyph for ' ' if the font does not contain one
merge sub/sup into one line
bug found in baidu(ubuntu...)

View File

@ -35,7 +35,7 @@ Specify owner password
.B -u, --user-password <password>
Specify user password
.TP
.B --dest-dir <dir> (Default: ".")
.B --dest-dir <dir> (Default: .)
Specify destination folder
.TP
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
@ -117,15 +117,21 @@ Treat space characters as offsets, which may increase the size of the output.
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
.TP
.B --css-filename <filename> (Default: "")
.B --stretch-narrow-glyph <0|1> (Default: 0)
If set to 1, glyphs narrower than described in PDF will be stretched; otherwise space will be padded to the right of the glyphs.
.TP
.B --squeeze_wide_glyph <0|1> (Default: 0)
If set to 1, glyphs wider than described in PDF will be squeezed; otherwise they will be truncated.
.TP
.B --css-filename <filename> (Default: <none>)
Specify the filename of the generated css file, if not embedded.
If it's empty, the file name will be determined automatically.
.TP
.B --font-suffix <suffix> (Default: ".ttf"), --font-format <format> (Default: "truetype")
.B --font-suffix <suffix> (Default: .ttf), --font-format <format> (Default: truetype)
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
.TP
.B --external-hint-tool <tool> (Default: "")
.B --external-hint-tool <tool> (Default: <none>)
If specified, the tool will be called in order to enhance hinting for fonts; this will precede --auto-hint.
The tool will be called as '<tool> <in.suffix> <out.suffix>', where suffix will be the same as specified for --font-suffix.
@ -141,10 +147,10 @@ If switched off, intermediate files won't be cleaned in the end.
.B pdf2htmlEX /path/to/file.pdf
Convert file.pdf into file.html
.TP
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
.B pdf2htmlEX --clean-tmp 0 --debug 1 /path/to/file.pdf
Convert file.pdf and leave all intermediate files.
.TP
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
.B pdf2htmlEX --dest-dir out --single-html 0 /path/to/file.pdf
Convert file.pdf into out/file.html and leave font/image files separated.
.SH COPYRIGHT

View File

@ -11,8 +11,9 @@
#include <cmath>
#include <algorithm>
#include "Param.h"
#include <GlobalParams.h>
#include "Param.h"
#include "HTMLRenderer.h"
#include "namespace.h"
#include "util.h"
@ -114,7 +115,7 @@ void HTMLRenderer::install_base_font(GfxFont * font, GfxFontLoc * font_loc, Font
{
if(localfontloc != nullptr)
{
embed_font(string(localfontloc->path->getCString()), font, info);
embed_font(localfontloc->path->getCString(), font, info);
export_remote_font(info, param->font_suffix, param->font_format, font);
delete localfontloc;
return;

View File

@ -25,7 +25,7 @@ namespace pdf2htmlEX {
using std::unordered_set;
using std::min;
using std::all_of;
using std::round;
using std::floor;
using std::swap;
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
@ -189,14 +189,17 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
info.em_size = ffw_get_em_size();
if(get_metric_only)
{
ffw_metric(&info.ascent, &info.descent);
ffw_close();
return;
}
used_map = preprocessor.get_code_map(hash_ref(font->getID()));
/*
* Step 1
* dump the font file directly from the font descriptor and put the glyphs into the correct slots
*
* dump the font file directly from the font descriptor and put the glyphs into the correct slots *
* for 8bit + nonTrueType
* re-encoding the font using a PostScript encoding list (glyph id <-> glyph name)
*
@ -384,19 +387,19 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
if(font_8bit)
{
width_list[k] = (int)round(font_8bit->getWidth(i) * info.em_size);
width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5);
}
else
{
char buf[2];
buf[0] = (i >> 8) & 0xff;
buf[1] = (i & 0xff);
width_list[k] = (int)round(font_cid->getWidth(buf, 2) * info.em_size);
width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5);
}
}
ffw_set_widths(width_list, max_key + 1, param->stretch_narrow_glyph, param->squeeze_wide_glyph);
ffw_reencode_raw(cur_mapping, max_key + 1, 1);
ffw_set_widths(width_list, max_key + 1);
if(ctu)
ctu->decRefCnt();
@ -458,6 +461,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
ffw_load_font(cur_tmp_fn.c_str());
ffw_metric(&info.ascent, &info.descent);
ffw_save(fn.c_str());
ffw_close();
}
@ -522,14 +526,22 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
}
else
{
if((param->decompose_ligature) && all_of(u, u+uLen, isLegalUnicode))
if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
{
line_buf.append_unicodes(u, uLen);
}
else
{
Unicode uu = (cur_font_info->use_tounicode ? check_unicode(u, uLen, code, font) : unicode_from_font(code, font));
line_buf.append_unicodes(&uu, 1);
if(cur_font_info->use_tounicode)
{
Unicode uu = check_unicode(u, uLen, code, font);
line_buf.append_unicodes(&uu, 1);
}
else
{
Unicode uu = unicode_from_font(code, font);
line_buf.append_unicodes(&uu, 1);
}
}
}

View File

@ -19,6 +19,8 @@
#include "ffw.h"
static real EPS=1e-6;
static inline int min(int a, int b)
{
return (a<b)?a:b;
@ -268,8 +270,10 @@ void ffw_metric(double * ascent, double * descent)
if(a < 0) a = 0;
if(d > 0) d = 0;
/*
sf->ascent = min(a, em);
sf->descent = em - bb.maxy;
*/
info->os2_winascent = a;
info->os2_typoascent = a;
@ -292,8 +296,17 @@ void ffw_metric(double * ascent, double * descent)
/*
* TODO:bitmap, reference have not been considered in this function
*/
void ffw_set_widths(int * width_list, int mapping_len)
void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int squeeze_wide)
{
/*
* Disabled, because it causes crashing
memset(cur_fv->selected, 1, cur_fv->map->enccount);
// remove kern
FVRemoveKerns(cur_fv);
FVRemoveVKerns(cur_fv);
*/
SplineFont * sf = cur_fv->sf;
if(sf->onlybitmaps
@ -319,6 +332,20 @@ void ffw_set_widths(int * width_list, int mapping_len)
SplineChar * sc = sf->glyphs[j];
if(sc == NULL) continue;
DBounds bb;
SplineCharFindBounds(sc, &bb);
double glyph_width = bb.maxx - bb.minx;
if((glyph_width > EPS)
&& (((glyph_width > width_list[i] + EPS) && (squeeze_wide))
|| ((glyph_width < width_list[i] - EPS) && (stretch_narrow))))
{
real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width;
transform[3] = 1.0;
transform[1] = transform[2] = transform[4] = transform[5] = 0;
FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth);
}
sc->width = width_list[i];
}
}

View File

@ -40,9 +40,13 @@ struct Param
double h_eps, v_eps;
double space_threshold;
double font_size_multiplier;
int auto_hint;
int tounicode;
int space_as_offset;
int stretch_narrow_glyph;
int squeeze_wide_glyph;
std::string css_filename;
std::string font_suffix, font_format;

View File

@ -34,7 +34,7 @@ int ffw_get_em_size(void);
// fix metrics and get them
void ffw_metric(double * ascent, double * descent);
void ffw_set_widths(int * width_list, int mapping_len);
void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int squeeze_wide);
void ffw_auto_hint(void);

View File

@ -41,6 +41,8 @@ void show_usage_and_exit(const char * dummy = nullptr)
cerr << "Options:" << endl;
argparser.show_usage(cerr);
cerr << endl;
cerr << "Run 'man pdf2htmlEX' for detailed information" << endl;
cerr << endl;
exit(EXIT_FAILURE);
}
@ -79,6 +81,8 @@ void parse_options (int argc, char **argv)
.add("auto-hint", &param.auto_hint, 0, "Whether to generate hints for fonts")
.add("tounicode", &param.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled")
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("stretch_narrow_glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space")
.add("squeeze_wide_glyph", &param.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating")
.add("css-filename", &param.css_filename, "", "Specify the file name of the generated css file")
.add("font-suffix", &param.font_suffix, ".ttf", "suffix for extracted font files")