1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

fix --optimize-text

This commit is contained in:
Lu Wang 2013-07-06 10:10:41 +08:00
parent 136991135d
commit 0044e9b17c
3 changed files with 19 additions and 18 deletions

View File

@ -454,13 +454,24 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
buf[1] = (cur_code & 0xff);
cur_width = font_cid->getWidth(buf, 2) ;
}
width_list[mapped_code] = (int)floor(cur_width * info.em_size + 0.5);
if(u == ' ')
{
has_space = true;
/*
* Internet Explorer will ignore `word-spacing` if
* the width of the 'space' glyph is 0
*
* space_width==0 often means no spaces are used in the PDF
* so setting it to be 0.001 should be safe
*/
if(equal(cur_width, 0))
cur_width = 0.001;
info.space_width = cur_width;
has_space = true;
}
width_list[mapped_code] = (int)floor(cur_width * info.em_size + 0.5);
}
if(param.debug)
@ -487,6 +498,10 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
char buf[2] = {0, ' '};
info.space_width = font_cid->getWidth(buf, 2);
}
/* See comments above */
if(equal(info.space_width,0))
info.space_width = 0.001;
ffw_add_empty_char((int32_t)' ', (int)floor(info.space_width * info.em_size + 0.5));
if(param.debug)
{
@ -494,19 +509,6 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
}
}
if(info.space_width == 0)
{
/*
* Internet Explorer will ignore `word-spacing` if
* the width of the 'space' glyph is 0
*
* usually the em_size is 1000 or 2048,
* and space_width==0 often means no spaces are used in the PDF
* so setting it to be 1 should be safe
*/
info.space_width = 1;
}
if(param.debug)
{
cerr << "space width: " << info.space_width << endl;

View File

@ -36,7 +36,7 @@ struct HTMLTextState
double single_space_offset(void) const {
double offset = word_space + letter_space;
if(font_info->em_size != 0)
offset += font_info->space_width / font_info->em_size * font_size;
offset += font_info->space_width * font_size;
return offset;
}
// calculate em_size of this state

View File

@ -13,11 +13,10 @@ with open('out.html','w') as outf:
if not f.lower().endswith('.pdf'):
continue
print f
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
if os.system('pdf2htmlEX -l 10 --optimize-text 1 --no-drm 1 --fit-width 1024 --dest-dir html --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
print "error on ", f
sys.exit(-1)
#os.system('pdf2htmlEX --dest-dir html --process-nontext 0 --css-draw 1 "%s/%s"' % (DIR,f))
ff = f[:-3]
outf.write('<a href="html/%shtml" target="pdf">%s</a><br/>' % (ff,ff))
outf.flush();