1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

Merge branch 'type3'

This commit is contained in:
Lu Wang 2013-09-21 13:57:01 +08:00
commit ea4c6f4f90
12 changed files with 273 additions and 71 deletions

View File

@ -3,6 +3,7 @@ Developing v0.10
* Background image optimization
* Support output background image in JPEG (--bg-format jpg)
* [Experimental] Support output background image in SVG (--bg-format svg)
* [Experimental] Support Type 3 fonts
* New options
--bg-format
--font-format (same as --font-suffix, but without the leading dot)

View File

@ -184,6 +184,13 @@ Clear the fstype bits in TTF/OTF fonts.
Turn this on if Internet Explorer complains about 'Permission must be Installable' AND you have permission to do so.
.TP
.B --process-type3 <0|1> (Default: 0)
If turned on, pdf2htmlEX will try to convert Type 3 fonts so that text can be rendered natively in HTML.
Otherwise all text with Type 3 fonts will be rendered as an image.
This feature is highly experimental.
.SS Text
.TP

View File

@ -166,8 +166,8 @@ protected:
* local font: to be substituted with a local (client side) font
*/
////////////////////////////////////////////////////
std::string dump_embedded_font(GfxFont * font, long long fn_id);
std::string dump_type3_font(GfxFont * font, long long fn_id);
std::string dump_embedded_font(GfxFont * font, FontInfo & info);
std::string dump_type3_font(GfxFont * font, FontInfo & info);
void embed_font(const std::string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only = false);
const FontInfo * install_font(GfxFont * font);
void install_embedded_font(GfxFont * font, FontInfo & info);

View File

@ -36,6 +36,8 @@
#include <cairo-ft.h>
#include <cairo-svg.h>
#include "BackgroundRenderer/CairoOutputDev/CairoFontEngine.h"
#include "BackgroundRenderer/CairoOutputDev/CairoOutputDev.h"
#include <Gfx.h>
#endif
namespace pdf2htmlEX {
@ -45,16 +47,18 @@ using std::unordered_set;
using std::cerr;
using std::endl;
string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
string HTMLRenderer::dump_embedded_font (GfxFont * font, FontInfo & info)
{
if(font->getType() == fontType3)
return dump_type3_font(font, fn_id);
if(info.is_type3)
return dump_type3_font(font, info);
Object obj, obj1, obj2;
Object font_obj, font_obj2, fontdesc_obj;
string suffix;
string filepath;
long long fn_id = info.id;
try
{
// inspired by mupdf
@ -178,62 +182,164 @@ string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id)
return filepath;
}
string HTMLRenderer::dump_type3_font (GfxFont * font, long long fn_id)
string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
{
assert(font->getFontType() == fontType3);
assert(info.is_type3);
#if ENABLE_SVG
long long fn_id = info.id;
FT_Library ft_lib;
FT_Init_FreeType(&ft_lib);
CairoFontEngine font_engine(ft_lib);
auto * cur_font = font_engine.getFont(font, cur_doc, true, xref);
auto used_map = preprocessor.get_code_map(hash_ref(font->getID()));
//calculate transformed metrics
double * font_bbox = font->getFontBBox();
double glyph_width = font_bbox[2] - font_bbox[0];
double glyph_height = font_bbox[3] - font_bbox[1];
double * font_matrix = font->getFontMatrix();
double transformed_bbox[4];
memcpy(transformed_bbox, font_bbox, 4 * sizeof(double));
/*
// add the origin to the bbox
if(transformed_bbox[0] > 0) transformed_bbox[0] = 0;
if(transformed_bbox[1] > 0) transformed_bbox[1] = 0;
if(transformed_bbox[2] < 0) transformed_bbox[2] = 0;
if(transformed_bbox[3] < 0) transformed_bbox[3] = 0;
*/
tm_transform_bbox(font_matrix, transformed_bbox);
double transformed_bbox_width = transformed_bbox[2] - transformed_bbox[0];
double transformed_bbox_height = transformed_bbox[3] - transformed_bbox[1];
info.font_size_scale = std::max(transformed_bbox_width, transformed_bbox_height);
// glyph_width /= 10;
// glyph_height /= 10;
// we want each glyph to be rendered in a box of roughly GLYPH_DUMP_EM_SIZE x GLYPH_DUMP_EM_SIZE
// for non-square boxes, the longer edge should be GLYPH_DUMP_EM_SIZE
const double GLYPH_DUMP_EM_SIZE = 100.0;
double scale = GLYPH_DUMP_EM_SIZE / info.font_size_scale;
// dumpy each glyph into svg and combine them
// we choose ttf as it does not use char names
// or actually we don't use char names for ttf (see embed_font)
ffw_new_font();
// dump each glyph into svg and combine them
for(int code = 0; code < 256; ++code)
{
if(!used_map[code]) continue;
cairo_glyph_t glyph;
glyph.index = cur_font->getGlyph(code, nullptr, 0);
glyph.x = 0;
glyph.y = glyph_height;
cairo_surface_t * surface = nullptr;
string glyph_filename = (char*)str_fmt("%s/f%llx-%x.svg", param.tmp_dir.c_str(), fn_id, code);
tmp_files.add(glyph_filename);
surface = cairo_svg_surface_create(glyph_filename.c_str(), glyph_height, glyph_width);
surface = cairo_svg_surface_create(glyph_filename.c_str(), transformed_bbox_width * scale, transformed_bbox_height * scale);
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
cairo_t * cr = cairo_create(surface);
// zoom the image to prevent CairoOutputDev from rounding/increasing thin borders
//cairo_matrix_t matrix;
/*
double * font_matrix = font->getFontMatrix();
cairo_matrix_init(&matrix, font_matrix[0], font_matrix[1], font_matrix[2], font_matrix[3], font_matrix[4], font_matrix[5]);
cairo_set_font_matrix(cr, &matrix);
cairo_matrix_init_identity(&matrix);
// cairo_matrix_scale(&matrix, 10, 10);
cairo_transform(cr, &matrix);
*/
// track the position of the origin
double ox, oy;
ox = oy = 0.0;
cairo_set_font_size(cr, 1000);
auto glyph_width = ((Gfx8BitFont*)font)->getWidth(code);
// cairo_set_source_rgb(cr, 0., 0., 0.);
#if 1
{
// paint the glyph
cairo_set_font_face(cr, cur_font->getFontFace());
cairo_matrix_t m1, m2, m3;
// set up m1
// m1 shift the bottom-left corner of the glyph bbox to the origin
// also set font size to scale
cairo_matrix_init_translate(&m1, -transformed_bbox[0], transformed_bbox[1]);
cairo_matrix_init_scale(&m2, scale, scale);
cairo_matrix_multiply(&m1, &m1, &m2);
cairo_set_font_matrix(cr, &m1);
cairo_glyph_t glyph;
glyph.index = cur_font->getGlyph(code, nullptr, 0);
glyph.x = 0;
glyph.y = GLYPH_DUMP_EM_SIZE;
cairo_show_glyphs(cr, &glyph, 1);
// apply the type 3 font's font matrix before m1
// such that we got the mapping from type 3 font space to user space, then we will be able to calculate mapped position for ox,oy and glyph_width
cairo_matrix_init(&m2, font_matrix[0], font_matrix[1], font_matrix[2], font_matrix[3], font_matrix[4], font_matrix[5]);
cairo_matrix_init_scale(&m3, 1, -1);
cairo_matrix_multiply(&m2, &m2, &m3);
cairo_matrix_multiply(&m2, &m2, &m1);
cairo_matrix_transform_point(&m2, &ox, &oy);
double dummy = 0;
cairo_matrix_transform_distance(&m2, &glyph_width, &dummy);
}
#else
{
// manually draw the char to get the metrics
// adapted from _render_type3_glyph of poppler
cairo_matrix_t ctm, m, m1;
cairo_matrix_init_identity(&ctm);
// apply font-matrix
cairo_matrix_init(&m, font_matrix[0], font_matrix[1], font_matrix[2], font_matrix[3], font_matrix[4], font_matrix[5]);
cairo_matrix_multiply(&ctm, &ctm, &m);
// shift origin
cairo_matrix_init_translate(&m1, -transformed_bbox[0], -transformed_bbox[1]);
cairo_matrix_multiply(&ctm, &ctm, &m1);
// flip it upside down because the glyph coordinate system differs from the cairo coordinate system
cairo_matrix_init_scale(&m1, 1, -1);
cairo_matrix_multiply(&ctm, &ctm, &m1);
// save m*m1 to m1 for later use
cairo_matrix_multiply(&m1, &m, &m1);
// shift up to the bounding box
cairo_matrix_init_translate(&m, 0.0, transformed_bbox_height);
cairo_matrix_multiply(&ctm, &ctm, &m);
// scale up
cairo_matrix_init_scale(&m, scale, scale);
cairo_matrix_multiply(&ctm, &ctm, &m);
// set ctm
cairo_set_matrix(cr, &ctm);
// calculate the position of origin
cairo_matrix_transform_point(&ctm, &ox, &oy);
oy -= transformed_bbox_height * scale;
// calculate glyph width
double dummy = 0;
cairo_matrix_transform_distance(&ctm, &glyph_width, &dummy);
// draw the glyph
auto output_dev = new CairoOutputDev();
output_dev->setCairo(cr);
output_dev->setPrinting(true);
PDFRectangle box;
box.x1 = font_bbox[0];
box.y1 = font_bbox[1];
box.x2 = font_bbox[2];
box.y2 = font_bbox[3];
auto gfx = new Gfx(cur_doc, output_dev,
((Gfx8BitFont*)font)->getResources(),
&box, nullptr);
output_dev->startDoc(cur_doc, &font_engine);
output_dev->startPage(1, gfx->getState(), gfx->getXRef());
output_dev->setInType3Char(gTrue);
auto char_procs = ((Gfx8BitFont*)font)->getCharProcs();
Object char_proc_obj;
auto glyph_index = cur_font->getGlyph(code, nullptr, 0);
gfx->display(char_procs->getVal(glyph_index, &char_proc_obj));
char_proc_obj.free();
delete gfx;
delete output_dev;
}
#endif
cairo_set_font_face(cr, cur_font->getFontFace());
cairo_show_glyphs(cr, &glyph, 1);
{
auto status = cairo_status(cr);
cairo_destroy(cr);
@ -249,11 +355,9 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, long long fn_id)
throw string("Error in cairo: ") + cairo_status_to_string(status);
}
ffw_import_svg_glyph(code, glyph_filename.c_str());
ffw_import_svg_glyph(code, glyph_filename.c_str(), ox / GLYPH_DUMP_EM_SIZE, -oy / GLYPH_DUMP_EM_SIZE, glyph_width / GLYPH_DUMP_EM_SIZE);
}
// we choose ttf as it does not use char names
// or actually we don't use char names for ttf (see embed_font)
string font_filename = (char*)str_fmt("%s/f%llx.ttf", param.tmp_dir.c_str(), fn_id);
tmp_files.add(font_filename);
ffw_save(font_filename.c_str());
@ -321,7 +425,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
if(get_metric_only)
{
ffw_metric(&info.ascent, &info.descent);
ffw_fix_metric();
ffw_get_metric(&info.ascent, &info.descent);
ffw_close();
return;
}
@ -568,6 +673,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
cur_width = font_cid->getWidth(buf, 2) ;
}
cur_width /= info.font_size_scale;
if(u == ' ')
{
/*
@ -611,6 +718,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
char buf[2] = {0, ' '};
info.space_width = font_cid->getWidth(buf, 2);
}
info.space_width /= info.font_size_scale;
/* See comments above */
if(equal(info.space_width,0))
info.space_width = 0.001;
@ -692,7 +801,8 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
tmp_files.add(fn);
ffw_load_font(cur_tmp_fn.c_str());
ffw_metric(&info.ascent, &info.descent);
ffw_fix_metric();
ffw_get_metric(&info.ascent, &info.descent);
if(param.override_fstype)
ffw_override_fstype();
ffw_save(fn.c_str());
@ -718,6 +828,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
FontInfo & new_font_info = cur_info_iter->second;
new_font_info.id = new_fn_id;
new_font_info.use_tounicode = true;
new_font_info.font_size_scale = 1.0;
if(font == nullptr)
{
@ -802,7 +913,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
{
auto path = dump_embedded_font(font, info.id);
auto path = dump_embedded_font(font, info);
if(path != "")
{

View File

@ -206,7 +206,6 @@ void HTMLRenderer::check_state_change(GfxState * state)
{
// The width of the type 3 font text, if shown, is likely to be wrong
// So we will create separate (absolute positioned) blocks for them, such that it won't affect other text
// TODO: consider the font matrix and estimate the metrics
if((new_font_info->is_type3 || cur_text_state.font_info->is_type3) && (!param.process_type3))
{
set_line_state(new_line_state, NLS_NEWLINE);
@ -218,6 +217,12 @@ void HTMLRenderer::check_state_change(GfxState * state)
cur_text_state.font_info = new_font_info;
}
/*
* For Type 3 fonts, we need to take type3_font_size_scale into consideration
*/
if((new_font_info->is_type3 || cur_text_state.font_info->is_type3) && param.process_type3)
need_rescale_font = true;
double new_font_size = state->getFontSize();
if(!equal(cur_font_size, new_font_size))
{
@ -268,6 +273,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]);
double new_draw_font_size = cur_font_size;
if(is_positive(new_draw_text_scale))
{
// scale both font size and matrix

View File

@ -17,6 +17,20 @@ struct FontInfo
double space_width;
double ascent, descent;
bool is_type3;
/*
* As Type 3 fonts have a font matrix
* a glyph of 1pt can be very large or very small
* however it might not be true for other font formats such as ttf
*
* Therefore when we save a Type 3 font into ttf,
* we have to scale the font to about 1,
* then apply the scaling when using the font
*
* The scaling factor is stored as font_size_scale
*
* The value is 1 for other fonts
*/
double font_size_scale;
};
struct HTMLTextState

View File

@ -60,7 +60,10 @@ void HTMLTextLine::append_state(const HTMLTextState & text_state)
states.back().hash_umask = 0;
}
(HTMLTextState&)(states.back()) = text_state;
HTMLTextState & last_state = states.back();
last_state = text_state;
//apply font scale
last_state.font_size *= last_state.font_info->font_size_scale;
}
void HTMLTextLine::dump_text(ostream & out)
@ -251,7 +254,8 @@ void HTMLTextLine::prepare(void)
// note that vertical_align cannot be calculated here
for(auto iter = states.begin(); iter != states.end(); ++iter)
{
iter->ids[State::FONT_ID] = iter->font_info->id;
auto font_info = iter->font_info;
iter->ids[State::FONT_ID] = font_info->id;
iter->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(iter->font_size);
iter->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(iter->fill_color);
iter->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(iter->stroke_color);
@ -260,10 +264,10 @@ void HTMLTextLine::prepare(void)
iter->hash();
accum_vertical_align += iter->vertical_align;
double cur_ascent = accum_vertical_align + iter->font_info->ascent * iter->font_size;
double cur_ascent = accum_vertical_align + font_info->ascent * iter->font_size;
if(cur_ascent > ascent)
ascent = cur_ascent;
double cur_descent = accum_vertical_align + iter->font_info->descent * iter->font_size;
double cur_descent = accum_vertical_align + font_info->descent * iter->font_size;
if(cur_descent < descent)
descent = cur_descent;
}

View File

@ -146,6 +146,7 @@ void parse_options (int argc, char **argv)
.add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them")
.add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them")
.add("override-fstype", &param.override_fstype, 0, "clear the fstype bits in TTF/OTF fonts")
.add("process-type3", &param.process_type3, 0, "convert Type 3 fonts for web (experimental)")
// text
.add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels")
@ -299,8 +300,13 @@ void check_param()
exit(EXIT_FAILURE);
}
//test
//param.process_type3 = 1;
#if not ENABLE_SVG
if(param.process_type3)
{
cerr << "process-type3 is enabled, however SVG support is not built in this version of pdf2htmlEX." << endl;
exit(EXIT_FAILURE);
}
#endif
}
int main(int argc, char **argv)

View File

@ -302,29 +302,20 @@ int ffw_get_em_size(void)
return cur_fv->sf->ascent + cur_fv->sf->descent;
}
void ffw_metric(double * ascent, double * descent)
// Read the current ascent/descent with ffw_get_metric and write the same
// values back through ffw_set_metric.
// Declared with (void), like the other zero-argument ffw_* functions, so that
// in C the parameter list is explicitly empty rather than unspecified.
void ffw_fix_metric(void)
{
    double cur_ascent, cur_descent;
    ffw_get_metric(&cur_ascent, &cur_descent);
    ffw_set_metric(cur_ascent, cur_descent);
}
void ffw_get_metric(double * ascent, double * descent)
{
SplineFont * sf = cur_fv->sf;
struct pfminfo * info = &sf->pfminfo;
SFDefaultOS2Info(info, sf, sf->fontname);
info->pfmset = 1;
sf->changed = 1;
DBounds bb;
SplineFontFindBounds(sf, &bb);
/*
printf("bb %lf %lf\n", bb.maxy, bb.miny);
printf("_ %d %d\n", sf->ascent, sf->descent);
printf("win %d %d\n", info->os2_winascent, info->os2_windescent);
printf("%d %d\n", info->winascent_add, info->windescent_add);
printf("typo %d %d\n", info->os2_typoascent, info->os2_typodescent);
printf("%d %d\n", info->typoascent_add, info->typodescent_add);
printf("hhead %d %d\n", info->hhead_ascent, info->hhead_descent);
printf("%d %d\n", info->hheadascent_add, info->hheaddescent_add);
*/
int em = sf->ascent + sf->descent;
if (em > 0)
@ -336,9 +327,20 @@ void ffw_metric(double * ascent, double * descent)
{
*ascent = *descent = 0;
}
}
int a = floor(bb.maxy + 0.5);
int d = floor(bb.miny + 0.5);
void ffw_set_metric(double ascent, double descent)
{
SplineFont * sf = cur_fv->sf;
struct pfminfo * info = &sf->pfminfo;
SFDefaultOS2Info(info, sf, sf->fontname);
info->pfmset = 1;
sf->changed = 1;
int em = sf->ascent + sf->descent;
int a = floor(ascent * em + 0.5);
int d = floor(descent * em + 0.5);
if(a < 0) a = 0;
if(d > 0) d = 0;
@ -354,7 +356,6 @@ void ffw_metric(double * ascent, double * descent)
* But have to unify them, for different browsers on different platforms
* Things may become easier when there are CSS rules for baseline-based positioning.
*/
info->os2_winascent = a;
info->os2_typoascent = a;
info->hhead_ascent = a;
@ -424,19 +425,34 @@ void ffw_set_widths(int * width_list, int mapping_len,
}
}
void ffw_import_svg_glyph(int code, const char * filename)
// Import the SVG file `filename` into the glyph slot found for `code`, then
// shift the outline and set the advance width.
//
// ox, oy and width are multiplied by the em size (ascent + descent) below,
// i.e. they are expected as fractions of the em size.
// Returns without doing anything when SFFindSlot reports no slot (-1).
void ffw_import_svg_glyph(int code, const char * filename, double ox, double oy, double width)
{
    int enc = SFFindSlot(cur_fv->sf, cur_fv->map, code, "");
    if(enc == -1)
        return;

    // Create the glyph exactly once and keep the handle; it is needed for the
    // origin/width correction after the import.
    SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, enc);

    // Leave only this slot selected before importing.
    // NOTE(review): presumably FVImportImages acts on the current selection -- confirm.
    memset(cur_fv->selected, 0, cur_fv->map->enccount);
    cur_fv->selected[enc] = 1;
    int ok = FVImportImages(cur_fv, (char*)filename, fv_svg, 0, -1);
    if(!ok)
        err("Import SVG glyph failed");

    // correct origin and width
    {
        int a = cur_fv->sf->ascent;
        int d = cur_fv->sf->descent;
        real transform[6];
        // identity linear part: pure translation
        transform[0] = 1.0;
        transform[3] = 1.0;
        transform[1] = transform[2] = 0.0;
        // translate by (-ox, -oy) in em units; the extra +d is a vertical
        // offset by the font descent
        transform[4] = -ox * (a+d);
        transform[5] = -oy * (a+d) + d;
        FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth);

        // width is converted to font units and rounded to the nearest integer
        SCSynchronizeWidth(sc, floor(width * (a+d) + 0.5), sc->width, cur_fv);
    }
}
void ffw_auto_hint(void)

View File

@ -48,15 +48,23 @@ void ffw_add_empty_char(int32_t unicode, int width);
////////////////////////
// metrics
int ffw_get_em_size(void);
// fix metrics and get them
void ffw_metric(double * ascent, double * descent);
// manipulate ascent and descent
// ascent is between 0 and 1
// descent is between -1 and 0
void ffw_fix_metric();
// get ascent/descent based on the shape
void ffw_get_metric(double * ascent, double * descent);
// set corresponding fields
void ffw_set_metric(double ascent, double descent);
void ffw_set_widths(int * width_list, int mapping_len,
int stretch_narrow, int squeeze_wide);
////////////////////////
// others
void ffw_import_svg_glyph(int code, const char * filename);
// (ox,oy) is the position of the true origin, given as fractions of em_size
// glyph_width is likewise a fraction of em_size
void ffw_import_svg_glyph(int code, const char * filename, double ox, double oy, double glyph_width);
void ffw_auto_hint(void);
void ffw_override_fstype(void);

View File

@ -1,4 +1,6 @@
#include <cstring>
#include <limits>
#include "math.h"
namespace pdf2htmlEX {
@ -28,5 +30,31 @@ void tm_multiply(double * tm_left, const double * tm_right)
tm_left[5] += old[1] * tm_right[4] + old[3] * tm_right[5];
}
/*
 * Transform a bounding box by a transformation matrix.
 *
 * bbox holds {x1, y1, x2, y2}. Each of its four corners is mapped through tm
 * with tm_transform, and bbox is overwritten in place with the axis-aligned
 * box (min x, min y, max x, max y) that encloses the mapped corners.
 */
void tm_transform_bbox(const double * tm, double * bbox)
{
    double & x1 = bbox[0];
    double & y1 = bbox[1];
    double & x2 = bbox[2];
    double & y2 = bbox[3];

    // Copy the corners out first: x1..y2 alias bbox and are reset below.
    double corners[4][2] = {
        { x1, y1 },
        { x1, y2 },
        { x2, y1 },
        { x2, y2 },
    };

    // Seed the running min with the largest double and the running max with
    // the most negative one. numeric_limits<double>::min() must NOT be used
    // for the max seed: it is the smallest positive double, so a box whose
    // transformed coordinates are all negative would keep that seed as its
    // maximum.
    x1 = y1 = std::numeric_limits<double>::max();
    x2 = y2 = std::numeric_limits<double>::lowest();

    for(int i = 0; i < 4; ++i)
    {
        double & cx = corners[i][0];
        double & cy = corners[i][1];
        tm_transform(tm, cx, cy);
        if(cx < x1) x1 = cx;
        if(cx > x2) x2 = cx;
        if(cy < y1) y1 = cy;
        if(cy > y2) y2 = cy;
    }
}
} //namespace pdf2htmlEX

View File

@ -38,6 +38,7 @@ static inline double hypot(double x, double y) { return std::sqrt(x*x+y*y); }
void tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
void tm_multiply(double * tm_left, const double * tm_right);
// Map the four corners of bbox ({x1, y1, x2, y2}) through tm and overwrite bbox
// with the axis-aligned box enclosing the mapped corners.
void tm_transform_bbox(const double * tm, double * bbox);
} //namespace pdf2htmlEX
#endif //MATH_H__