1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 09:38:40 +00:00

support jpeg

This commit is contained in:
Lu Wang 2013-09-18 20:24:48 +08:00
parent b4bf67629c
commit f04941bed0
10 changed files with 168 additions and 74 deletions

View File

@ -68,7 +68,7 @@ Read `LICENSE` for more detail.
### Contacts ### Contacts
* Lu Wang <coolwanglu@gmail.com> or [coolwanglu@twitter](https://twitter.com/coolwanglu) * 王璐 (Lu Wang) <coolwanglu@gmail.com> or [coolwanglu@twitter](https://twitter.com/coolwanglu)
- For general and personal questions - For general and personal questions
- If you want to report an issue, please refer to [How to report](https://github.com/coolwanglu/pdf2htmlEX/wiki/How-to-report) - If you want to report an issue, please refer to [How to report](https://github.com/coolwanglu/pdf2htmlEX/wiki/How-to-report)
- Accepting messages in **中文**, **English** or **日本語** - Accepting messages in **中文**, **English** or **日本語**

View File

@ -151,8 +151,8 @@ Specify whether the local matched fonts, for fonts not embedded in PDF, should b
If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics. If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics.
.TP .TP
.B --font-suffix <suffix> (Default: .ttf) .B --font-format <format> (Default: ttf)
Specify the suffix of fonts extracted from the PDF file. Specify the format of fonts extracted from the PDF file.
.TP .TP
.B --decompose-ligature <0|1> (Default: 0) .B --decompose-ligature <0|1> (Default: 0)
@ -227,7 +227,13 @@ If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for
.TP .TP
.B --bg-format <format> (Default: "png") .B --bg-format <format> (Default: "png")
Specify the format for background images, currently "png" and "svg" are supported. Specify the format for background images, run `pdf2htmlEX -v` to check all supported formats.
.SS Background Image
.TP
.B --bg-format <format> (Default: png)
Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats.
.SS PDF Protection .SS PDF Protection

View File

@ -5,6 +5,8 @@
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com> * Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/ */
#include <poppler-config.h>
#include "HTMLRenderer/HTMLRenderer.h" #include "HTMLRenderer/HTMLRenderer.h"
#include "Param.h" #include "Param.h"
@ -18,22 +20,26 @@ namespace pdf2htmlEX {
BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param) BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
{ {
#ifdef ENABLE_LIBPNG
if(format == "png") if(format == "png")
{ {
return new SplashBackgroundRenderer(html_renderer, param); return new SplashBackgroundRenderer(html_renderer, param);
} }
else if (format == "svg") #endif
#ifdef ENABLE_LIBJPEG
if(format == "jpg")
{ {
return new SplashBackgroundRenderer(html_renderer, param);
}
#endif
#if ENABLE_SVG #if ENABLE_SVG
return new CairoBackgroundRenderer(html_renderer, param); if (format == "svg")
#else
return nullptr;
#endif
}
else
{ {
return nullptr; return new CairoBackgroundRenderer(html_renderer, param);
} }
#endif
return nullptr;
} }
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -26,7 +26,21 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY, double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen) CharCode code, int nBytes, Unicode *u, int uLen)
{ {
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); // draw characters as image when
// - in fallback mode
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font
if((param.fallback)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| (state->getFont()->getType() == fontType3)
)
)
)
{
CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
}
} }
void CairoBackgroundRenderer::init(PDFDoc * doc) void CairoBackgroundRenderer::init(PDFDoc * doc)
@ -58,7 +72,7 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
if(param.embed_image) if(param.embed_image)
html_renderer->tmp_files.add((char*)fn); html_renderer->tmp_files.add((char*)fn);
surface = cairo_svg_surface_create((char*)fn, page_width, page_height); surface = cairo_svg_surface_create((char*)fn, page_width * DEFAULT_DPI, page_height * DEFAULT_DPI);
} }
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2); cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi); cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
@ -68,7 +82,13 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
setPrinting(false); // TODO, check the parameter setPrinting(false); // TODO, check the parameter
cairo_save(cr); cairo_save(cr);
// TODO apply crop box // zoom the image to prevent CairoOutputDev from rounding/increasing thin borders
{
cairo_matrix_t matrix;
cairo_matrix_init_identity(&matrix);
cairo_matrix_scale(&matrix, DEFAULT_DPI, DEFAULT_DPI);
cairo_transform(cr, &matrix);
}
doc->displayPage(this, pageno, DEFAULT_DPI, DEFAULT_DPI, doc->displayPage(this, pageno, DEFAULT_DPI, DEFAULT_DPI,
0, 0,

View File

@ -8,10 +8,13 @@
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <poppler-config.h>
#include <PDFDoc.h> #include <PDFDoc.h>
#include <goo/PNGWriter.h> #include <goo/PNGWriter.h>
#include <goo/JpegWriter.h>
#include "Base64Stream.h" #include "Base64Stream.h"
#include "util/const.h"
#include "SplashBackgroundRenderer.h" #include "SplashBackgroundRenderer.h"
@ -31,13 +34,11 @@ const SplashColor SplashBackgroundRenderer::white = {255,255,255};
*/ */
#if POPPLER_OLDER_THAN_0_23_0 #if POPPLER_OLDER_THAN_0_23_0
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state) void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state)
#else
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA)
#endif
{ {
#if POPPLER_OLDER_THAN_0_23_0
SplashOutputDev::startPage(pageNum, state); SplashOutputDev::startPage(pageNum, state);
#else #else
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA)
{
SplashOutputDev::startPage(pageNum, state, xrefA); SplashOutputDev::startPage(pageNum, state, xrefA);
#endif #endif
clearModRegion(); clearModRegion();
@ -93,7 +94,7 @@ void SplashBackgroundRenderer::embed_image(int pageno)
if((xmin <= xmax) && (ymin <= ymax)) if((xmin <= xmax) && (ymin <= ymax))
{ {
{ {
auto fn = html_renderer->str_fmt("%s/bg%x.png", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno); auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, param.bg_format.c_str());
if(param.embed_image) if(param.embed_image)
html_renderer->tmp_files.add((char*)fn); html_renderer->tmp_files.add((char*)fn);
@ -115,11 +116,17 @@ void SplashBackgroundRenderer::embed_image(int pageno)
if(param.embed_image) if(param.embed_image)
{ {
auto path = html_renderer->str_fmt("%s/bg%x.png", param.tmp_dir.c_str(), pageno); auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, param.bg_format.c_str());
ifstream fin((char*)path, ifstream::binary); ifstream fin((char*)path, ifstream::binary);
if(!fin) if(!fin)
throw string("Cannot read background image ") + (char*)path; throw string("Cannot read background image ") + (char*)path;
f_page << "data:image/png;base64," << Base64Stream(fin);
auto iter = FORMAT_MIME_TYPE_MAP.find(param.bg_format);
if(iter == FORMAT_MIME_TYPE_MAP.end())
throw string("Image format not supported: ") + param.bg_format;
string mime_type = iter->second;
f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
} }
else else
{ {
@ -142,7 +149,26 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
throw string("Cannot open file for background image " ) + filename; throw string("Cannot open file for background image " ) + filename;
// use unique_ptr to auto delete the object upon exception // use unique_ptr to auto delete the object upon exception
auto writer = unique_ptr<ImgWriter>(new PNGWriter); unique_ptr<ImgWriter> writer;
if(false) { }
#ifdef ENABLE_LIBPNG
else if(param.bg_format == "png")
{
writer = unique_ptr<ImgWriter>(new PNGWriter);
}
#endif
#ifdef ENABLE_LIBJPEG
else if(param.bg_format == "jpg")
{
writer = unique_ptr<ImgWriter>(new JpegWriter);
}
#endif
else
{
throw string("Image format not supported: ") + param.bg_format;
}
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi)) if(!writer->init(f, width, height, param.h_dpi, param.v_dpi))
throw "Cannot initialize PNGWriter"; throw "Cannot initialize PNGWriter";

View File

@ -178,7 +178,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
if(param.debug) if(param.debug)
{ {
auto fn = str_fmt("%s/__raw_font_%llx", param.tmp_dir.c_str(), info.id, param.font_suffix.c_str()); auto fn = str_fmt("%s/__raw_font_%llx%s", param.tmp_dir.c_str(), info.id, get_suffix(filepath).c_str());
tmp_files.add((char*)fn); tmp_files.add((char*)fn);
ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf(); ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
} }
@ -528,9 +528,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
*/ */
ffw_reencode_unicode_full(); ffw_reencode_unicode_full();
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param.tmp_dir.c_str(), param.font_suffix.c_str()); string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1.%s", param.tmp_dir.c_str(), param.font_format.c_str());
tmp_files.add(cur_tmp_fn); tmp_files.add(cur_tmp_fn);
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param.tmp_dir.c_str(), param.font_suffix.c_str()); string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2.%s", param.tmp_dir.c_str(), param.font_format.c_str());
tmp_files.add(other_tmp_fn); tmp_files.add(other_tmp_fn);
ffw_save(cur_tmp_fn.c_str()); ffw_save(cur_tmp_fn.c_str());
@ -571,9 +571,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
* Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them) * Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them)
* We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved. * We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved.
*/ */
string fn = (char*)str_fmt("%s/f%llx%s", string fn = (char*)str_fmt("%s/f%llx.%s",
(param.embed_font ? param.tmp_dir : param.dest_dir).c_str(), (param.embed_font ? param.tmp_dir : param.dest_dir).c_str(),
info.id, param.font_suffix.c_str()); info.id, param.font_format.c_str());
if(param.embed_font) if(param.embed_font)
tmp_files.add(fn); tmp_files.add(fn);
@ -685,7 +685,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
if(path != "") if(path != "")
{ {
embed_font(path, font, info); embed_font(path, font, info);
export_remote_font(info, param.font_suffix, font); export_remote_font(info, param.font_format, font);
} }
else else
{ {
@ -712,7 +712,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
if(localfontloc != nullptr) if(localfontloc != nullptr)
{ {
embed_font(string(localfontloc->path->getCString()), font, info); embed_font(string(localfontloc->path->getCString()), font, info);
export_remote_font(info, param.font_suffix, font); export_remote_font(info, param.font_format, font);
delete localfontloc; delete localfontloc;
return; return;
} }
@ -739,45 +739,46 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
export_local_font(info, font, fontname, ""); export_local_font(info, font, fontname, "");
} }
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font) void HTMLRenderer::export_remote_font(const FontInfo & info, const string & format, GfxFont * font)
{ {
string mime_type, format; string css_font_format;
if(suffix == ".ttf") if(format == "ttf")
{ {
format = "truetype"; css_font_format = "truetype";
mime_type = "application/x-font-ttf";
} }
else if(suffix == ".otf") else if(format == "otf")
{ {
format = "opentype"; css_font_format = "opentype";
mime_type = "application/x-font-otf";
} }
else if(suffix == ".woff") else if(format == "woff")
{ {
format = "woff"; css_font_format = "woff";
mime_type = "application/font-woff";
} }
else if(suffix == ".eot") else if(format == "eot")
{ {
format = "embedded-opentype"; css_font_format = "embedded-opentype";
mime_type = "application/vnd.ms-fontobject";
} }
else if(suffix == ".svg") else if(format == "svg")
{ {
format = "svg"; css_font_format = "svg";
mime_type = "image/svg+xml";
} }
else else
{ {
cerr << "Warning: unknown font suffix: " << suffix << endl; throw string("Warning: unknown font format: ") + format;
} }
auto iter = FORMAT_MIME_TYPE_MAP.find(format);
if(iter == FORMAT_MIME_TYPE_MAP.end())
{
throw string("Warning: unknown font format: ") + format;
}
string mime_type = iter->second;
f_css.fs << "@font-face{" f_css.fs << "@font-face{"
<< "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"
<< "src:url("; << "src:url(";
{ {
auto fn = str_fmt("f%llx%s", info.id, suffix.c_str()); auto fn = str_fmt("f%llx.%s", info.id, format.c_str());
if(param.embed_font) if(param.embed_font)
{ {
auto path = param.tmp_dir + "/" + (char*)fn; auto path = param.tmp_dir + "/" + (char*)fn;
@ -793,7 +794,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
} }
f_css.fs << ")" f_css.fs << ")"
<< "format(\"" << format << "\");" << "format(\"" << css_font_format << "\");"
<< "}" // end of @font-face << "}" // end of @font-face
<< "." << CSS::FONT_FAMILY_CN << info.id << "{" << "." << CSS::FONT_FAMILY_CN << info.id << "{"
<< "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"

View File

@ -42,7 +42,7 @@ struct Param
// fonts // fonts
int embed_external_font; int embed_external_font;
std::string font_suffix; std::string font_format;
int decompose_ligature; int decompose_ligature;
int auto_hint; int auto_hint;
std::string external_hint_tool; std::string external_hint_tool;

View File

@ -23,6 +23,11 @@
#include <GlobalParams.h> #include <GlobalParams.h>
#include "pdf2htmlEX-config.h" #include "pdf2htmlEX-config.h"
#if ENABLE_SVG
#include <cairo.h>
#endif
#include "ArgParser.h" #include "ArgParser.h"
#include "Param.h" #include "Param.h"
#include "HTMLRenderer/HTMLRenderer.h" #include "HTMLRenderer/HTMLRenderer.h"
@ -46,11 +51,28 @@ void show_usage_and_exit(const char * dummy = nullptr)
void show_version_and_exit(const char * dummy = nullptr) void show_version_and_exit(const char * dummy = nullptr)
{ {
cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl; cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl;
cerr << "Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com>" << endl; cerr << "Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com> and other contributers" << endl;
cerr << endl;
cerr << "Libraries: "; cerr << "Libraries: ";
cerr << "poppler " << POPPLER_VERSION << ", "; cerr << "poppler " << POPPLER_VERSION << ", ";
cerr << "libfontforge " << ffw_get_version() << endl; cerr << "libfontforge " << ffw_get_version() << endl;
#if ENABLE_SVG
cerr << "cairo " << cairo_version_string() << endl;
#endif
cerr << endl;
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
cerr << "Supported image format:";
#ifdef ENABLE_LIBPNG
cerr << " png";
#endif
#ifdef ENABLE_LIBJPEG
cerr << " jpg";
#endif
#if ENABLE_SVG
cerr << " svg";
#endif
cerr << endl;
cerr << endl;
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);
} }
@ -113,7 +135,7 @@ void parse_options (int argc, char **argv)
// fonts // fonts
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
.add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)") .add("font-format", &param.font_format, "ttf", "suffix for embedded font files (ttf,otf,woff,svg)")
.add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi") .add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi")
.add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints") .add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints")
.add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)") .add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)")
@ -253,20 +275,20 @@ void check_param()
param.outline_filename = s + ".outline"; param.outline_filename = s + ".outline";
} }
} }
if(param.bg_format == "svg")
{ if(false) { }
#if not ENABLE_SVG #ifdef ENABLE_LIBPNG
cerr << "SVG support is not built" << endl; else if (param.bg_format == "png") { }
exit(EXIT_FAILURE); #endif
#ifdef ENABLE_LIBJPEG
else if (param.bg_format == "jpg") { }
#endif
#if not ENABLE_SVG
else if(param.bg_format == "svg") { }
#endif #endif
}
else if (param.bg_format == "png")
{
// pass
}
else else
{ {
cerr << "Unknown format for background: " << param.bg_format << endl; cerr << "Image format not supported: " << param.bg_format << endl;
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
} }

View File

@ -23,15 +23,26 @@ const map<string, string> GB_ENCODED_FONT_NAME_MAP({
}); });
const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP({ const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP({
{".css", {&Param::embed_css, {".css", {&Param::embed_css,
"<style type=\"text/css\">", "<style type=\"text/css\">",
"</style>", "</style>",
"<link rel=\"stylesheet\" type=\"text/css\" href=\"", "<link rel=\"stylesheet\" type=\"text/css\" href=\"",
"\"/>" }}, "\"/>" }},
{".js", {&Param::embed_javascript, {".js", {&Param::embed_javascript,
"<script type=\"text/javascript\">", "<script type=\"text/javascript\">",
"</script>", "</script>",
"<script type=\"text/javascript\" src=\"", "<script type=\"text/javascript\" src=\"",
"\"></script>" }} "\"></script>" }}
}); });
// Maps a file format name (the extension without a leading dot, e.g. "ttf"
// or "png") to the MIME type emitted for it, such as in base64 `data:` URIs.
// Covers both font formats and background image formats.
const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP({
    {"eot", "application/vnd.ms-fontobject"},
    {"jpg", "image/jpeg"},
    {"otf", "application/x-font-otf"},
    {"png", "image/png"},
    {"svg", "image/svg+xml"},
    {"ttf", "application/x-font-ttf"},
    {"woff", "application/font-woff"},
});
} //namespace pdf2htmlEX } //namespace pdf2htmlEX

View File

@ -38,6 +38,8 @@ struct EmbedStringEntry
}; };
extern const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP; extern const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP;
extern const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP;
} // namespace pdf2htmlEX } // namespace pdf2htmlEX
#endif //CONST_H__ #endif //CONST_H__