1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

support jpeg

This commit is contained in:
Lu Wang 2013-09-18 20:24:48 +08:00
parent b4bf67629c
commit f04941bed0
10 changed files with 168 additions and 74 deletions

View File

@ -68,7 +68,7 @@ Read `LICENSE` for more detail.
### Contacts
* Lu Wang <coolwanglu@gmail.com> or [coolwanglu@twitter](https://twitter.com/coolwanglu)
* 王璐 (Lu Wang) <coolwanglu@gmail.com> or [coolwanglu@twitter](https://twitter.com/coolwanglu)
- For general and personal questions
- If you want to report an issue, please refer to [How to report](https://github.com/coolwanglu/pdf2htmlEX/wiki/How-to-report)
- Accepting messages in **中文**, **English** or **日本語**

View File

@ -151,8 +151,8 @@ Specify whether the local matched fonts, for fonts not embedded in PDF, should b
If this switch is off, only font names are exported such that web browsers may try to find proper fonts themselves, and that might cause issues about incorrect font metrics.
.TP
.B --font-suffix <suffix> (Default: .ttf)
Specify the suffix of fonts extracted from the PDF file.
.B --font-format <format> (Default: ttf)
Specify the format of fonts extracted from the PDF file.
.TP
.B --decompose-ligature <0|1> (Default: 0)
@ -227,7 +227,13 @@ If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for
.TP
.B --bg-format <format> (Default: "png")
Specify the format for background images, currently "png" and "svg" are supported.
Specify the format for background images, run `pdf2htmlEX -v` to check all supported formats.
.SS Background Image
.TP
.B --bg-format <format> (Default: png)
Specify the background image format. Run `pdf2htmlEX -v` to check all supported formats.
.SS PDF Protection

View File

@ -5,6 +5,8 @@
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/
#include <poppler-config.h>
#include "HTMLRenderer/HTMLRenderer.h"
#include "Param.h"
@ -18,22 +20,26 @@ namespace pdf2htmlEX {
BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
{
#ifdef ENABLE_LIBPNG
if(format == "png")
{
return new SplashBackgroundRenderer(html_renderer, param);
}
else if (format == "svg")
{
#if ENABLE_SVG
return new CairoBackgroundRenderer(html_renderer, param);
#else
return nullptr;
#endif
}
else
#ifdef ENABLE_LIBJPEG
if(format == "jpg")
{
return nullptr;
return new SplashBackgroundRenderer(html_renderer, param);
}
#endif
#if ENABLE_SVG
if (format == "svg")
{
return new CairoBackgroundRenderer(html_renderer, param);
}
#endif
return nullptr;
}
} // namespace pdf2htmlEX

View File

@ -26,7 +26,21 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen)
{
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
// draw characters as image when
// - in fallback mode
// - OR there is special filling method
// - OR using a writing mode font
// - OR using a Type 3 font
if((param.fallback)
|| ( (state->getFont())
&& ( (state->getFont()->getWMode())
|| (state->getFont()->getType() == fontType3)
)
)
)
{
CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
}
}
void CairoBackgroundRenderer::init(PDFDoc * doc)
@ -58,7 +72,7 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
if(param.embed_image)
html_renderer->tmp_files.add((char*)fn);
surface = cairo_svg_surface_create((char*)fn, page_width, page_height);
surface = cairo_svg_surface_create((char*)fn, page_width * DEFAULT_DPI, page_height * DEFAULT_DPI);
}
cairo_svg_surface_restrict_to_version(surface, CAIRO_SVG_VERSION_1_2);
cairo_surface_set_fallback_resolution(surface, param.h_dpi, param.v_dpi);
@ -68,7 +82,13 @@ void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
setPrinting(false); // TODO, check the parameter
cairo_save(cr);
// TODO apply crop box
// zoom the image to prevent CairoOutputDev from rounding/increasing thin borders
{
cairo_matrix_t matrix;
cairo_matrix_init_identity(&matrix);
cairo_matrix_scale(&matrix, DEFAULT_DPI, DEFAULT_DPI);
cairo_transform(cr, &matrix);
}
doc->displayPage(this, pageno, DEFAULT_DPI, DEFAULT_DPI,
0,

View File

@ -8,10 +8,13 @@
#include <vector>
#include <memory>
#include <poppler-config.h>
#include <PDFDoc.h>
#include <goo/PNGWriter.h>
#include <goo/JpegWriter.h>
#include "Base64Stream.h"
#include "util/const.h"
#include "SplashBackgroundRenderer.h"
@ -31,13 +34,11 @@ const SplashColor SplashBackgroundRenderer::white = {255,255,255};
*/
#if POPPLER_OLDER_THAN_0_23_0
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state)
#else
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA)
#endif
{
#if POPPLER_OLDER_THAN_0_23_0
SplashOutputDev::startPage(pageNum, state);
#else
void SplashBackgroundRenderer::startPage(int pageNum, GfxState *state, XRef *xrefA)
{
SplashOutputDev::startPage(pageNum, state, xrefA);
#endif
clearModRegion();
@ -93,7 +94,7 @@ void SplashBackgroundRenderer::embed_image(int pageno)
if((xmin <= xmax) && (ymin <= ymax))
{
{
auto fn = html_renderer->str_fmt("%s/bg%x.png", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno);
auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, param.bg_format.c_str());
if(param.embed_image)
html_renderer->tmp_files.add((char*)fn);
@ -115,11 +116,17 @@ void SplashBackgroundRenderer::embed_image(int pageno)
if(param.embed_image)
{
auto path = html_renderer->str_fmt("%s/bg%x.png", param.tmp_dir.c_str(), pageno);
auto path = html_renderer->str_fmt("%s/bg%x.%s", param.tmp_dir.c_str(), pageno, param.bg_format.c_str());
ifstream fin((char*)path, ifstream::binary);
if(!fin)
throw string("Cannot read background image ") + (char*)path;
f_page << "data:image/png;base64," << Base64Stream(fin);
auto iter = FORMAT_MIME_TYPE_MAP.find(param.bg_format);
if(iter == FORMAT_MIME_TYPE_MAP.end())
throw string("Image format not supported: ") + param.bg_format;
string mime_type = iter->second;
f_page << "data:" << mime_type << ";base64," << Base64Stream(fin);
}
else
{
@ -142,7 +149,26 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
throw string("Cannot open file for background image " ) + filename;
// use unique_ptr to auto delete the object upon exception
auto writer = unique_ptr<ImgWriter>(new PNGWriter);
unique_ptr<ImgWriter> writer;
if(false) { }
#ifdef ENABLE_LIBPNG
else if(param.bg_format == "png")
{
writer = unique_ptr<ImgWriter>(new PNGWriter);
}
#endif
#ifdef ENABLE_LIBJPEG
else if(param.bg_format == "jpg")
{
writer = unique_ptr<ImgWriter>(new JpegWriter);
}
#endif
else
{
throw string("Image format not supported: ") + param.bg_format;
}
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi))
throw "Cannot initialize PNGWriter";

View File

@ -178,7 +178,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
if(param.debug)
{
auto fn = str_fmt("%s/__raw_font_%llx", param.tmp_dir.c_str(), info.id, param.font_suffix.c_str());
auto fn = str_fmt("%s/__raw_font_%llx%s", param.tmp_dir.c_str(), info.id, get_suffix(filepath).c_str());
tmp_files.add((char*)fn);
ofstream((char*)fn, ofstream::binary) << ifstream(filepath).rdbuf();
}
@ -528,9 +528,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
*/
ffw_reencode_unicode_full();
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param.tmp_dir.c_str(), param.font_suffix.c_str());
string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1.%s", param.tmp_dir.c_str(), param.font_format.c_str());
tmp_files.add(cur_tmp_fn);
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param.tmp_dir.c_str(), param.font_suffix.c_str());
string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2.%s", param.tmp_dir.c_str(), param.font_format.c_str());
tmp_files.add(other_tmp_fn);
ffw_save(cur_tmp_fn.c_str());
@ -571,9 +571,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
* Ascent/Descent are not used in PDF, and the values in PDF may be wrong or inconsistent (there are 3 sets of them)
* We need to reload in order to retrieve/fix accurate ascent/descent, some info won't be written to the font by fontforge until saved.
*/
string fn = (char*)str_fmt("%s/f%llx%s",
string fn = (char*)str_fmt("%s/f%llx.%s",
(param.embed_font ? param.tmp_dir : param.dest_dir).c_str(),
info.id, param.font_suffix.c_str());
info.id, param.font_format.c_str());
if(param.embed_font)
tmp_files.add(fn);
@ -685,7 +685,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
if(path != "")
{
embed_font(path, font, info);
export_remote_font(info, param.font_suffix, font);
export_remote_font(info, param.font_format, font);
}
else
{
@ -712,7 +712,7 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
if(localfontloc != nullptr)
{
embed_font(string(localfontloc->path->getCString()), font, info);
export_remote_font(info, param.font_suffix, font);
export_remote_font(info, param.font_format, font);
delete localfontloc;
return;
}
@ -739,45 +739,46 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
export_local_font(info, font, fontname, "");
}
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suffix, GfxFont * font)
void HTMLRenderer::export_remote_font(const FontInfo & info, const string & format, GfxFont * font)
{
string mime_type, format;
if(suffix == ".ttf")
string css_font_format;
if(format == "ttf")
{
format = "truetype";
mime_type = "application/x-font-ttf";
css_font_format = "truetype";
}
else if(suffix == ".otf")
else if(format == "otf")
{
format = "opentype";
mime_type = "application/x-font-otf";
css_font_format = "opentype";
}
else if(suffix == ".woff")
else if(format == "woff")
{
format = "woff";
mime_type = "application/font-woff";
css_font_format = "woff";
}
else if(suffix == ".eot")
else if(format == "eot")
{
format = "embedded-opentype";
mime_type = "application/vnd.ms-fontobject";
css_font_format = "embedded-opentype";
}
else if(suffix == ".svg")
else if(format == "svg")
{
format = "svg";
mime_type = "image/svg+xml";
css_font_format = "svg";
}
else
{
cerr << "Warning: unknown font suffix: " << suffix << endl;
throw string("Warning: unknown font format: ") + format;
}
auto iter = FORMAT_MIME_TYPE_MAP.find(format);
if(iter == FORMAT_MIME_TYPE_MAP.end())
{
throw string("Warning: unknown font format: ") + format;
}
string mime_type = iter->second;
f_css.fs << "@font-face{"
<< "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"
<< "src:url(";
{
auto fn = str_fmt("f%llx%s", info.id, suffix.c_str());
auto fn = str_fmt("f%llx.%s", info.id, format.c_str());
if(param.embed_font)
{
auto path = param.tmp_dir + "/" + (char*)fn;
@ -793,7 +794,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
}
f_css.fs << ")"
<< "format(\"" << format << "\");"
<< "format(\"" << css_font_format << "\");"
<< "}" // end of @font-face
<< "." << CSS::FONT_FAMILY_CN << info.id << "{"
<< "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"

View File

@ -42,7 +42,7 @@ struct Param
// fonts
int embed_external_font;
std::string font_suffix;
std::string font_format;
int decompose_ligature;
int auto_hint;
std::string external_hint_tool;

View File

@ -23,6 +23,11 @@
#include <GlobalParams.h>
#include "pdf2htmlEX-config.h"
#if ENABLE_SVG
#include <cairo.h>
#endif
#include "ArgParser.h"
#include "Param.h"
#include "HTMLRenderer/HTMLRenderer.h"
@ -46,11 +51,28 @@ void show_usage_and_exit(const char * dummy = nullptr)
void show_version_and_exit(const char * dummy = nullptr)
{
cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl;
cerr << "Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com>" << endl;
cerr << "Copyright 2012,2013 Lu Wang <coolwanglu@gmail.com> and other contributers" << endl;
cerr << endl;
cerr << "Libraries: ";
cerr << "poppler " << POPPLER_VERSION << ", ";
cerr << "libfontforge " << ffw_get_version() << endl;
#if ENABLE_SVG
cerr << "cairo " << cairo_version_string() << endl;
#endif
cerr << endl;
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
cerr << "Supported image format:";
#ifdef ENABLE_LIBPNG
cerr << " png";
#endif
#ifdef ENABLE_LIBJPEG
cerr << " jpg";
#endif
#if ENABLE_SVG
cerr << " svg";
#endif
cerr << endl;
cerr << endl;
exit(EXIT_SUCCESS);
}
@ -113,7 +135,7 @@ void parse_options (int argc, char **argv)
// fonts
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
.add("font-suffix", &param.font_suffix, ".ttf", "suffix for embedded font files (.ttf,.otf,.woff,.svg)")
.add("font-format", &param.font_format, "ttf", "suffix for embedded font files (ttf,otf,woff,svg)")
.add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi")
.add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints")
.add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)")
@ -253,20 +275,20 @@ void check_param()
param.outline_filename = s + ".outline";
}
}
if(param.bg_format == "svg")
{
#if not ENABLE_SVG
cerr << "SVG support is not built" << endl;
exit(EXIT_FAILURE);
if(false) { }
#ifdef ENABLE_LIBPNG
else if (param.bg_format == "png") { }
#endif
#ifdef ENABLE_LIBJPEG
else if (param.bg_format == "jpg") { }
#endif
#if not ENABLE_SVG
else if(param.bg_format == "svg") { }
#endif
}
else if (param.bg_format == "png")
{
// pass
}
else
{
cerr << "Unknown format for background: " << param.bg_format << endl;
cerr << "Image format not supported: " << param.bg_format << endl;
exit(EXIT_FAILURE);
}
}

View File

@ -23,15 +23,26 @@ const map<string, string> GB_ENCODED_FONT_NAME_MAP({
});
const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP({
{".css", {&Param::embed_css,
"<style type=\"text/css\">",
"</style>",
"<link rel=\"stylesheet\" type=\"text/css\" href=\"",
"\"/>" }},
{".js", {&Param::embed_javascript,
"<script type=\"text/javascript\">",
"</script>",
"<script type=\"text/javascript\" src=\"",
"\"></script>" }}
{".css", {&Param::embed_css,
"<style type=\"text/css\">",
"</style>",
"<link rel=\"stylesheet\" type=\"text/css\" href=\"",
"\"/>" }},
{".js", {&Param::embed_javascript,
"<script type=\"text/javascript\">",
"</script>",
"<script type=\"text/javascript\" src=\"",
"\"></script>" }}
});
// Maps a bare file-format name (no leading dot, e.g. "png" or "ttf") to the
// MIME type string associated with it. It is looked up with the background
// image format and with the font format; callers treat a missing key as an
// unsupported format.
const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP({
    {"eot", "application/vnd.ms-fontobject"},
    {"jpg", "image/jpeg"},
    {"otf", "application/x-font-otf"},
    {"png", "image/png"},
    {"svg", "image/svg+xml"},
    {"ttf", "application/x-font-ttf"},
    {"woff", "application/font-woff"},
});
} //namespace pdf2htmlEX

View File

@ -38,6 +38,8 @@ struct EmbedStringEntry
};
extern const std::map<std::string, EmbedStringEntry> EMBED_STRING_MAP;
extern const std::map<std::string, std::string> FORMAT_MIME_TYPE_MAP;
} // namespace pdf2htmlEX
#endif //CONST_H__