1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

new option: --bg-format

This commit is contained in:
Lu Wang 2013-09-18 18:01:56 +08:00
parent bcadc6be6a
commit dd9d21cb19
13 changed files with 203 additions and 156 deletions

View File

@ -181,6 +181,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
src/HTMLRenderer/state.cc
src/HTMLRenderer/text.cc
src/BackgroundRenderer/BackgroundRenderer.h
src/BackgroundRenderer/BackgroundRenderer.cc
src/BackgroundRenderer/SplashBackgroundRenderer.h
src/BackgroundRenderer/SplashBackgroundRenderer.cc
src/BackgroundRenderer/CairoBackgroundRenderer.h

View File

@ -223,6 +223,12 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above.
.B --optimize-text <0|1> (Default: 0)
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
.SS Background Image
.TP
.B --bg-format <format> (Default: "png")
Specify the format for background images. Currently "png" and "svg" are supported.
.SS PDF Protection
.TP

View File

@ -0,0 +1,39 @@
/*
* Background renderer
* Render all those things not supported as Image
*
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/
#include "HTMLRenderer/HTMLRenderer.h"
#include "Param.h"
#include "BackgroundRenderer.h"
#include "SplashBackgroundRenderer.h"
#if ENABLE_SVG
#include "CairoBackgroundRenderer.h"
#endif
namespace pdf2htmlEX {
// Factory for background renderers, selected by image format name.
//   "png" -> a newly allocated SplashBackgroundRenderer
//   "svg" -> a newly allocated CairoBackgroundRenderer (only when ENABLE_SVG is set)
// Any other format, or "svg" in a build without ENABLE_SVG, yields nullptr.
BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
{
    if(format == "png")
        return new SplashBackgroundRenderer(html_renderer, param);

#if ENABLE_SVG
    if(format == "svg")
        return new CairoBackgroundRenderer(html_renderer, param);
#endif

    // unknown format, or a format this build does not support
    return nullptr;
}
} // namespace pdf2htmlEX

View File

@ -2,31 +2,36 @@
* Background renderer
* Render all those things not supported as Image
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/
#ifndef BACKGROUND_RENDERER_H__
#define BACKGROUND_RENDERER_H__
#include "pdf2htmlEX-config.h"
#include <string>
#if ENABLE_SVG
#include "CairoBackgroundRenderer.h"
class PDFDoc;
namespace pdf2htmlEX {
typedef CairoBackgroundRenderer BackgroundRenderer;
}
#else
class Param;
class HTMLRenderer;
class BackgroundRenderer
{
public:
// return nullptr upon failure
static BackgroundRenderer * getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param);
#include "SplashBackgroundRenderer.h"
BackgroundRenderer() {}
virtual ~BackgroundRenderer() {}
namespace pdf2htmlEX {
typedef SplashBackgroundRenderer BackgroundRenderer;
}
virtual void init(PDFDoc * doc) = 0;
virtual void render_page(PDFDoc * doc, int pageno) = 0;
virtual void embed_image(int pageno) = 0;
#endif // ENABLE_SVG
};
} // namespace pdf2htmlEX
#endif //BACKGROUND_RENDERER_H__

View File

@ -1,29 +0,0 @@
/*
* Background renderer
* Render all those things not supported as Image
*
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/
/*
* This file includes forward declarations since HTMLRenderer and BackgroundRenderer have cross references
*/
#ifndef BACKGROUND_RENDERER_FORWARD_H__
#define BACKGROUND_RENDERER_FORWARD_H__
#include "pdf2htmlEX-config.h"
namespace pdf2htmlEX {
#if ENABLE_SVG
class CairoBackgroundRenderer;
typedef CairoBackgroundRenderer BackgroundRenderer;
#else
class SplashBackgroundRenderer;
typedef SplashBackgroundRenderer BackgroundRenderer;
#endif // ENABLE_SVG
}
#endif //BACKGROUND_RENDERER_FORWARD_H__

View File

@ -1,7 +1,7 @@
/*
* CairoBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/
#include <string>
@ -29,6 +29,11 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
}
// BackgroundRenderer interface: prepare this renderer for `doc`
// by forwarding the document to startDoc().
void CairoBackgroundRenderer::init(PDFDoc * doc)
{
    this->startDoc(doc);
}
// Annotation callback that unconditionally answers "false".
// NOTE(review): presumably handed to poppler's page display call so that
// annotations are left out of the background image -- confirm at the call site.
static GBool annot_cb(Annot *, void *)
{
    return false;
}

View File

@ -2,7 +2,7 @@
* Cairo Background renderer
* Render all those things not supported as Image, with Cairo
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/
@ -21,7 +21,7 @@
namespace pdf2htmlEX {
// Based on BackgroundRenderer from poppler
class CairoBackgroundRenderer : public CairoOutputDev
class CairoBackgroundRenderer : public BackgroundRenderer, CairoOutputDev
{
public:
CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param)
@ -33,14 +33,15 @@ public:
virtual ~CairoBackgroundRenderer() { }
virtual void init(PDFDoc * doc);
virtual void render_page(PDFDoc * doc, int pageno);
virtual void embed_image(int pageno);
virtual void drawChar(GfxState *state, double x, double y,
double dx, double dy,
double originX, double originY,
CharCode code, int nBytes, Unicode *u, int uLen);
void render_page(PDFDoc * doc, int pageno);
void embed_image(int pageno);
protected:
HTMLRenderer * html_renderer;
const Param & param;

View File

@ -1,11 +1,12 @@
/*
* SplashBackgroundRenderer.cc
*
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/
#include <fstream>
#include <vector>
#include <memory>
#include <PDFDoc.h>
#include <goo/PNGWriter.h>
@ -19,6 +20,7 @@ namespace pdf2htmlEX {
using std::string;
using std::ifstream;
using std::vector;
using std::unique_ptr;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
@ -63,6 +65,11 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
}
}
// BackgroundRenderer interface: prepare this renderer for `doc`
// by forwarding the document to startDoc().
void SplashBackgroundRenderer::init(PDFDoc * doc)
{
    this->startDoc(doc);
}
// Annotation callback that unconditionally answers "false".
// NOTE(review): presumably handed to poppler's page display call so that
// annotations are left out of the background image -- confirm at the call site.
static GBool annot_cb(Annot *, void *)
{
    return false;
}
@ -134,7 +141,8 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
if(!f)
throw string("Cannot open file for background image " ) + filename;
ImgWriter * writer = new PNGWriter();
// use unique_ptr to auto delete the object upon exception
auto writer = unique_ptr<ImgWriter>(new PNGWriter);
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi))
throw "Cannot initialize PNGWriter";
@ -157,7 +165,6 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
throw "Cannot write background image";
}
delete writer;
fclose(f);
}

View File

@ -2,8 +2,7 @@
* Splash Background renderer
* Render all those things not supported as Image, with Splash
*
* by WangLu
* 2012.08.06
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/
@ -23,7 +22,7 @@
namespace pdf2htmlEX {
// Based on BackgroundRenderer from poppler
class SplashBackgroundRenderer : public SplashOutputDev
class SplashBackgroundRenderer : public BackgroundRenderer, SplashOutputDev
{
public:
static const SplashColor white;
@ -36,6 +35,10 @@ public:
virtual ~SplashBackgroundRenderer() { }
virtual void init(PDFDoc * doc);
virtual void render_page(PDFDoc * doc, int pageno);
virtual void embed_image(int pageno);
#if POPPLER_OLDER_THAN_0_23_0
virtual void startPage(int pageNum, GfxState *state);
#else
@ -57,11 +60,8 @@ public:
SplashOutputDev::fill(state);
}
void render_page(PDFDoc * doc, int pageno);
void embed_image(int pageno);
void dump_image(const char * filename, int x1, int y1, int x2, int y2);
protected:
void dump_image(const char * filename, int x1, int y1, int x2, int y2);
HTMLRenderer * html_renderer;
const Param & param;
};

View File

@ -30,7 +30,7 @@
#include "StateManager.h"
#include "HTMLTextPage.h"
#include "BackgroundRenderer/BackgroundRenderer_forward.h"
#include "BackgroundRenderer/BackgroundRenderer.h"
#include "util/const.h"
#include "util/misc.h"
@ -321,8 +321,10 @@ protected:
StringFormatter str_fmt;
// render background image
friend class SplashBackgroundRenderer;
friend class CairoBackgroundRenderer;
friend class SplashBackgroundRenderer; // ugly!
#if ENABLE_SVG
friend class CairoBackgroundRenderer; // ugly!
#endif
BackgroundRenderer * bg_renderer;

View File

@ -100,8 +100,10 @@ void HTMLRenderer::process(PDFDoc *doc)
bg_renderer = nullptr;
if(param.process_nontext)
{
bg_renderer = new BackgroundRenderer(this, param);
bg_renderer->startDoc(doc);
bg_renderer = BackgroundRenderer::getBackgroundRenderer(param.bg_format, this, param);
if(!bg_renderer)
throw "Cannot initialize background renderer, unsupported format";
bg_renderer->init(doc);
}
int page_count = (param.last_page - param.first_page + 1);

View File

@ -57,6 +57,9 @@ struct Param
int space_as_offset;
int tounicode;
int optimize_text;
// background image
std::string bg_format;
// encryption
std::string owner_password, user_password;
@ -67,9 +70,7 @@ struct Param
std::string data_dir;
int css_draw;
int debug;
int wa_unicode;
// non-optional
std::string input_filename, output_filename;
// not a parameter

View File

@ -36,18 +36,6 @@ using namespace pdf2htmlEX;
Param param;
ArgParser argparser;
// Handler for the deprecated --single-html option: prints the replacement
// option on stderr and terminates the program with EXIT_FAILURE.
// `dummy` is never read; NOTE(review): presumably it exists only to match the
// callback signature expected by the argument parser -- confirm in ArgParser.
void deprecated_single_html(const char * dummy = nullptr)
{
    (void)dummy;
    // spell the option as it is registered ("single-html"), not "single_html"
    cerr << "--single-html is deprecated. Use `--embed CFIJO` instead." << endl;
    exit(EXIT_FAILURE);
}
// Handler for the removed --remove-unused-glyph option: tells the user on
// stderr that the option no longer exists and terminates with EXIT_FAILURE.
// `dummy` is never read; NOTE(review): presumably it exists only to match the
// callback signature expected by the argument parser -- confirm in ArgParser.
void removed_remove_unsed_glyph(const char * dummy = nullptr)
{
    (void)dummy;
    // spell the option as it is registered ("remove-unused-glyph")
    cerr << "--remove-unused-glyph is removed. Use a PDF optimization tool instead." << endl;
    exit(EXIT_FAILURE);
}
void show_usage_and_exit(const char * dummy = nullptr)
{
cerr << "Usage: pdf2htmlEX [options] <input.pdf> [<output.html>]" << endl;
@ -141,6 +129,9 @@ void parse_options (int argc, char **argv)
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
.add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text")
// background image
.add("bg-format", &param.bg_format, "png", "specify background image format")
// encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
@ -158,10 +149,6 @@ void parse_options (int argc, char **argv)
.add("version,v", "print copyright and version info", &show_version_and_exit)
.add("help,h", "print usage information", &show_usage_and_exit)
// deprecated
.add("single-html", "", &deprecated_single_html)
.add("remove-unused-glyph", "", &removed_remove_unsed_glyph)
.add("", &param.input_filename, "", "")
.add("", &param.output_filename, "", "")
;
@ -192,14 +179,103 @@ void parse_options (int argc, char **argv)
}
}
int main(int argc, char **argv)
// Validate the global `param` and fill in defaults derived from the input
// file name. Intended to run after parse_options() has populated `param`.
//  - calls show_usage_and_exit() when no input file was given
//  - derives output/page/css/outline file names that were left empty
//  - makes sure the page file name contains a page number placeholder
//  - exits with EXIT_FAILURE for an unknown --bg-format value, or for "svg"
//    when the program was built without ENABLE_SVG
void check_param()
{
    if(param.input_filename.empty())
    {
        show_usage_and_exit();
    }

    if(param.output_filename.empty())
    {
        const string s = get_filename(param.input_filename);
        if(get_suffix(param.input_filename) == ".pdf")
        {
            // drop the 4-character ".pdf" suffix
            param.output_filename = s.substr(0, s.size() - 4) + ".html";
        }
        else
        {
            param.output_filename = s + ".html";
        }
    }

    if(param.page_filename.empty())
    {
        const string s = get_filename(param.input_filename);
        if(get_suffix(param.input_filename) == ".pdf")
        {
            param.page_filename = s.substr(0, s.size() - 4) + "%d.page";
        }
        else
        {
            param.page_filename = s + "%d.page";
        }
        sanitize_filename(param.page_filename);
    }
    else
    {
        // Need to make sure we have a page number placeholder in the filename
        if(!sanitize_filename(param.page_filename))
        {
            // Inject the placeholder just before the file extension
            const string suffix = get_suffix(param.page_filename);
            param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix;
            sanitize_filename(param.page_filename);
        }
    }

    if(param.css_filename.empty())
    {
        const string s = get_filename(param.input_filename);
        if(get_suffix(param.input_filename) == ".pdf")
        {
            param.css_filename = s.substr(0, s.size() - 4) + ".css";
        }
        else
        {
            // for non-.pdf inputs a default is only chosen when pages are not split
            if(!param.split_pages)
                param.css_filename = s + ".css";
        }
    }

    if(param.outline_filename.empty())
    {
        const string s = get_filename(param.input_filename);
        if(get_suffix(param.input_filename) == ".pdf")
        {
            param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
        }
        else
        {
            // for non-.pdf inputs a default is only chosen when pages are not split
            if(!param.split_pages)
                param.outline_filename = s + ".outline";
        }
    }

    if(param.bg_format == "svg")
    {
#if !ENABLE_SVG
        cerr << "SVG support is not built" << endl;
        exit(EXIT_FAILURE);
#endif
    }
    else if(param.bg_format == "png")
    {
        // pass
    }
    else
    {
        cerr << "Unknown format for background: " << param.bg_format << endl;
        exit(EXIT_FAILURE);
    }
}
int main(int argc, char **argv)
{
parse_options(argc, argv);
check_param();
//prepare the directories
{
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
@ -243,89 +319,20 @@ int main(int argc, char **argv)
delete ownerPW;
}
if (!doc->isOk()) {
if (!doc->isOk())
throw "Cannot read the file";
}
// check for copy permission
if (!doc->okToCopy()) {
if (param.no_drm == 0) {
if (!doc->okToCopy())
{
if (param.no_drm == 0)
throw "Copying of text from this document is not allowed.";
}
cerr << "Document has copy-protection bit set." << endl;
}
param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
if(param.output_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.output_filename = s.substr(0, s.size() - 4) + ".html";
}
else
{
param.output_filename = s + ".html";
}
}
if(param.page_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.page_filename = s.substr(0, s.size() - 4) + "%d.page";
}
else
{
param.page_filename = s + "%d.page";
}
sanitize_filename(param.page_filename);
}
else
{
// Need to make sure we have a page number placeholder in the filename
if(!sanitize_filename(param.page_filename))
{
// Inject the placeholder just before the file extension
const string suffix = get_suffix(param.page_filename);
param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix;
sanitize_filename(param.page_filename);
}
}
if(param.css_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.css_filename = s.substr(0, s.size() - 4) + ".css";
}
else
{
if(!param.split_pages)
param.css_filename = s + ".css";
}
}
if(param.outline_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
}
else
{
if(!param.split_pages)
param.outline_filename = s + ".outline";
}
}
unique_ptr<HTMLRenderer>(new HTMLRenderer(param))->process(doc);