mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
new option: --bg-format
This commit is contained in:
parent
bcadc6be6a
commit
dd9d21cb19
@ -181,6 +181,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
|
||||
src/HTMLRenderer/state.cc
|
||||
src/HTMLRenderer/text.cc
|
||||
src/BackgroundRenderer/BackgroundRenderer.h
|
||||
src/BackgroundRenderer/BackgroundRenderer.cc
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.h
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.cc
|
||||
src/BackgroundRenderer/CairoBackgroundRenderer.h
|
||||
|
@ -223,6 +223,12 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
||||
.B --optimize-text <0|1> (Default: 0)
|
||||
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
|
||||
|
||||
.SS Background Image
|
||||
|
||||
.TP
|
||||
.B --bg-format <format> (Default: "png")
|
||||
Specify the format for background images. Currently "png" and "svg" are supported.
|
||||
|
||||
.SS PDF Protection
|
||||
|
||||
.TP
|
||||
|
39
src/BackgroundRenderer/BackgroundRenderer.cc
Normal file
39
src/BackgroundRenderer/BackgroundRenderer.cc
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Background renderer
|
||||
* Render all those things not supported as Image
|
||||
*
|
||||
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
#include "HTMLRenderer/HTMLRenderer.h"
|
||||
#include "Param.h"
|
||||
|
||||
#include "BackgroundRenderer.h"
|
||||
#include "SplashBackgroundRenderer.h"
|
||||
#if ENABLE_SVG
|
||||
#include "CairoBackgroundRenderer.h"
|
||||
#endif
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
BackgroundRenderer * BackgroundRenderer::getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param)
|
||||
{
|
||||
if(format == "png")
|
||||
{
|
||||
return new SplashBackgroundRenderer(html_renderer, param);
|
||||
}
|
||||
else if (format == "svg")
|
||||
{
|
||||
#if ENABLE_SVG
|
||||
return new CairoBackgroundRenderer(html_renderer, param);
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace pdf2htmlEX
|
@ -2,31 +2,36 @@
|
||||
* Background renderer
|
||||
* Render all those things not supported as Image
|
||||
*
|
||||
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
|
||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
#ifndef BACKGROUND_RENDERER_H__
|
||||
#define BACKGROUND_RENDERER_H__
|
||||
|
||||
#include "pdf2htmlEX-config.h"
|
||||
#include <string>
|
||||
|
||||
#if ENABLE_SVG
|
||||
|
||||
#include "CairoBackgroundRenderer.h"
|
||||
class PDFDoc;
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
typedef CairoBackgroundRenderer BackgroundRenderer;
|
||||
}
|
||||
|
||||
#else
|
||||
class Param;
|
||||
class HTMLRenderer;
|
||||
class BackgroundRenderer
|
||||
{
|
||||
public:
|
||||
// return nullptr upon failure
|
||||
static BackgroundRenderer * getBackgroundRenderer(const std::string & format, HTMLRenderer * html_renderer, const Param & param);
|
||||
|
||||
#include "SplashBackgroundRenderer.h"
|
||||
BackgroundRenderer() {}
|
||||
virtual ~BackgroundRenderer() {}
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
typedef SplashBackgroundRenderer BackgroundRenderer;
|
||||
}
|
||||
virtual void init(PDFDoc * doc) = 0;
|
||||
virtual void render_page(PDFDoc * doc, int pageno) = 0;
|
||||
virtual void embed_image(int pageno) = 0;
|
||||
|
||||
#endif // ENABLE_SVG
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
||||
#endif //BACKGROUND_RENDERER_H__
|
||||
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Background renderer
|
||||
* Render all those things not supported as Image
|
||||
*
|
||||
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file includes forward declarations since HTMLRenderer and BackgroundRenderer have cross references
|
||||
*/
|
||||
|
||||
#ifndef BACKGROUND_RENDERER_FORWARD_H__
|
||||
#define BACKGROUND_RENDERER_FORWARD_H__
|
||||
|
||||
#include "pdf2htmlEX-config.h"
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
#if ENABLE_SVG
|
||||
class CairoBackgroundRenderer;
|
||||
typedef CairoBackgroundRenderer BackgroundRenderer;
|
||||
#else
|
||||
class SplashBackgroundRenderer;
|
||||
typedef SplashBackgroundRenderer BackgroundRenderer;
|
||||
#endif // ENABLE_SVG
|
||||
}
|
||||
|
||||
|
||||
#endif //BACKGROUND_RENDERER_FORWARD_H__
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* CairoBackgroundRenderer.cc
|
||||
*
|
||||
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
|
||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
@ -29,6 +29,11 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
||||
// CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen);
|
||||
}
|
||||
|
||||
void CairoBackgroundRenderer::init(PDFDoc * doc)
|
||||
{
|
||||
startDoc(doc);
|
||||
}
|
||||
|
||||
static GBool annot_cb(Annot *, void *) {
|
||||
return false;
|
||||
};
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Cairo Background renderer
|
||||
* Render all those things not supported as Image, with Cairo
|
||||
*
|
||||
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
|
||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
@ -21,7 +21,7 @@
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
// Based on BackgroundRenderer from poppler
|
||||
class CairoBackgroundRenderer : public CairoOutputDev
|
||||
class CairoBackgroundRenderer : public BackgroundRenderer, CairoOutputDev
|
||||
{
|
||||
public:
|
||||
CairoBackgroundRenderer(HTMLRenderer * html_renderer, const Param & param)
|
||||
@ -33,14 +33,15 @@ public:
|
||||
|
||||
virtual ~CairoBackgroundRenderer() { }
|
||||
|
||||
virtual void init(PDFDoc * doc);
|
||||
virtual void render_page(PDFDoc * doc, int pageno);
|
||||
virtual void embed_image(int pageno);
|
||||
|
||||
virtual void drawChar(GfxState *state, double x, double y,
|
||||
double dx, double dy,
|
||||
double originX, double originY,
|
||||
CharCode code, int nBytes, Unicode *u, int uLen);
|
||||
|
||||
void render_page(PDFDoc * doc, int pageno);
|
||||
void embed_image(int pageno);
|
||||
|
||||
protected:
|
||||
HTMLRenderer * html_renderer;
|
||||
const Param & param;
|
||||
|
@ -1,11 +1,12 @@
|
||||
/*
|
||||
* SplashBackgroundRenderer.cc
|
||||
*
|
||||
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
|
||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include <PDFDoc.h>
|
||||
#include <goo/PNGWriter.h>
|
||||
@ -19,6 +20,7 @@ namespace pdf2htmlEX {
|
||||
using std::string;
|
||||
using std::ifstream;
|
||||
using std::vector;
|
||||
using std::unique_ptr;
|
||||
|
||||
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
|
||||
|
||||
@ -63,6 +65,11 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
||||
}
|
||||
}
|
||||
|
||||
void SplashBackgroundRenderer::init(PDFDoc * doc)
|
||||
{
|
||||
startDoc(doc);
|
||||
}
|
||||
|
||||
static GBool annot_cb(Annot *, void *) {
|
||||
return false;
|
||||
};
|
||||
@ -134,7 +141,8 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
|
||||
if(!f)
|
||||
throw string("Cannot open file for background image " ) + filename;
|
||||
|
||||
ImgWriter * writer = new PNGWriter();
|
||||
// use unique_ptr to auto delete the object upon exception
|
||||
auto writer = unique_ptr<ImgWriter>(new PNGWriter);
|
||||
if(!writer->init(f, width, height, param.h_dpi, param.v_dpi))
|
||||
throw "Cannot initialize PNGWriter";
|
||||
|
||||
@ -157,7 +165,6 @@ void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1,
|
||||
throw "Cannot write background image";
|
||||
}
|
||||
|
||||
delete writer;
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
|
@ -2,8 +2,7 @@
|
||||
* Splash Background renderer
|
||||
* Render all those things not supported as Image, with Splash
|
||||
*
|
||||
* by WangLu
|
||||
* 2012.08.06
|
||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
|
||||
|
||||
@ -23,7 +22,7 @@
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
// Based on BackgroundRenderer from poppler
|
||||
class SplashBackgroundRenderer : public SplashOutputDev
|
||||
class SplashBackgroundRenderer : public BackgroundRenderer, SplashOutputDev
|
||||
{
|
||||
public:
|
||||
static const SplashColor white;
|
||||
@ -36,6 +35,10 @@ public:
|
||||
|
||||
virtual ~SplashBackgroundRenderer() { }
|
||||
|
||||
virtual void init(PDFDoc * doc);
|
||||
virtual void render_page(PDFDoc * doc, int pageno);
|
||||
virtual void embed_image(int pageno);
|
||||
|
||||
#if POPPLER_OLDER_THAN_0_23_0
|
||||
virtual void startPage(int pageNum, GfxState *state);
|
||||
#else
|
||||
@ -57,11 +60,8 @@ public:
|
||||
SplashOutputDev::fill(state);
|
||||
}
|
||||
|
||||
void render_page(PDFDoc * doc, int pageno);
|
||||
void embed_image(int pageno);
|
||||
void dump_image(const char * filename, int x1, int y1, int x2, int y2);
|
||||
|
||||
protected:
|
||||
void dump_image(const char * filename, int x1, int y1, int x2, int y2);
|
||||
HTMLRenderer * html_renderer;
|
||||
const Param & param;
|
||||
};
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "StateManager.h"
|
||||
#include "HTMLTextPage.h"
|
||||
|
||||
#include "BackgroundRenderer/BackgroundRenderer_forward.h"
|
||||
#include "BackgroundRenderer/BackgroundRenderer.h"
|
||||
|
||||
#include "util/const.h"
|
||||
#include "util/misc.h"
|
||||
@ -321,8 +321,10 @@ protected:
|
||||
StringFormatter str_fmt;
|
||||
|
||||
// render background image
|
||||
friend class SplashBackgroundRenderer;
|
||||
friend class CairoBackgroundRenderer;
|
||||
friend class SplashBackgroundRenderer; // ugly!
|
||||
#if ENABLE_SVG
|
||||
friend class CairoBackgroundRenderer; // ugly!
|
||||
#endif
|
||||
BackgroundRenderer * bg_renderer;
|
||||
|
||||
|
||||
|
@ -100,8 +100,10 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
bg_renderer = nullptr;
|
||||
if(param.process_nontext)
|
||||
{
|
||||
bg_renderer = new BackgroundRenderer(this, param);
|
||||
bg_renderer->startDoc(doc);
|
||||
bg_renderer = BackgroundRenderer::getBackgroundRenderer(param.bg_format, this, param);
|
||||
if(!bg_renderer)
|
||||
throw "Cannot initialize background renderer, unsupported format";
|
||||
bg_renderer->init(doc);
|
||||
}
|
||||
|
||||
int page_count = (param.last_page - param.first_page + 1);
|
||||
|
@ -57,6 +57,9 @@ struct Param
|
||||
int space_as_offset;
|
||||
int tounicode;
|
||||
int optimize_text;
|
||||
|
||||
// background image
|
||||
std::string bg_format;
|
||||
|
||||
// encryption
|
||||
std::string owner_password, user_password;
|
||||
@ -67,9 +70,7 @@ struct Param
|
||||
std::string data_dir;
|
||||
int css_draw;
|
||||
int debug;
|
||||
int wa_unicode;
|
||||
|
||||
// non-optional
|
||||
std::string input_filename, output_filename;
|
||||
|
||||
// not a parameter
|
||||
|
@ -36,18 +36,6 @@ using namespace pdf2htmlEX;
|
||||
Param param;
|
||||
ArgParser argparser;
|
||||
|
||||
void deprecated_single_html(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "--single_html is deprecated. Use `--embed CFIJO` instead." << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
void removed_remove_unsed_glyph(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "--remove-unsed-glyph is removed. Use a PDF optimization tool instead." << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
void show_usage_and_exit(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "Usage: pdf2htmlEX [options] <input.pdf> [<output.html>]" << endl;
|
||||
@ -141,6 +129,9 @@ void parse_options (int argc, char **argv)
|
||||
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
|
||||
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
|
||||
.add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text")
|
||||
|
||||
// background image
|
||||
.add("bg-format", &param.bg_format, "png", "specify background image format")
|
||||
|
||||
// encryption
|
||||
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
|
||||
@ -158,10 +149,6 @@ void parse_options (int argc, char **argv)
|
||||
.add("version,v", "print copyright and version info", &show_version_and_exit)
|
||||
.add("help,h", "print usage information", &show_usage_and_exit)
|
||||
|
||||
// deprecated
|
||||
.add("single-html", "", &deprecated_single_html)
|
||||
.add("remove-unused-glyph", "", &removed_remove_unsed_glyph)
|
||||
|
||||
.add("", &param.input_filename, "", "")
|
||||
.add("", &param.output_filename, "", "")
|
||||
;
|
||||
@ -192,14 +179,103 @@ void parse_options (int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
void check_param()
|
||||
{
|
||||
parse_options(argc, argv);
|
||||
if (param.input_filename == "")
|
||||
{
|
||||
show_usage_and_exit();
|
||||
}
|
||||
|
||||
if(param.output_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.output_filename = s.substr(0, s.size() - 4) + ".html";
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
param.output_filename = s + ".html";
|
||||
}
|
||||
}
|
||||
|
||||
if(param.page_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.page_filename = s.substr(0, s.size() - 4) + "%d.page";
|
||||
}
|
||||
else
|
||||
{
|
||||
param.page_filename = s + "%d.page";
|
||||
}
|
||||
sanitize_filename(param.page_filename);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// Need to make sure we have a page number placeholder in the filename
|
||||
if(!sanitize_filename(param.page_filename))
|
||||
{
|
||||
// Inject the placeholder just before the file extension
|
||||
const string suffix = get_suffix(param.page_filename);
|
||||
param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix;
|
||||
sanitize_filename(param.page_filename);
|
||||
}
|
||||
}
|
||||
if(param.css_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.css_filename = s.substr(0, s.size() - 4) + ".css";
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!param.split_pages)
|
||||
param.css_filename = s + ".css";
|
||||
}
|
||||
}
|
||||
if(param.outline_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!param.split_pages)
|
||||
param.outline_filename = s + ".outline";
|
||||
}
|
||||
}
|
||||
if(param.bg_format == "svg")
|
||||
{
|
||||
#if not ENABLE_SVG
|
||||
cerr << "SVG support is not built" << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
#endif
|
||||
}
|
||||
else if (param.bg_format == "png")
|
||||
{
|
||||
// pass
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Unknown format for background: " << param.bg_format << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
parse_options(argc, argv);
|
||||
check_param();
|
||||
|
||||
//prepare the directories
|
||||
{
|
||||
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
|
||||
@ -243,89 +319,20 @@ int main(int argc, char **argv)
|
||||
delete ownerPW;
|
||||
}
|
||||
|
||||
if (!doc->isOk()) {
|
||||
if (!doc->isOk())
|
||||
throw "Cannot read the file";
|
||||
}
|
||||
|
||||
// check for copy permission
|
||||
if (!doc->okToCopy()) {
|
||||
if (param.no_drm == 0) {
|
||||
if (!doc->okToCopy())
|
||||
{
|
||||
if (param.no_drm == 0)
|
||||
throw "Copying of text from this document is not allowed.";
|
||||
}
|
||||
cerr << "Document has copy-protection bit set." << endl;
|
||||
}
|
||||
|
||||
param.first_page = min<int>(max<int>(param.first_page, 1), doc->getNumPages());
|
||||
param.last_page = min<int>(max<int>(param.last_page, param.first_page), doc->getNumPages());
|
||||
|
||||
if(param.output_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.output_filename = s.substr(0, s.size() - 4) + ".html";
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
param.output_filename = s + ".html";
|
||||
}
|
||||
}
|
||||
|
||||
if(param.page_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.page_filename = s.substr(0, s.size() - 4) + "%d.page";
|
||||
}
|
||||
else
|
||||
{
|
||||
param.page_filename = s + "%d.page";
|
||||
}
|
||||
sanitize_filename(param.page_filename);
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
// Need to make sure we have a page number placeholder in the filename
|
||||
if(!sanitize_filename(param.page_filename))
|
||||
{
|
||||
// Inject the placeholder just before the file extension
|
||||
const string suffix = get_suffix(param.page_filename);
|
||||
param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix;
|
||||
sanitize_filename(param.page_filename);
|
||||
}
|
||||
}
|
||||
if(param.css_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.css_filename = s.substr(0, s.size() - 4) + ".css";
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!param.split_pages)
|
||||
param.css_filename = s + ".css";
|
||||
}
|
||||
}
|
||||
if(param.outline_filename.empty())
|
||||
{
|
||||
const string s = get_filename(param.input_filename);
|
||||
|
||||
if(get_suffix(param.input_filename) == ".pdf")
|
||||
{
|
||||
param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
|
||||
}
|
||||
else
|
||||
{
|
||||
if(!param.split_pages)
|
||||
param.outline_filename = s + ".outline";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
unique_ptr<HTMLRenderer>(new HTMLRenderer(param))->process(doc);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user