pdf2htmlEX/pdf2htmlEX/src/pdf2htmlEX.cc

475 lines
16 KiB
C++
Raw Normal View History

2013-04-30 11:51:11 +00:00
// pdf2htmlEX.cc
2012-08-04 18:03:53 +00:00
//
2015-05-03 03:37:46 +00:00
// Copyright (C) 2012-2015 Lu Wang <coolwanglu@gmail.com>
2012-08-28 10:27:45 +00:00
2012-08-04 18:03:53 +00:00
#include <cstdio>
#include <cstdlib>
#include <cstddef>
#include <cstring>
#include <ctime>
#include <string>
#include <limits>
2012-08-12 10:53:22 +00:00
#include <iostream>
2013-04-06 09:01:05 +00:00
#include <memory>
2013-10-17 03:52:55 +00:00
#include <errno.h>
2013-04-06 09:01:05 +00:00
2012-09-10 05:03:25 +00:00
#include <getopt.h>
2012-08-04 18:03:53 +00:00
2013-01-23 12:29:59 +00:00
#include <poppler-config.h>
2012-08-12 10:53:22 +00:00
#include <goo/GooString.h>
2012-08-13 14:20:38 +00:00
#include <Object.h>
#include <PDFDoc.h>
#include <PDFDocFactory.h>
#include <GlobalParams.h>
2012-08-04 18:03:53 +00:00
2012-09-10 05:08:47 +00:00
#include "pdf2htmlEX-config.h"
2013-09-18 12:24:48 +00:00
2019-11-22 17:43:56 +00:00
#include "util/SignalHandler.h"
2013-09-18 12:24:48 +00:00
#if ENABLE_SVG
#include <cairo.h>
#endif
2013-04-06 08:45:01 +00:00
#include "ArgParser.h"
#include "Param.h"
2012-11-29 09:28:05 +00:00
#include "HTMLRenderer/HTMLRenderer.h"
2013-04-06 08:45:01 +00:00
2012-11-29 10:16:05 +00:00
#include "util/path.h"
2013-01-23 12:29:59 +00:00
#include "util/ffw.h"
2012-08-04 18:03:53 +00:00
2014-01-15 13:29:46 +00:00
#ifdef __MINGW32__
#include "util/mingw.h"
#endif
2012-08-04 18:03:53 +00:00
using namespace std;
using namespace pdf2htmlEX;
2012-08-04 18:03:53 +00:00
// Conversion settings shared by every function in this file: main() seeds the
// directory defaults, parse_options() registers the fields as option targets,
// and check_param() fills in the file names that were left empty.
Param param;
2012-09-10 09:01:15 +00:00
// Command-line option table: filled and run by parse_options(), and used by
// show_usage_and_exit() to print the option list.
ArgParser argparser;
2012-08-04 18:03:53 +00:00
2013-01-23 15:02:11 +00:00
void show_usage_and_exit(const char * dummy = nullptr)
2013-01-23 12:29:59 +00:00
{
2013-01-28 22:16:38 +00:00
cerr << "Usage: pdf2htmlEX [options] <input.pdf> [<output.html>]" << endl;
2012-09-10 09:01:15 +00:00
argparser.show_usage(cerr);
exit(EXIT_FAILURE);
2012-08-04 18:03:53 +00:00
}
2013-01-23 12:29:59 +00:00
void show_version_and_exit(const char * dummy = nullptr)
{
2019-11-22 17:43:56 +00:00
const FFWVersionInfo* ffwVersionInfo = ffw_get_version_info();
2013-04-30 11:51:11 +00:00
cerr << "pdf2htmlEX version " << PDF2HTMLEX_VERSION << endl;
2015-05-03 03:37:46 +00:00
cerr << "Copyright 2012-2015 Lu Wang <coolwanglu@gmail.com> and other contributors" << endl;
2013-09-18 12:30:52 +00:00
cerr << "Libraries: " << endl;
cerr << " poppler " << POPPLER_VERSION << endl;
2019-11-22 17:43:56 +00:00
cerr << " libfontforge (date) " << ffwVersionInfo->versionDate << endl;
2013-09-18 12:24:48 +00:00
#if ENABLE_SVG
2013-09-18 12:30:52 +00:00
cerr << " cairo " << cairo_version_string() << endl;
2013-09-18 12:24:48 +00:00
#endif
cerr << "Default data-dir: " << param.data_dir << endl;
2020-05-31 18:34:48 +00:00
cerr << "Poppler data-dir: " << param.poppler_data_dir << endl;
2013-09-18 12:24:48 +00:00
cerr << "Supported image format:";
#ifdef ENABLE_LIBPNG
cerr << " png";
#endif
#ifdef ENABLE_LIBJPEG
cerr << " jpg";
#endif
#if ENABLE_SVG
cerr << " svg";
#endif
cerr << endl;
2013-09-18 12:24:48 +00:00
cerr << endl;
2013-01-25 13:13:27 +00:00
exit(EXIT_SUCCESS);
2013-01-23 12:29:59 +00:00
}
// Handler for the --embed option.
// Walks the NUL-terminated string one character at a time: a lowercase letter
// clears the matching embed_* flag in `param`, the uppercase letter sets it
// (c = css, f = font, i = image, j = javascript, o = outline).
// Any other character is reported on stderr and skipped; parsing continues.
void embed_parser (const char * str)
{
    for(const char * cur = str; *cur != '\0'; ++cur)
    {
        switch(*cur)
        {
            case 'c': param.embed_css = 0; break;
            case 'C': param.embed_css = 1; break;

            case 'f': param.embed_font = 0; break;
            case 'F': param.embed_font = 1; break;

            case 'i': param.embed_image = 0; break;
            case 'I': param.embed_image = 1; break;

            case 'j': param.embed_javascript = 0; break;
            case 'J': param.embed_javascript = 1; break;

            case 'o': param.embed_outline = 0; break;
            case 'O': param.embed_outline = 1; break;

            default:
                cerr << "Unknown character `" << (*cur) << "` for --embed" << endl;
                break;
        }
    }
}
void prepare_directories()
{
std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX";
2014-01-13 12:37:00 +00:00
errno = 0;
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
unique_ptr<char[]> pBuf(new char[tmp_dir.size() + 1]);
strcpy(pBuf.get(), tmp_dir.c_str());
auto p = mkdtemp(pBuf.get());
if(p == nullptr)
{
const char * errmsg = strerror(errno);
if(!errmsg)
{
errmsg = "unknown error";
}
cerr << "Cannot create temp directory: " << errmsg << endl;
exit(EXIT_FAILURE);
}
param.tmp_dir = pBuf.get();
}
2012-09-10 09:01:15 +00:00
void parse_options (int argc, char **argv)
2012-08-04 18:03:53 +00:00
{
2012-09-10 09:01:15 +00:00
argparser
// pages
2013-01-28 22:45:12 +00:00
.add("first-page,f", &param.first_page, 1, "first page to convert")
.add("last-page,l", &param.last_page, numeric_limits<int>::max(), "last page to convert")
// dimensions
.add("zoom", &param.zoom, 0, "zoom ratio", true)
.add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true)
.add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true)
2013-03-07 01:37:27 +00:00
.add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox")
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
.add("dpi", &param.desired_dpi, 144.0, "Resolution for graphics in DPI")
// output files
.add("embed", "specify which elements should be embedded into output", embed_parser, true)
.add("embed-css", &param.embed_css, 1, "embed CSS files into output")
.add("embed-font", &param.embed_font, 1, "embed font files into output")
.add("embed-image", &param.embed_image, 1, "embed image files into output")
.add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output")
.add("embed-outline", &param.embed_outline, 1, "embed outlines into output")
2013-01-28 22:45:12 +00:00
.add("split-pages", &param.split_pages, 0, "split pages into separate files")
.add("dest-dir", &param.dest_dir, ".", "specify destination directory")
.add("css-filename", &param.css_filename, "", "filename of the generated css file")
2014-07-13 23:59:30 +00:00
.add("page-filename", &param.page_filename, "", "filename template for split pages ")
.add("outline-filename", &param.outline_filename, "", "filename of the generated outline file")
2013-01-30 18:18:18 +00:00
.add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text")
.add("process-outline", &param.process_outline, 1, "show outline in HTML")
2014-06-07 04:43:53 +00:00
.add("process-annotation", &param.process_annotation, 0, "show annotation in HTML")
2014-11-14 19:28:17 +00:00
.add("process-form", &param.process_form, 0, "include text fields and radio buttons")
2013-04-30 11:07:55 +00:00
.add("printing", &param.printing, 1, "enable printing support")
2013-03-08 17:45:13 +00:00
.add("fallback", &param.fallback, 0, "output in fallback mode")
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Maximum size (in KB) used by temporary files, -1 for no limit")
2013-01-29 10:38:39 +00:00
// fonts
2013-04-30 07:58:26 +00:00
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
.add("font-format", &param.font_format, "woff", "suffix for embedded font files (ttf,otf,woff,svg)")
2013-01-28 22:45:12 +00:00
.add("decompose-ligature", &param.decompose_ligature, 0, "decompose ligatures, such as \uFB01 -> fi")
.add("turn-off-ligatures", &param.turn_off_ligatures, 0, "explicitly tell browsers not to use ligatures")
.add("auto-hint", &param.auto_hint, 0, "use fontforge autohint on fonts without hints")
.add("external-hint-tool", &param.external_hint_tool, "", "external tool for hinting fonts (overrides --auto-hint)")
.add("stretch-narrow-glyph", &param.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding them")
.add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them")
2013-07-02 00:04:20 +00:00
.add("override-fstype", &param.override_fstype, 0, "clear the fstype bits in TTF/OTF fonts")
2013-09-21 05:56:57 +00:00
.add("process-type3", &param.process_type3, 0, "convert Type 3 fonts for web (experimental)")
// text
2013-01-28 22:45:12 +00:00
.add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels")
.add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels")
.add("space-threshold", &param.space_threshold, (1.0/8), "word break threshold (threshold * em)")
.add("font-size-multiplier", &param.font_size_multiplier, 4.0, "a value greater than 1 increases the rendering accuracy")
2013-04-04 14:10:25 +00:00
.add("space-as-offset", &param.space_as_offset, 0, "treat space characters as offsets")
.add("tounicode", &param.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
2013-05-06 03:08:29 +00:00
.add("optimize-text", &param.optimize_text, 0, "try to reduce the number of HTML elements used for text")
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
.add("correct-text-visibility", &param.correct_text_visibility, 1, "0: Don't do text visibility checks. 1: Fully occluded text handled. 2: Partially occluded text handled")
.add("covered-text-dpi", &param.text_dpi, 300, "Rendering DPI to use if correct-text-visibility == 2 and there is partially covered text on the page")
2013-09-18 10:01:56 +00:00
// background image
.add("bg-format", &param.bg_format, "png", "specify background image format")
.add("svg-node-count-limit", &param.svg_node_count_limit, -1, "if node count in a svg background image exceeds this limit,"
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
" fall back this page to bitmap background; negative value means no limit")
.add("svg-embed-bitmap", &param.svg_embed_bitmap, 1, "1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible")
// encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
.add("user-password,u", &param.user_password, "", "user password (for encrypted files)", true)
.add("no-drm", &param.no_drm, 0, "override document DRM settings")
// misc.
2013-01-28 22:45:12 +00:00
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
.add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of temporary directory")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
.add("poppler-data-dir", &param.poppler_data_dir, param.poppler_data_dir, "specify poppler data directory")
.add("debug", &param.debug, 0, "print debugging information")
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
.add("proof", &param.proof, 0, "texts are drawn on both text layer and background for proof")
.add("quiet", &param.quiet, 0, "perform operations quietly")
// meta
.add("version,v", "print copyright and version info", &show_version_and_exit)
.add("help,h", "print usage information", &show_usage_and_exit)
2012-09-10 14:22:01 +00:00
.add("", &param.input_filename, "", "")
.add("", &param.output_filename, "", "")
2012-08-04 18:03:53 +00:00
;
2012-09-10 09:01:15 +00:00
try
{
argparser.parse(argc, argv);
2012-08-04 18:03:53 +00:00
}
2012-09-10 14:44:19 +00:00
catch(const char * s)
{
// if s == "", getopt_long would have printed the error message
if(s && s[0])
{
cerr << "Error when parsing the arguments:" << endl;
cerr << s << endl;
}
exit(EXIT_FAILURE);
}
2012-09-10 09:01:15 +00:00
catch(const std::string & s)
{
2012-09-10 14:44:19 +00:00
// if s == "", getopt_long would have printed the error message
if(s != "")
{
cerr << "Error when parsing the arguments:" << endl;
cerr << s << endl;
}
2012-09-10 09:01:15 +00:00
exit(EXIT_FAILURE);
2012-08-04 18:03:53 +00:00
}
}
2013-09-18 10:01:56 +00:00
void check_param()
2012-08-04 18:03:53 +00:00
{
2012-09-10 09:01:15 +00:00
if (param.input_filename == "")
2012-08-04 18:03:53 +00:00
{
2013-01-28 22:16:38 +00:00
show_usage_and_exit();
2012-08-04 18:03:53 +00:00
}
2013-09-18 10:01:56 +00:00
if(param.output_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.output_filename = s.substr(0, s.size() - 4) + ".html";
}
else
{
param.output_filename = s + ".html";
}
}
if(param.page_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.page_filename = s.substr(0, s.size() - 4) + "%d.page";
}
else
{
param.page_filename = s + "%d.page";
}
sanitize_filename(param.page_filename);
}
else
{
// Need to make sure we have a page number placeholder in the filename
if(!sanitize_filename(param.page_filename))
{
// Inject the placeholder just before the file extension
const string suffix = get_suffix(param.page_filename);
param.page_filename = param.page_filename.substr(0, param.page_filename.size() - suffix.size()) + "%d" + suffix;
sanitize_filename(param.page_filename);
}
}
if(param.css_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.css_filename = s.substr(0, s.size() - 4) + ".css";
}
else
{
2015-05-01 05:45:30 +00:00
param.css_filename = s + ".css";
2013-09-18 10:01:56 +00:00
}
}
if(param.outline_filename.empty())
{
const string s = get_filename(param.input_filename);
if(get_suffix(param.input_filename) == ".pdf")
{
param.outline_filename = s.substr(0, s.size() - 4) + ".outline";
}
else
{
if(!param.split_pages)
param.outline_filename = s + ".outline";
}
}
2013-09-18 12:24:48 +00:00
if(false) { }
#ifdef ENABLE_LIBPNG
else if (param.bg_format == "png") { }
#endif
#ifdef ENABLE_LIBJPEG
else if (param.bg_format == "jpg") { }
#endif
2013-09-18 16:17:56 +00:00
#if ENABLE_SVG
2013-09-18 12:24:48 +00:00
else if(param.bg_format == "svg") { }
2013-09-18 10:01:56 +00:00
#endif
else
{
2013-09-18 12:24:48 +00:00
cerr << "Image format not supported: " << param.bg_format << endl;
2013-09-18 10:01:56 +00:00
exit(EXIT_FAILURE);
}
2013-09-18 21:56:57 +00:00
2013-09-21 05:56:57 +00:00
#if not ENABLE_SVG
if(param.process_type3)
{
cerr << "process-type3 is enabled, however SVG support is not built in this version of pdf2htmlEX." << endl;
exit(EXIT_FAILURE);
}
#endif
2013-10-03 07:19:41 +00:00
if((param.font_format == "ttf") && (param.external_hint_tool == ""))
{
cerr << "Warning: No hint tool is specified for truetype fonts, the result may be rendered poorly in some circumstances." << endl;
}
if (param.embed_image && (param.bg_format == "svg") && !param.svg_embed_bitmap)
{
cerr << "Warning: --svg-embed-bitmap is forced on because --embed-image is on, or the dumped bitmaps can't be loaded." << endl;
param.svg_embed_bitmap = 1;
}
2013-09-18 10:01:56 +00:00
}
int main(int argc, char **argv)
{
// We need to adjust these directories before parsing the options.
2014-01-15 13:29:46 +00:00
#if defined(__MINGW32__)
2014-01-14 14:39:14 +00:00
param.data_dir = get_exec_dir(argv[0]);
2014-01-14 01:08:23 +00:00
param.tmp_dir = get_tmp_dir();
2014-01-15 13:29:46 +00:00
#else
2014-07-13 23:59:30 +00:00
char const* tmp = getenv("TMPDIR");
#ifdef P_tmpdir
if (!tmp)
tmp = P_tmpdir;
#endif
#ifdef _PATH_TMP
if (!tmp)
tmp = _PATH_TMP;
#endif
if (!tmp)
tmp = "/tmp";
param.tmp_dir = string(tmp);
param.data_dir = PDF2HTMLEX_DATA_PATH;
2014-01-14 01:08:23 +00:00
#endif
2019-11-22 12:08:20 +00:00
if (getenv("APPDIR")) {
// we are running inside an AppImage so we need to adjust the data_dir
// however the user can supply some other absolute path later
//
param.data_dir = string(getenv("APPDIR")) + param.data_dir;
}
param.poppler_data_dir = param.data_dir + "/poppler";
2013-09-18 10:01:56 +00:00
parse_options(argc, argv);
check_param();
2012-08-14 10:12:58 +00:00
//prepare the directories
prepare_directories();
2012-09-09 18:22:49 +00:00
2014-01-11 08:25:09 +00:00
if(param.debug)
2012-09-09 17:23:28 +00:00
cerr << "temporary dir: " << (param.tmp_dir) << endl;
2012-08-14 10:12:58 +00:00
try
{
create_directories(param.dest_dir);
}
2012-09-09 17:23:28 +00:00
catch (const string & s)
2012-08-14 10:12:58 +00:00
{
2012-09-09 17:23:28 +00:00
cerr << s << endl;
2012-09-10 09:01:15 +00:00
exit(EXIT_FAILURE);
2012-08-14 10:12:58 +00:00
}
2019-11-22 17:43:56 +00:00
// setup the signal handler
setupSignalHandler(argc, (const char**)argv,
param.data_dir.c_str(),
param.poppler_data_dir.c_str(),
param.tmp_dir.c_str());
2012-09-10 09:01:15 +00:00
bool finished = false;
// read poppler config file
globalParams = std::make_unique<GlobalParams>(
!param.poppler_data_dir.empty() ? param.poppler_data_dir.c_str() : NULL
);
2012-09-10 09:01:15 +00:00
try
{
2023-12-18 10:39:47 +00:00
std::optional<GooString> ownerPW;
if (!param.owner_password.empty()) {
ownerPW = GooString(param.owner_password);
}
2012-08-04 18:03:53 +00:00
2023-12-18 10:39:47 +00:00
std::optional<GooString> userPW;
if (!param.user_password.empty()) {
userPW = GooString(param.user_password);
2012-09-10 09:01:15 +00:00
}
2012-08-04 18:03:53 +00:00
2023-12-18 10:39:47 +00:00
GooString fileName(param.input_filename);
// open PDF file
std::unique_ptr<PDFDoc> doc(PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW));
if (!doc->isOk())
2012-09-10 09:01:15 +00:00
throw "Cannot read the file";
2012-08-04 18:03:53 +00:00
2012-09-10 09:01:15 +00:00
// check for copy permission
if (!doc->okToCopy())
2013-09-18 10:01:56 +00:00
{
if (param.no_drm == 0)
2013-01-25 00:56:49 +00:00
throw "Copying of text from this document is not allowed.";
cerr << "Document has copy-protection bit set." << endl;
2012-09-10 09:01:15 +00:00
}
2012-08-04 18:03:53 +00:00
param.first_page =
min<int>(max<int>(param.first_page, 1), doc->getNumPages());
param.last_page =
min<int>(max<int>(param.last_page, param.first_page),
doc->getNumPages());
2012-09-09 17:18:09 +00:00
2012-08-04 18:03:53 +00:00
2023-12-18 10:39:47 +00:00
unique_ptr<HTMLRenderer>(new HTMLRenderer(argv[0], param))->process(doc.get());
2012-08-04 18:03:53 +00:00
2012-09-10 09:01:15 +00:00
finished = true;
}
2012-09-10 14:44:19 +00:00
catch (const char * s)
{
cerr << "Error: " << s << endl;
}
2012-09-10 09:01:15 +00:00
catch (const string & s)
{
cerr << "Error: " << s << endl;
}
2012-08-04 18:03:53 +00:00
// clean up
globalParams.reset();
2012-08-04 18:03:53 +00:00
// check for memory leaks
// Poppler Object class (Object.h) no longer has memCheck
//Object::memCheck(stderr);
//gMemReport(stderr);
2012-08-04 18:03:53 +00:00
2012-09-10 09:01:15 +00:00
exit(finished ? (EXIT_SUCCESS) : (EXIT_FAILURE));
2012-09-09 19:30:54 +00:00
return 0;
2012-08-04 18:03:53 +00:00
}