mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-21 12:40:08 +00:00
Merge pull request #254 from marcsanfacon/master
MinGW Port & 2 new options
This commit is contained in:
commit
ae7c8e8d9c
1
AUTHORS
1
AUTHORS
@ -11,6 +11,7 @@ hasufell <julian.ospald@googlemail.com>
|
||||
Herbert Jones <herbert@mediafire.com>
|
||||
Hongliang Tian <tatetian@gmail.com>
|
||||
John Hewson <john@jahewson.com>
|
||||
Marc Sanfacon <marc.sanfacon@gmail.com>
|
||||
Michele Redolfi <michele@tecnicaict.com>
|
||||
Mick Giles <mick@mickgiles.com>
|
||||
Ryan Morlok <ryan.morlok@morlok.com>
|
||||
|
@ -75,7 +75,7 @@ else()
|
||||
set(FONTFORGE_INCLUDE_DIRS ${FF_INCLUDE_PATH}/fontforge)
|
||||
include_directories(${FONTFORGE_INCLUDE_DIRS})
|
||||
# MacOSX gettext is in /opt/local/include - strange
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
find_path(GETTEXT_INCLUDE_PATH libintl.h HINTS "/usr/local/opt/gettext/include") # homebrew
|
||||
if(GETTEXT_INCLUDE_PATH)
|
||||
include_directories(${GETTEXT_INCLUDE_PATH})
|
||||
@ -97,7 +97,7 @@ else()
|
||||
macro(wl_find_library LIB_NAME RESULT)
|
||||
unset(${RESULT})
|
||||
unset(${RESULT} CACHE)
|
||||
foreach(FULL_LIB_NAME
|
||||
foreach(FULL_LIB_NAME
|
||||
${CMAKE_IMPORT_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX}
|
||||
${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
|
||||
${CMAKE_STATIC_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}
|
||||
@ -130,7 +130,6 @@ else()
|
||||
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES})
|
||||
endif()
|
||||
|
||||
|
||||
# debug build flags (overwrite default cmake debug flags)
|
||||
set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg")
|
||||
@ -159,7 +158,7 @@ endif()
|
||||
include(CheckCXXCompilerFlag)
|
||||
check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT)
|
||||
if(NOT CXX0X_SUPPORT)
|
||||
message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.")
|
||||
message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.")
|
||||
endif()
|
||||
|
||||
|
||||
@ -174,9 +173,9 @@ configure_file (${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js.in ${CMAKE_SOURCE_DIR}/s
|
||||
|
||||
set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
|
||||
src/Param.h
|
||||
src/pdf2htmlEX.cc
|
||||
src/pdf2htmlEX.cc
|
||||
src/pdf2htmlEX-config.h
|
||||
src/HTMLRenderer/HTMLRenderer.h
|
||||
src/HTMLRenderer/HTMLRenderer.h
|
||||
src/HTMLRenderer/draw.cc
|
||||
src/HTMLRenderer/general.cc
|
||||
src/HTMLRenderer/image.cc
|
||||
@ -185,12 +184,12 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
|
||||
src/HTMLRenderer/outline.cc
|
||||
src/HTMLRenderer/state.cc
|
||||
src/HTMLRenderer/text.cc
|
||||
src/BackgroundRenderer/BackgroundRenderer.h
|
||||
src/BackgroundRenderer/BackgroundRenderer.h
|
||||
src/BackgroundRenderer/BackgroundRenderer.cc
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.h
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.cc
|
||||
src/BackgroundRenderer/CairoBackgroundRenderer.h
|
||||
src/BackgroundRenderer/CairoBackgroundRenderer.cc
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.h
|
||||
src/BackgroundRenderer/SplashBackgroundRenderer.cc
|
||||
src/BackgroundRenderer/CairoBackgroundRenderer.h
|
||||
src/BackgroundRenderer/CairoBackgroundRenderer.cc
|
||||
src/util/const.h
|
||||
src/util/const.cc
|
||||
src/util/css_const.h
|
||||
@ -229,7 +228,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
|
||||
add_executable(pdf2htmlEX ${PDF2HTMLEX_SRC})
|
||||
target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS})
|
||||
|
||||
add_custom_target(pdf2htmlEX_resources ALL DEPENDS
|
||||
add_custom_target(pdf2htmlEX_resources ALL DEPENDS
|
||||
${CMAKE_SOURCE_DIR}/share/base.min.css
|
||||
${CMAKE_SOURCE_DIR}/share/fancy.min.css
|
||||
${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js
|
||||
@ -237,15 +236,15 @@ add_custom_target(pdf2htmlEX_resources ALL DEPENDS
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/share/build_js.sh
|
||||
DEPENDS
|
||||
DEPENDS
|
||||
${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js
|
||||
)
|
||||
|
||||
add_custom_command(OUTPUT
|
||||
add_custom_command(OUTPUT
|
||||
${CMAKE_SOURCE_DIR}/share/base.min.css
|
||||
${CMAKE_SOURCE_DIR}/share/fancy.min.css
|
||||
COMMAND ${CMAKE_SOURCE_DIR}/share/build_css.sh
|
||||
DEPENDS
|
||||
DEPENDS
|
||||
${CMAKE_SOURCE_DIR}/share/base.css
|
||||
${CMAKE_SOURCE_DIR}/share/fancy.css
|
||||
)
|
||||
@ -269,5 +268,5 @@ install (FILES ${PDF2HTMLEX_RESOURCE} DESTINATION share/pdf2htmlEX)
|
||||
install (FILES pdf2htmlEX.1 DESTINATION share/man/man1)
|
||||
|
||||
enable_testing()
|
||||
add_test(test_naming
|
||||
add_test(test_naming
|
||||
python ${CMAKE_SOURCE_DIR}/test/test_naming.py)
|
||||
|
@ -82,6 +82,13 @@ This switch is useful if you want pages to be loaded separately & dynamically --
|
||||
|
||||
Also see --page-filename.
|
||||
|
||||
.TP
|
||||
.B --tmp-file-size-limit <limit> (Default: -1)
|
||||
Limit the total size of the temporary files to <limit> KB, which in turn limits the total size of the output file.
|
||||
The limit is only an estimate: it is checked before each page is processed, so conversion stops at a page boundary once the total size of the temporary files exceeds this number.
|
||||
|
||||
-1 means no limit and is the default.
|
||||
|
||||
.TP
|
||||
.B --dest-dir <dir> (Default: .)
|
||||
Specify destination folder.
|
||||
@ -262,6 +269,10 @@ If switched off, intermediate files won't be cleaned in the end.
|
||||
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
|
||||
Specify the folder holding the manifest and other files (see below for the manifest file).
|
||||
|
||||
.TP
|
||||
.B --tmp-dir <dir> (Default: /tmp)
|
||||
Specify the temporary folder to use for temporary files
|
||||
|
||||
.TP
|
||||
.B --css-draw <0|1> (Default: 0)
|
||||
Experimental and unsupported CSS drawing
|
||||
|
@ -45,7 +45,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
|
||||
,param(param)
|
||||
,html_text_page(param, all_manager)
|
||||
,preprocessor(param)
|
||||
,tmp_files(param)
|
||||
,tmp_files(param)
|
||||
{
|
||||
if(!(param.debug))
|
||||
{
|
||||
@ -79,7 +79,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
|
||||
}
|
||||
|
||||
HTMLRenderer::~HTMLRenderer()
|
||||
{
|
||||
{
|
||||
ffw_finalize();
|
||||
delete [] cur_mapping;
|
||||
delete [] cur_mapping2;
|
||||
@ -96,7 +96,7 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
|
||||
///////////////////
|
||||
// Process pages
|
||||
|
||||
|
||||
bg_renderer = nullptr;
|
||||
if(param.process_nontext)
|
||||
{
|
||||
@ -107,15 +107,20 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
}
|
||||
|
||||
int page_count = (param.last_page - param.first_page + 1);
|
||||
for(int i = param.first_page; i <= param.last_page ; ++i)
|
||||
for(int i = param.first_page; i <= param.last_page ; ++i)
|
||||
{
|
||||
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
|
||||
cerr << "Stop processing, reach max size\n";
|
||||
break;
|
||||
}
|
||||
|
||||
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
|
||||
|
||||
if(param.split_pages)
|
||||
{
|
||||
string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i);
|
||||
auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str());
|
||||
f_curpage = new ofstream((char*)page_fn, ofstream::binary);
|
||||
f_curpage = new ofstream((char*)page_fn, ofstream::binary);
|
||||
if(!(*f_curpage))
|
||||
throw string("Cannot open ") + (char*)page_fn + " for writing";
|
||||
set_stream_flags((*f_curpage));
|
||||
@ -128,9 +133,9 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
bg_renderer->render_page(doc, i);
|
||||
}
|
||||
|
||||
doc->displayPage(this, i,
|
||||
doc->displayPage(this, i,
|
||||
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
|
||||
0,
|
||||
0,
|
||||
(!(param.use_cropbox)),
|
||||
true, // crop
|
||||
false, // printing
|
||||
@ -149,7 +154,7 @@ void HTMLRenderer::process(PDFDoc *doc)
|
||||
////////////////////////
|
||||
// Process Outline
|
||||
if(param.process_outline)
|
||||
process_outline();
|
||||
process_outline();
|
||||
|
||||
post_process();
|
||||
|
||||
@ -170,7 +175,7 @@ void HTMLRenderer::setDefaultCTM(double *ctm)
|
||||
#if POPPLER_OLDER_THAN_0_23_0
|
||||
void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
||||
#else
|
||||
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
#endif
|
||||
{
|
||||
this->pageNum = pageNum;
|
||||
@ -183,12 +188,12 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
long long wid = all_manager.width.install(pageWidth);
|
||||
long long hid = all_manager.height.install(pageHeight);
|
||||
(*f_curpage)
|
||||
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
||||
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
||||
<< "\" class=\"" << CSS::PAGE_FRAME_CN
|
||||
<< " " << CSS::WIDTH_CN << wid
|
||||
<< " " << CSS::HEIGHT_CN << hid
|
||||
<< "\" data-page-no=\"" << pageNum << "\">"
|
||||
<< "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN
|
||||
<< "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN
|
||||
<< " " << CSS::PAGE_CONTENT_BOX_CN << pageNum
|
||||
<< " " << CSS::WIDTH_CN << wid
|
||||
<< " " << CSS::HEIGHT_CN << hid
|
||||
@ -201,11 +206,11 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
if(param.split_pages)
|
||||
{
|
||||
f_pages.fs
|
||||
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
||||
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
||||
<< "\" class=\"" << CSS::PAGE_FRAME_CN
|
||||
<< " " << CSS::WIDTH_CN << wid
|
||||
<< " " << CSS::HEIGHT_CN << hid
|
||||
<< "\" data-page-no=\"" << pageNum
|
||||
<< "\" data-page-no=\"" << pageNum
|
||||
<< "\" data-page-url=\"";
|
||||
|
||||
writeAttribute(f_pages.fs, cur_page_filename);
|
||||
@ -236,7 +241,7 @@ void HTMLRenderer::endPage() {
|
||||
// TODO: create a function for this
|
||||
// BE CAREFUL WITH ESCAPES
|
||||
(*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
|
||||
|
||||
|
||||
//default CTM
|
||||
(*f_curpage) << "\"ctm\":[";
|
||||
for(int i = 0; i < 6; ++i)
|
||||
@ -247,7 +252,7 @@ void HTMLRenderer::endPage() {
|
||||
(*f_curpage) << "]";
|
||||
|
||||
(*f_curpage) << "}'></div>";
|
||||
|
||||
|
||||
// close page
|
||||
(*f_curpage) << "</div>" << endl;
|
||||
|
||||
@ -266,7 +271,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
*/
|
||||
{
|
||||
vector<double> zoom_factors;
|
||||
|
||||
|
||||
if(is_positive(param.zoom))
|
||||
{
|
||||
zoom_factors.push_back(param.zoom);
|
||||
@ -283,8 +288,8 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
}
|
||||
|
||||
double zoom = (zoom_factors.empty() ? 1.0 : (*min_element(zoom_factors.begin(), zoom_factors.end())));
|
||||
|
||||
text_scale_factor1 = max<double>(zoom, param.font_size_multiplier);
|
||||
|
||||
text_scale_factor1 = max<double>(zoom, param.font_size_multiplier);
|
||||
text_scale_factor2 = zoom / text_scale_factor1;
|
||||
}
|
||||
|
||||
@ -340,13 +345,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
|
||||
* we have to keep the html file for pages into a temporary place
|
||||
* because we'll have to embed css before it
|
||||
*
|
||||
* Otherwise just generate it
|
||||
* Otherwise just generate it
|
||||
*/
|
||||
auto fn = str_fmt("%s/__pages", param.tmp_dir.c_str());
|
||||
tmp_files.add((char*)fn);
|
||||
|
||||
f_pages.path = (char*)fn;
|
||||
f_pages.fs.open(f_pages.path, ofstream::binary);
|
||||
f_pages.fs.open(f_pages.path, ofstream::binary);
|
||||
if(!f_pages.fs)
|
||||
throw string("Cannot open ") + (char*)fn + " for writing";
|
||||
set_stream_flags(f_pages.fs);
|
||||
@ -371,7 +376,7 @@ void HTMLRenderer::post_process(void)
|
||||
{
|
||||
f_outline.fs.close();
|
||||
}
|
||||
f_pages.fs.close();
|
||||
f_pages.fs.close();
|
||||
f_css.fs.close();
|
||||
|
||||
// build the main HTML file
|
||||
@ -394,20 +399,6 @@ void HTMLRenderer::post_process(void)
|
||||
long line_no = 0;
|
||||
while(getline(manifest_fin, line))
|
||||
{
|
||||
++line_no;
|
||||
|
||||
if(line == "\"\"\"")
|
||||
{
|
||||
embed_string = !embed_string;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(embed_string)
|
||||
{
|
||||
output << line << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
// trim space at both sides
|
||||
{
|
||||
static const char * whitespaces = " \t\n\v\f\r";
|
||||
@ -424,6 +415,20 @@ void HTMLRenderer::post_process(void)
|
||||
}
|
||||
}
|
||||
|
||||
++line_no;
|
||||
|
||||
if(line == "\"\"\"")
|
||||
{
|
||||
embed_string = !embed_string;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(embed_string)
|
||||
{
|
||||
output << line << endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(line.empty() || line[0] == '#')
|
||||
continue;
|
||||
|
||||
@ -492,7 +497,7 @@ void HTMLRenderer::dump_css (void)
|
||||
all_manager.width .dump_css(f_css.fs);
|
||||
all_manager.left .dump_css(f_css.fs);
|
||||
all_manager.bgimage_size .dump_css(f_css.fs);
|
||||
|
||||
|
||||
// print css
|
||||
if(param.printing)
|
||||
{
|
||||
@ -518,8 +523,8 @@ void HTMLRenderer::dump_css (void)
|
||||
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
|
||||
{
|
||||
string fn = get_filename(path);
|
||||
string suffix = (type == "") ? get_suffix(fn) : type;
|
||||
|
||||
string suffix = (type == "") ? get_suffix(fn) : type;
|
||||
|
||||
// TODO
|
||||
auto iter = EMBED_STRING_MAP.find(suffix);
|
||||
if(iter == EMBED_STRING_MAP.end())
|
||||
@ -529,14 +534,14 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
|
||||
}
|
||||
|
||||
const auto & entry = iter->second;
|
||||
|
||||
|
||||
if(param.*(entry.embed_flag))
|
||||
{
|
||||
ifstream fin(path, ifstream::binary);
|
||||
if(!fin)
|
||||
throw string("Cannot open file ") + path + " for embedding";
|
||||
out << entry.prefix_embed;
|
||||
|
||||
|
||||
if(entry.base64_encode)
|
||||
{
|
||||
out << Base64Stream(fin);
|
||||
|
21
src/Param.h
21
src/Param.h
@ -17,20 +17,21 @@ struct Param
|
||||
{
|
||||
// pages
|
||||
int first_page, last_page;
|
||||
|
||||
|
||||
// dimensions
|
||||
double zoom;
|
||||
double fit_width, fit_height;
|
||||
int use_cropbox;
|
||||
double h_dpi, v_dpi;
|
||||
|
||||
// output
|
||||
|
||||
// output
|
||||
int embed_css;
|
||||
int embed_font;
|
||||
int embed_image;
|
||||
int embed_javascript;
|
||||
int embed_outline;
|
||||
int split_pages;
|
||||
int tmp_file_size_limit;
|
||||
std::string dest_dir;
|
||||
std::string css_filename;
|
||||
std::string page_filename;
|
||||
@ -39,7 +40,7 @@ struct Param
|
||||
int process_outline;
|
||||
int printing;
|
||||
int fallback;
|
||||
|
||||
|
||||
// fonts
|
||||
int embed_external_font;
|
||||
std::string font_format;
|
||||
@ -50,7 +51,7 @@ struct Param
|
||||
int squeeze_wide_glyph;
|
||||
int override_fstype;
|
||||
int process_type3;
|
||||
|
||||
|
||||
// text
|
||||
double h_eps, v_eps;
|
||||
double space_threshold;
|
||||
@ -61,21 +62,19 @@ struct Param
|
||||
|
||||
// background image
|
||||
std::string bg_format;
|
||||
|
||||
|
||||
// encryption
|
||||
std::string owner_password, user_password;
|
||||
int no_drm;
|
||||
|
||||
|
||||
// misc.
|
||||
int clean_tmp;
|
||||
std::string data_dir;
|
||||
std::string tmp_dir;
|
||||
int css_draw;
|
||||
int debug;
|
||||
|
||||
std::string input_filename, output_filename;
|
||||
|
||||
// not a paramater
|
||||
std::string tmp_dir;
|
||||
std::string input_filename, output_filename;
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
@ -9,21 +9,31 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <cstdio>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "TmpFiles.h"
|
||||
#include "Param.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifndef _WIN32
|
||||
# define STAT stat
|
||||
# define RMDIR rmdir
|
||||
#else
|
||||
# include <direct.h>
|
||||
# define STAT _stat
|
||||
# define RMDIR _rmdir
|
||||
#endif
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
|
||||
TmpFiles::TmpFiles( const Param& param )
|
||||
: param( param )
|
||||
: param( param )
|
||||
{ }
|
||||
|
||||
TmpFiles::~TmpFiles()
|
||||
{
|
||||
{
|
||||
clean();
|
||||
}
|
||||
|
||||
@ -49,10 +59,23 @@ void TmpFiles::clean()
|
||||
cerr << "Remove temporary file: " << fn << endl;
|
||||
}
|
||||
|
||||
remove(param.tmp_dir.c_str());
|
||||
RMDIR(param.tmp_dir.c_str());
|
||||
if(param.debug)
|
||||
cerr << "Remove temporary directory: " << param.tmp_dir << endl;
|
||||
}
|
||||
|
||||
// Return the total size of the temporary files in bytes
|
||||
double TmpFiles::get_total_size() const
|
||||
{
|
||||
double total_size = 0;
|
||||
struct STAT st;
|
||||
for(auto iter = tmp_files.begin(); iter != tmp_files.end(); ++iter) {
|
||||
STAT(iter->c_str(), &st);
|
||||
total_size += st.st_size;
|
||||
}
|
||||
|
||||
return total_size;
|
||||
}
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
||||
|
@ -7,19 +7,20 @@
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
class TmpFiles
|
||||
class TmpFiles
|
||||
{
|
||||
public:
|
||||
explicit TmpFiles( const Param& param );
|
||||
~TmpFiles();
|
||||
|
||||
void add( const std::string& fn);
|
||||
void add( const std::string& fn);
|
||||
double get_total_size() const;
|
||||
|
||||
private:
|
||||
void clean();
|
||||
|
||||
void clean();
|
||||
|
||||
const Param& param;
|
||||
std::set<std::string> tmp_files;
|
||||
std::set<std::string> tmp_files;
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
@ -42,6 +42,12 @@ using namespace pdf2htmlEX;
|
||||
Param param;
|
||||
ArgParser argparser;
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <iomanip>
|
||||
# include <libgen.h>
|
||||
# include <direct.h>
|
||||
#endif
|
||||
|
||||
void deprecated_font_suffix(const char * dummy = nullptr)
|
||||
{
|
||||
cerr << "--font-suffix is deprecated. Use `--font-format` instead." << endl;
|
||||
@ -65,7 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr)
|
||||
#if ENABLE_SVG
|
||||
cerr << " cairo " << cairo_version_string() << endl;
|
||||
#endif
|
||||
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl;
|
||||
cerr << "Default data-dir: " << param.data_dir << endl;
|
||||
cerr << "Supported image format:";
|
||||
#ifdef ENABLE_LIBPNG
|
||||
cerr << " png";
|
||||
@ -106,21 +112,56 @@ void embed_parser (const char * str)
|
||||
}
|
||||
}
|
||||
|
||||
void prepare_directories()
|
||||
{
|
||||
std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX";
|
||||
#ifndef _WIN32
|
||||
errno = 0;
|
||||
|
||||
unique_ptr<char> pBuf(new char[tmp_dir.size() + 1]);
|
||||
strcpy(pBuf.get(), tmp_dir.c_str());
|
||||
auto p = mkdtemp(pBuf.get());
|
||||
if(p == nullptr)
|
||||
{
|
||||
const char * errmsg = strerror(errno);
|
||||
if(!errmsg)
|
||||
{
|
||||
errmsg = "unknown error";
|
||||
}
|
||||
cerr << "Cannot create temp directory: " << errmsg << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
param.tmp_dir = pBuf.get();
|
||||
#else
|
||||
srand((unsigned)time(0));
|
||||
int rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);
|
||||
stringstream ss;
|
||||
ss << setw(6) << rand_value;
|
||||
|
||||
tmp_dir.erase(tmp_dir.size() - 6);
|
||||
param.tmp_dir = tmp_dir + ss.str();
|
||||
if (mkdir(param.tmp_dir.c_str())) {
|
||||
cerr << "Cannot create temp directory (" << param.tmp_dir << "): " << strerror(errno) << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void parse_options (int argc, char **argv)
|
||||
{
|
||||
argparser
|
||||
// pages
|
||||
.add("first-page,f", ¶m.first_page, 1, "first page to convert")
|
||||
.add("last-page,l", ¶m.last_page, numeric_limits<int>::max(), "last page to convert")
|
||||
|
||||
|
||||
// dimensions
|
||||
.add("zoom", ¶m.zoom, 0, "zoom ratio", true)
|
||||
.add("fit-width", ¶m.fit_width, 0, "fit width to <fp> pixels", true)
|
||||
.add("fit-width", ¶m.fit_width, 0, "fit width to <fp> pixels", true)
|
||||
.add("fit-height", ¶m.fit_height, 0, "fit height to <fp> pixels", true)
|
||||
.add("use-cropbox", ¶m.use_cropbox, 1, "use CropBox instead of MediaBox")
|
||||
.add("hdpi", ¶m.h_dpi, 144.0, "horizontal resolution for graphics in DPI")
|
||||
.add("vdpi", ¶m.v_dpi, 144.0, "vertical resolution for graphics in DPI")
|
||||
|
||||
|
||||
// output files
|
||||
.add("embed", "specify which elements should be embedded into output", embed_parser, true)
|
||||
.add("embed-css", ¶m.embed_css, 1, "embed CSS files into output")
|
||||
@ -128,6 +169,7 @@ void parse_options (int argc, char **argv)
|
||||
.add("embed-image", ¶m.embed_image, 1, "embed image files into output")
|
||||
.add("embed-javascript", ¶m.embed_javascript, 1, "embed JavaScript files into output")
|
||||
.add("embed-outline", ¶m.embed_outline, 1, "embed outlines into output")
|
||||
.add("tmp-file-size-limit", ¶m.tmp_file_size_limit, -1, "Limit the temporary file output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger")
|
||||
.add("split-pages", ¶m.split_pages, 0, "split pages into separate files")
|
||||
.add("dest-dir", ¶m.dest_dir, ".", "specify destination directory")
|
||||
.add("css-filename", ¶m.css_filename, "", "filename of the generated css file")
|
||||
@ -137,7 +179,7 @@ void parse_options (int argc, char **argv)
|
||||
.add("process-outline", ¶m.process_outline, 1, "show outline in HTML")
|
||||
.add("printing", ¶m.printing, 1, "enable printing support")
|
||||
.add("fallback", ¶m.fallback, 0, "output in fallback mode")
|
||||
|
||||
|
||||
// fonts
|
||||
.add("embed-external-font", ¶m.embed_external_font, 1, "embed local match for external fonts")
|
||||
.add("font-format", ¶m.font_format, "woff", "suffix for embedded font files (ttf,otf,woff,svg)")
|
||||
@ -148,7 +190,7 @@ void parse_options (int argc, char **argv)
|
||||
.add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them")
|
||||
.add("override-fstype", ¶m.override_fstype, 0, "clear the fstype bits in TTF/OTF fonts")
|
||||
.add("process-type3", ¶m.process_type3, 0, "convert Type 3 fonts for web (experimental)")
|
||||
|
||||
|
||||
// text
|
||||
.add("heps", ¶m.h_eps, 1.0, "horizontal threshold for merging text, in pixels")
|
||||
.add("veps", ¶m.v_eps, 1.0, "vertical threshold for merging text, in pixels")
|
||||
@ -160,19 +202,20 @@ void parse_options (int argc, char **argv)
|
||||
|
||||
// background image
|
||||
.add("bg-format", ¶m.bg_format, "png", "specify background image format")
|
||||
|
||||
|
||||
// encryption
|
||||
.add("owner-password,o", ¶m.owner_password, "", "owner password (for encrypted files)", true)
|
||||
.add("user-password,u", ¶m.user_password, "", "user password (for encrypted files)", true)
|
||||
.add("no-drm", ¶m.no_drm, 0, "override document DRM settings")
|
||||
|
||||
|
||||
// misc.
|
||||
.add("clean-tmp", ¶m.clean_tmp, 1, "remove temporary files after conversion")
|
||||
.add("data-dir", ¶m.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
|
||||
.add("tmp-dir", ¶m.tmp_dir, param.tmp_dir, "specify the location of tempory directory.")
|
||||
.add("data-dir", ¶m.data_dir, param.data_dir, "specify data directory")
|
||||
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
|
||||
// .add("css-draw", ¶m.css_draw, 0, "[experimental and unsupported] CSS drawing")
|
||||
.add("debug", ¶m.debug, 0, "print debugging information")
|
||||
|
||||
|
||||
// meta
|
||||
.add("version,v", "print copyright and version info", &show_version_and_exit)
|
||||
.add("help,h", "print usage information", &show_usage_and_exit)
|
||||
@ -317,29 +360,33 @@ void check_param()
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// We need to adjust these directories before parsing the options.
|
||||
#ifndef _WIN32
|
||||
param.tmp_dir = "/tmp";
|
||||
param.data_dir = PDF2HTMLEX_DATA_PATH;
|
||||
#else
|
||||
{
|
||||
// Under Windows, the default data_dir is under /data in the pdf2htmlEX directory
|
||||
stringstream ss;
|
||||
ss << dirname(argv[0]) << "/data";
|
||||
param.data_dir = ss.str();
|
||||
|
||||
// Under Windows, the temp path is not under /tmp, find it.
|
||||
char temppath[MAX_PATH];
|
||||
::GetTempPath(MAX_PATH, temppath);
|
||||
param.tmp_dir = temppath;
|
||||
}
|
||||
#endif
|
||||
|
||||
parse_options(argc, argv);
|
||||
check_param();
|
||||
|
||||
//prepare the directories
|
||||
{
|
||||
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
|
||||
errno = 0;
|
||||
auto p = mkdtemp(buf);
|
||||
if(p == nullptr)
|
||||
{
|
||||
const char * errmsg = strerror(errno);
|
||||
if(!errmsg)
|
||||
{
|
||||
errmsg = "unknown error";
|
||||
}
|
||||
cerr << "Cannot create temp directory: " << errmsg << endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
param.tmp_dir = buf;
|
||||
}
|
||||
prepare_directories();
|
||||
|
||||
if(param.debug)
|
||||
if(param.debug) {
|
||||
cerr << "temporary dir: " << (param.tmp_dir) << endl;
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
@ -369,11 +416,11 @@ int main(int argc, char **argv)
|
||||
delete ownerPW;
|
||||
}
|
||||
|
||||
if (!doc->isOk())
|
||||
if (!doc->isOk())
|
||||
throw "Cannot read the file";
|
||||
|
||||
// check for copy permission
|
||||
if (!doc->okToCopy())
|
||||
if (!doc->okToCopy())
|
||||
{
|
||||
if (param.no_drm == 0)
|
||||
throw "Copying of text from this document is not allowed.";
|
||||
|
@ -59,7 +59,7 @@ void ffw_init(int debug)
|
||||
if ( default_encoding==NULL )
|
||||
default_encoding=FindOrMakeEncoding("ISO8859-1");
|
||||
if ( default_encoding==NULL )
|
||||
default_encoding=&custom; /* In case iconv is broken */
|
||||
default_encoding=&custom; /* In case iconv is broken */
|
||||
|
||||
if(!debug)
|
||||
{
|
||||
@ -167,13 +167,13 @@ void ffw_save(const char * filename)
|
||||
|
||||
int r = GenerateScript(cur_fv->sf, _filename
|
||||
, _, -1, -1, NULL, NULL, cur_fv->map, NULL, ly_fore);
|
||||
|
||||
|
||||
free(_);
|
||||
free(_filename);
|
||||
|
||||
if(!r)
|
||||
err("Cannot save font to %s\n", filename);
|
||||
}
|
||||
}
|
||||
void ffw_close(void)
|
||||
{
|
||||
FontViewClose(cur_fv);
|
||||
@ -290,8 +290,8 @@ void ffw_add_empty_char(int32_t unicode, int width)
|
||||
{
|
||||
SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount);
|
||||
char buffer[400];
|
||||
SCSetMetaData(sc,
|
||||
strcopy(StdGlyphName(buffer, unicode,
|
||||
SCSetMetaData(sc,
|
||||
strcopy(StdGlyphName(buffer, unicode,
|
||||
cur_fv->sf->uni_interp, cur_fv->sf->for_new_glyphs)),
|
||||
unicode, sc->comment);
|
||||
SCSynchronizeWidth(sc, width, sc->width, cur_fv);
|
||||
@ -377,13 +377,13 @@ void ffw_set_metric(double ascent, double descent)
|
||||
/*
|
||||
* TODO:bitmap, reference have not been considered in this function
|
||||
*/
|
||||
void ffw_set_widths(int * width_list, int mapping_len,
|
||||
void ffw_set_widths(int * width_list, int mapping_len,
|
||||
int stretch_narrow, int squeeze_wide)
|
||||
{
|
||||
SplineFont * sf = cur_fv->sf;
|
||||
|
||||
if(sf->onlybitmaps
|
||||
&& cur_fv->active_bitmap != NULL
|
||||
if(sf->onlybitmaps
|
||||
&& cur_fv->active_bitmap != NULL
|
||||
&& sf->bitmaps != NULL)
|
||||
{
|
||||
printf("TODO: width vs bitmap\n");
|
||||
@ -397,7 +397,7 @@ void ffw_set_widths(int * width_list, int mapping_len,
|
||||
/*
|
||||
* Don't mess with it if the glyphs is not used.
|
||||
*/
|
||||
if(width_list[i] == -1)
|
||||
if(width_list[i] == -1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -412,9 +412,9 @@ void ffw_set_widths(int * width_list, int mapping_len,
|
||||
}
|
||||
else if(((sc->width > EPS)
|
||||
&& (((sc->width > width_list[i] + EPS) && (squeeze_wide))
|
||||
|| ((sc->width < width_list[i] - EPS) && (stretch_narrow)))))
|
||||
|| ((sc->width < width_list[i] - EPS) && (stretch_narrow)))))
|
||||
{
|
||||
real transform[6];
|
||||
real transform[6];
|
||||
transform[0] = ((double)width_list[i]) / (sc->width);
|
||||
transform[3] = 1.0;
|
||||
transform[1] = transform[2] = transform[4] = transform[5] = 0;
|
||||
@ -443,7 +443,7 @@ void ffw_import_svg_glyph(int code, const char * filename, double ox, double oy,
|
||||
{
|
||||
int a = cur_fv->sf->ascent;
|
||||
int d = cur_fv->sf->descent;
|
||||
real transform[6];
|
||||
real transform[6];
|
||||
transform[0] = 1.0;
|
||||
transform[3] = 1.0;
|
||||
transform[1] = transform[2] = 0.0;
|
||||
|
@ -12,6 +12,15 @@
|
||||
|
||||
#include "path.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
# include <direct.h>
|
||||
# define STAT _stat
|
||||
# define MKDIR(A, B) _mkdir(A)
|
||||
#else
|
||||
# define STAT stat
|
||||
# define MKDIR(A, B) mkdir(A, B)
|
||||
#endif
|
||||
|
||||
using std::string;
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
@ -25,14 +34,14 @@ void create_directories(const string & path)
|
||||
{
|
||||
create_directories(path.substr(0, idx));
|
||||
}
|
||||
|
||||
int r = mkdir(path.c_str(), S_IRWXU);
|
||||
|
||||
int r = MKDIR(path.c_str(), S_IRWXU);
|
||||
if(r != 0)
|
||||
{
|
||||
if(errno == EEXIST)
|
||||
{
|
||||
struct stat stat_buf;
|
||||
if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
|
||||
struct STAT stat_buf;
|
||||
if((STAT(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
|
||||
return;
|
||||
}
|
||||
|
||||
@ -44,8 +53,8 @@ bool sanitize_filename(string & filename)
|
||||
{
|
||||
string sanitized;
|
||||
bool format_specifier_found = false;
|
||||
|
||||
for(size_t i = 0; i < filename.size(); i++)
|
||||
|
||||
for(size_t i = 0; i < filename.size(); i++)
|
||||
{
|
||||
if('%' == filename[i])
|
||||
{
|
||||
@ -54,7 +63,7 @@ bool sanitize_filename(string & filename)
|
||||
sanitized.push_back('%');
|
||||
sanitized.push_back('%');
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
// We haven't found the format specifier yet, so see if we can use this one as a valid formatter
|
||||
size_t original_i = i;
|
||||
@ -63,14 +72,14 @@ bool sanitize_filename(string & filename)
|
||||
while(++i < filename.size())
|
||||
{
|
||||
tmp.push_back(filename[i]);
|
||||
|
||||
|
||||
// If we aren't still in option specifiers, stop looking
|
||||
if(!strchr("0123456789", filename[i]))
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Check to see if we yielded a valid format specifier
|
||||
if('d' == tmp[tmp.size()-1])
|
||||
{
|
||||
@ -88,7 +97,7 @@ bool sanitize_filename(string & filename)
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
else
|
||||
{
|
||||
sanitized.push_back(filename[i]);
|
||||
}
|
||||
@ -97,7 +106,7 @@ bool sanitize_filename(string & filename)
|
||||
// Only sanitize if it is a valid format.
|
||||
if(format_specifier_found)
|
||||
{
|
||||
filename.assign(sanitized);
|
||||
filename.assign(sanitized);
|
||||
}
|
||||
|
||||
return format_specifier_found;
|
||||
@ -111,7 +120,7 @@ bool is_truetype_suffix(const string & suffix)
|
||||
string get_filename (const string & path)
|
||||
{
|
||||
size_t idx = path.rfind('/');
|
||||
if(idx == string::npos)
|
||||
if(idx == string::npos)
|
||||
return path;
|
||||
else if (idx == path.size() - 1)
|
||||
return "";
|
||||
@ -134,4 +143,4 @@ string get_suffix(const string & path)
|
||||
}
|
||||
|
||||
|
||||
} //namespace pdf2htmlEX
|
||||
} //namespace pdf2htmlEX
|
||||
|
Loading…
Reference in New Issue
Block a user