1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-21 12:40:08 +00:00

Merge pull request #254 from marcsanfacon/master

MinGW Port & 2 new options
This commit is contained in:
Lu Wang 2014-01-11 00:10:59 -08:00
commit ae7c8e8d9c
10 changed files with 224 additions and 129 deletions

View File

@ -11,6 +11,7 @@ hasufell <julian.ospald@googlemail.com>
Herbert Jones <herbert@mediafire.com> Herbert Jones <herbert@mediafire.com>
Hongliang Tian <tatetian@gmail.com> Hongliang Tian <tatetian@gmail.com>
John Hewson <john@jahewson.com> John Hewson <john@jahewson.com>
Marc Sanfacon <marc.sanfacon@gmail.com>
Michele Redolfi <michele@tecnicaict.com> Michele Redolfi <michele@tecnicaict.com>
Mick Giles <mick@mickgiles.com> Mick Giles <mick@mickgiles.com>
Ryan Morlok <ryan.morlok@morlok.com> Ryan Morlok <ryan.morlok@morlok.com>

View File

@ -75,7 +75,7 @@ else()
set(FONTFORGE_INCLUDE_DIRS ${FF_INCLUDE_PATH}/fontforge) set(FONTFORGE_INCLUDE_DIRS ${FF_INCLUDE_PATH}/fontforge)
include_directories(${FONTFORGE_INCLUDE_DIRS}) include_directories(${FONTFORGE_INCLUDE_DIRS})
# MacOSX gettext is in /opt/local/include - strange # MacOSX gettext is in /opt/local/include - strange
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
find_path(GETTEXT_INCLUDE_PATH libintl.h HINTS "/usr/local/opt/gettext/include") # homebrew find_path(GETTEXT_INCLUDE_PATH libintl.h HINTS "/usr/local/opt/gettext/include") # homebrew
if(GETTEXT_INCLUDE_PATH) if(GETTEXT_INCLUDE_PATH)
include_directories(${GETTEXT_INCLUDE_PATH}) include_directories(${GETTEXT_INCLUDE_PATH})
@ -97,7 +97,7 @@ else()
macro(wl_find_library LIB_NAME RESULT) macro(wl_find_library LIB_NAME RESULT)
unset(${RESULT}) unset(${RESULT})
unset(${RESULT} CACHE) unset(${RESULT} CACHE)
foreach(FULL_LIB_NAME foreach(FULL_LIB_NAME
${CMAKE_IMPORT_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX} ${CMAKE_IMPORT_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_IMPORT_LIBRARY_SUFFIX}
${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
${CMAKE_STATIC_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX} ${CMAKE_STATIC_LIBRARY_PREFIX}${LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}
@ -130,7 +130,6 @@ else()
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES})
endif() endif()
# debug build flags (overwrite default cmake debug flags) # debug build flags (overwrite default cmake debug flags)
set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg") set(CMAKE_C_FLAGS_DEBUG "-ggdb -pg")
set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg") set(CMAKE_CXX_FLAGS_DEBUG "-ggdb -pg")
@ -159,7 +158,7 @@ endif()
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT) check_cxx_compiler_flag("${CMAKE_CXX_FLAGS}" CXX0X_SUPPORT)
if(NOT CXX0X_SUPPORT) if(NOT CXX0X_SUPPORT)
message(FATAL_ERROR "Error: you compiler does not support C++0x, please update it.") message(FATAL_ERROR "Error: your compiler does not support C++0x, please update it.")
endif() endif()
@ -174,9 +173,9 @@ configure_file (${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js.in ${CMAKE_SOURCE_DIR}/s
set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC} set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
src/Param.h src/Param.h
src/pdf2htmlEX.cc src/pdf2htmlEX.cc
src/pdf2htmlEX-config.h src/pdf2htmlEX-config.h
src/HTMLRenderer/HTMLRenderer.h src/HTMLRenderer/HTMLRenderer.h
src/HTMLRenderer/draw.cc src/HTMLRenderer/draw.cc
src/HTMLRenderer/general.cc src/HTMLRenderer/general.cc
src/HTMLRenderer/image.cc src/HTMLRenderer/image.cc
@ -185,12 +184,12 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
src/HTMLRenderer/outline.cc src/HTMLRenderer/outline.cc
src/HTMLRenderer/state.cc src/HTMLRenderer/state.cc
src/HTMLRenderer/text.cc src/HTMLRenderer/text.cc
src/BackgroundRenderer/BackgroundRenderer.h src/BackgroundRenderer/BackgroundRenderer.h
src/BackgroundRenderer/BackgroundRenderer.cc src/BackgroundRenderer/BackgroundRenderer.cc
src/BackgroundRenderer/SplashBackgroundRenderer.h src/BackgroundRenderer/SplashBackgroundRenderer.h
src/BackgroundRenderer/SplashBackgroundRenderer.cc src/BackgroundRenderer/SplashBackgroundRenderer.cc
src/BackgroundRenderer/CairoBackgroundRenderer.h src/BackgroundRenderer/CairoBackgroundRenderer.h
src/BackgroundRenderer/CairoBackgroundRenderer.cc src/BackgroundRenderer/CairoBackgroundRenderer.cc
src/util/const.h src/util/const.h
src/util/const.cc src/util/const.cc
src/util/css_const.h src/util/css_const.h
@ -229,7 +228,7 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
add_executable(pdf2htmlEX ${PDF2HTMLEX_SRC}) add_executable(pdf2htmlEX ${PDF2HTMLEX_SRC})
target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS}) target_link_libraries(pdf2htmlEX ${PDF2HTMLEX_LIBS})
add_custom_target(pdf2htmlEX_resources ALL DEPENDS add_custom_target(pdf2htmlEX_resources ALL DEPENDS
${CMAKE_SOURCE_DIR}/share/base.min.css ${CMAKE_SOURCE_DIR}/share/base.min.css
${CMAKE_SOURCE_DIR}/share/fancy.min.css ${CMAKE_SOURCE_DIR}/share/fancy.min.css
${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js
@ -237,15 +236,15 @@ add_custom_target(pdf2htmlEX_resources ALL DEPENDS
add_custom_command(OUTPUT ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js add_custom_command(OUTPUT ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.min.js
COMMAND ${CMAKE_SOURCE_DIR}/share/build_js.sh COMMAND ${CMAKE_SOURCE_DIR}/share/build_js.sh
DEPENDS DEPENDS
${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js ${CMAKE_SOURCE_DIR}/share/pdf2htmlEX.js
) )
add_custom_command(OUTPUT add_custom_command(OUTPUT
${CMAKE_SOURCE_DIR}/share/base.min.css ${CMAKE_SOURCE_DIR}/share/base.min.css
${CMAKE_SOURCE_DIR}/share/fancy.min.css ${CMAKE_SOURCE_DIR}/share/fancy.min.css
COMMAND ${CMAKE_SOURCE_DIR}/share/build_css.sh COMMAND ${CMAKE_SOURCE_DIR}/share/build_css.sh
DEPENDS DEPENDS
${CMAKE_SOURCE_DIR}/share/base.css ${CMAKE_SOURCE_DIR}/share/base.css
${CMAKE_SOURCE_DIR}/share/fancy.css ${CMAKE_SOURCE_DIR}/share/fancy.css
) )
@ -269,5 +268,5 @@ install (FILES ${PDF2HTMLEX_RESOURCE} DESTINATION share/pdf2htmlEX)
install (FILES pdf2htmlEX.1 DESTINATION share/man/man1) install (FILES pdf2htmlEX.1 DESTINATION share/man/man1)
enable_testing() enable_testing()
add_test(test_naming add_test(test_naming
python ${CMAKE_SOURCE_DIR}/test/test_naming.py) python ${CMAKE_SOURCE_DIR}/test/test_naming.py)

View File

@ -82,6 +82,13 @@ This switch is useful if you want pages to be loaded separately & dynamically --
Also see --page-filename. Also see --page-filename.
.TP
.B --tmp-file-size-limit <limit> (Default: -1)
Limit the total size of the temporary files, in KB; this indirectly limits the total size of the output as well.
The limit is only an estimate: it is checked before each page is processed, so the conversion stops once the total size of the temporary files exceeds this value and the output may end up somewhat larger.
-1 (the default) means no limit.
.TP .TP
.B --dest-dir <dir> (Default: .) .B --dest-dir <dir> (Default: .)
Specify destination folder. Specify destination folder.
@ -262,6 +269,10 @@ If switched off, intermediate files won't be cleaned in the end.
.B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX) .B --data-dir <dir> (Default: @CMAKE_INSTALL_PREFIX@/share/pdf2htmlEX)
Specify the folder holding the manifest and other files (see below for the manifest file)` Specify the folder holding the manifest and other files (see below for the manifest file)`
.TP
.B --tmp-dir <dir> (Default: /tmp)
Specify the folder in which temporary files are created.
.TP .TP
.B --css-draw <0|1> (Default: 0) .B --css-draw <0|1> (Default: 0)
Experimental and unsupported CSS drawing Experimental and unsupported CSS drawing

View File

@ -45,7 +45,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
,param(param) ,param(param)
,html_text_page(param, all_manager) ,html_text_page(param, all_manager)
,preprocessor(param) ,preprocessor(param)
,tmp_files(param) ,tmp_files(param)
{ {
if(!(param.debug)) if(!(param.debug))
{ {
@ -79,7 +79,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
} }
HTMLRenderer::~HTMLRenderer() HTMLRenderer::~HTMLRenderer()
{ {
ffw_finalize(); ffw_finalize();
delete [] cur_mapping; delete [] cur_mapping;
delete [] cur_mapping2; delete [] cur_mapping2;
@ -96,7 +96,7 @@ void HTMLRenderer::process(PDFDoc *doc)
/////////////////// ///////////////////
// Process pages // Process pages
bg_renderer = nullptr; bg_renderer = nullptr;
if(param.process_nontext) if(param.process_nontext)
{ {
@ -107,15 +107,20 @@ void HTMLRenderer::process(PDFDoc *doc)
} }
int page_count = (param.last_page - param.first_page + 1); int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i) for(int i = param.first_page; i <= param.last_page ; ++i)
{ {
if (param.tmp_file_size_limit != -1 && tmp_files.get_total_size() > param.tmp_file_size_limit * 1024) {
cerr << "Stop processing, reach max size\n";
break;
}
cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush; cerr << "Working: " << (i-param.first_page) << "/" << page_count << '\r' << flush;
if(param.split_pages) if(param.split_pages)
{ {
string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i); string filled_template_filename = (char*)str_fmt(param.page_filename.c_str(), i);
auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str()); auto page_fn = str_fmt("%s/%s", param.dest_dir.c_str(), filled_template_filename.c_str());
f_curpage = new ofstream((char*)page_fn, ofstream::binary); f_curpage = new ofstream((char*)page_fn, ofstream::binary);
if(!(*f_curpage)) if(!(*f_curpage))
throw string("Cannot open ") + (char*)page_fn + " for writing"; throw string("Cannot open ") + (char*)page_fn + " for writing";
set_stream_flags((*f_curpage)); set_stream_flags((*f_curpage));
@ -128,9 +133,9 @@ void HTMLRenderer::process(PDFDoc *doc)
bg_renderer->render_page(doc, i); bg_renderer->render_page(doc, i);
} }
doc->displayPage(this, i, doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
0, 0,
(!(param.use_cropbox)), (!(param.use_cropbox)),
true, // crop true, // crop
false, // printing false, // printing
@ -149,7 +154,7 @@ void HTMLRenderer::process(PDFDoc *doc)
//////////////////////// ////////////////////////
// Process Outline // Process Outline
if(param.process_outline) if(param.process_outline)
process_outline(); process_outline();
post_process(); post_process();
@ -170,7 +175,7 @@ void HTMLRenderer::setDefaultCTM(double *ctm)
#if POPPLER_OLDER_THAN_0_23_0 #if POPPLER_OLDER_THAN_0_23_0
void HTMLRenderer::startPage(int pageNum, GfxState *state) void HTMLRenderer::startPage(int pageNum, GfxState *state)
#else #else
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
#endif #endif
{ {
this->pageNum = pageNum; this->pageNum = pageNum;
@ -183,12 +188,12 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
long long wid = all_manager.width.install(pageWidth); long long wid = all_manager.width.install(pageWidth);
long long hid = all_manager.height.install(pageHeight); long long hid = all_manager.height.install(pageHeight);
(*f_curpage) (*f_curpage)
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum << "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
<< "\" class=\"" << CSS::PAGE_FRAME_CN << "\" class=\"" << CSS::PAGE_FRAME_CN
<< " " << CSS::WIDTH_CN << wid << " " << CSS::WIDTH_CN << wid
<< " " << CSS::HEIGHT_CN << hid << " " << CSS::HEIGHT_CN << hid
<< "\" data-page-no=\"" << pageNum << "\">" << "\" data-page-no=\"" << pageNum << "\">"
<< "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN << "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN
<< " " << CSS::PAGE_CONTENT_BOX_CN << pageNum << " " << CSS::PAGE_CONTENT_BOX_CN << pageNum
<< " " << CSS::WIDTH_CN << wid << " " << CSS::WIDTH_CN << wid
<< " " << CSS::HEIGHT_CN << hid << " " << CSS::HEIGHT_CN << hid
@ -201,11 +206,11 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
if(param.split_pages) if(param.split_pages)
{ {
f_pages.fs f_pages.fs
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum << "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
<< "\" class=\"" << CSS::PAGE_FRAME_CN << "\" class=\"" << CSS::PAGE_FRAME_CN
<< " " << CSS::WIDTH_CN << wid << " " << CSS::WIDTH_CN << wid
<< " " << CSS::HEIGHT_CN << hid << " " << CSS::HEIGHT_CN << hid
<< "\" data-page-no=\"" << pageNum << "\" data-page-no=\"" << pageNum
<< "\" data-page-url=\""; << "\" data-page-url=\"";
writeAttribute(f_pages.fs, cur_page_filename); writeAttribute(f_pages.fs, cur_page_filename);
@ -236,7 +241,7 @@ void HTMLRenderer::endPage() {
// TODO: create a function for this // TODO: create a function for this
// BE CAREFUL WITH ESCAPES // BE CAREFUL WITH ESCAPES
(*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{"; (*f_curpage) << "<div class=\"" << CSS::PAGE_DATA_CN << "\" data-data='{";
//default CTM //default CTM
(*f_curpage) << "\"ctm\":["; (*f_curpage) << "\"ctm\":[";
for(int i = 0; i < 6; ++i) for(int i = 0; i < 6; ++i)
@ -247,7 +252,7 @@ void HTMLRenderer::endPage() {
(*f_curpage) << "]"; (*f_curpage) << "]";
(*f_curpage) << "}'></div>"; (*f_curpage) << "}'></div>";
// close page // close page
(*f_curpage) << "</div>" << endl; (*f_curpage) << "</div>" << endl;
@ -266,7 +271,7 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
*/ */
{ {
vector<double> zoom_factors; vector<double> zoom_factors;
if(is_positive(param.zoom)) if(is_positive(param.zoom))
{ {
zoom_factors.push_back(param.zoom); zoom_factors.push_back(param.zoom);
@ -283,8 +288,8 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
} }
double zoom = (zoom_factors.empty() ? 1.0 : (*min_element(zoom_factors.begin(), zoom_factors.end()))); double zoom = (zoom_factors.empty() ? 1.0 : (*min_element(zoom_factors.begin(), zoom_factors.end())));
text_scale_factor1 = max<double>(zoom, param.font_size_multiplier); text_scale_factor1 = max<double>(zoom, param.font_size_multiplier);
text_scale_factor2 = zoom / text_scale_factor1; text_scale_factor2 = zoom / text_scale_factor1;
} }
@ -340,13 +345,13 @@ void HTMLRenderer::pre_process(PDFDoc * doc)
* we have to keep the html file for pages into a temporary place * we have to keep the html file for pages into a temporary place
* because we'll have to embed css before it * because we'll have to embed css before it
* *
* Otherwise just generate it * Otherwise just generate it
*/ */
auto fn = str_fmt("%s/__pages", param.tmp_dir.c_str()); auto fn = str_fmt("%s/__pages", param.tmp_dir.c_str());
tmp_files.add((char*)fn); tmp_files.add((char*)fn);
f_pages.path = (char*)fn; f_pages.path = (char*)fn;
f_pages.fs.open(f_pages.path, ofstream::binary); f_pages.fs.open(f_pages.path, ofstream::binary);
if(!f_pages.fs) if(!f_pages.fs)
throw string("Cannot open ") + (char*)fn + " for writing"; throw string("Cannot open ") + (char*)fn + " for writing";
set_stream_flags(f_pages.fs); set_stream_flags(f_pages.fs);
@ -371,7 +376,7 @@ void HTMLRenderer::post_process(void)
{ {
f_outline.fs.close(); f_outline.fs.close();
} }
f_pages.fs.close(); f_pages.fs.close();
f_css.fs.close(); f_css.fs.close();
// build the main HTML file // build the main HTML file
@ -394,20 +399,6 @@ void HTMLRenderer::post_process(void)
long line_no = 0; long line_no = 0;
while(getline(manifest_fin, line)) while(getline(manifest_fin, line))
{ {
++line_no;
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string)
{
output << line << endl;
continue;
}
// trim space at both sides // trim space at both sides
{ {
static const char * whitespaces = " \t\n\v\f\r"; static const char * whitespaces = " \t\n\v\f\r";
@ -424,6 +415,20 @@ void HTMLRenderer::post_process(void)
} }
} }
++line_no;
if(line == "\"\"\"")
{
embed_string = !embed_string;
continue;
}
if(embed_string)
{
output << line << endl;
continue;
}
if(line.empty() || line[0] == '#') if(line.empty() || line[0] == '#')
continue; continue;
@ -492,7 +497,7 @@ void HTMLRenderer::dump_css (void)
all_manager.width .dump_css(f_css.fs); all_manager.width .dump_css(f_css.fs);
all_manager.left .dump_css(f_css.fs); all_manager.left .dump_css(f_css.fs);
all_manager.bgimage_size .dump_css(f_css.fs); all_manager.bgimage_size .dump_css(f_css.fs);
// print css // print css
if(param.printing) if(param.printing)
{ {
@ -518,8 +523,8 @@ void HTMLRenderer::dump_css (void)
void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy) void HTMLRenderer::embed_file(ostream & out, const string & path, const string & type, bool copy)
{ {
string fn = get_filename(path); string fn = get_filename(path);
string suffix = (type == "") ? get_suffix(fn) : type; string suffix = (type == "") ? get_suffix(fn) : type;
// TODO // TODO
auto iter = EMBED_STRING_MAP.find(suffix); auto iter = EMBED_STRING_MAP.find(suffix);
if(iter == EMBED_STRING_MAP.end()) if(iter == EMBED_STRING_MAP.end())
@ -529,14 +534,14 @@ void HTMLRenderer::embed_file(ostream & out, const string & path, const string &
} }
const auto & entry = iter->second; const auto & entry = iter->second;
if(param.*(entry.embed_flag)) if(param.*(entry.embed_flag))
{ {
ifstream fin(path, ifstream::binary); ifstream fin(path, ifstream::binary);
if(!fin) if(!fin)
throw string("Cannot open file ") + path + " for embedding"; throw string("Cannot open file ") + path + " for embedding";
out << entry.prefix_embed; out << entry.prefix_embed;
if(entry.base64_encode) if(entry.base64_encode)
{ {
out << Base64Stream(fin); out << Base64Stream(fin);

View File

@ -17,20 +17,21 @@ struct Param
{ {
// pages // pages
int first_page, last_page; int first_page, last_page;
// dimensions // dimensions
double zoom; double zoom;
double fit_width, fit_height; double fit_width, fit_height;
int use_cropbox; int use_cropbox;
double h_dpi, v_dpi; double h_dpi, v_dpi;
// output // output
int embed_css; int embed_css;
int embed_font; int embed_font;
int embed_image; int embed_image;
int embed_javascript; int embed_javascript;
int embed_outline; int embed_outline;
int split_pages; int split_pages;
int tmp_file_size_limit;
std::string dest_dir; std::string dest_dir;
std::string css_filename; std::string css_filename;
std::string page_filename; std::string page_filename;
@ -39,7 +40,7 @@ struct Param
int process_outline; int process_outline;
int printing; int printing;
int fallback; int fallback;
// fonts // fonts
int embed_external_font; int embed_external_font;
std::string font_format; std::string font_format;
@ -50,7 +51,7 @@ struct Param
int squeeze_wide_glyph; int squeeze_wide_glyph;
int override_fstype; int override_fstype;
int process_type3; int process_type3;
// text // text
double h_eps, v_eps; double h_eps, v_eps;
double space_threshold; double space_threshold;
@ -61,21 +62,19 @@ struct Param
// background image // background image
std::string bg_format; std::string bg_format;
// encryption // encryption
std::string owner_password, user_password; std::string owner_password, user_password;
int no_drm; int no_drm;
// misc. // misc.
int clean_tmp; int clean_tmp;
std::string data_dir; std::string data_dir;
std::string tmp_dir;
int css_draw; int css_draw;
int debug; int debug;
std::string input_filename, output_filename;
// not a paramater std::string input_filename, output_filename;
std::string tmp_dir;
}; };
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -9,21 +9,31 @@
#include <iostream> #include <iostream>
#include <cstdio> #include <cstdio>
#include <sys/stat.h>
#include "TmpFiles.h" #include "TmpFiles.h"
#include "Param.h" #include "Param.h"
using namespace std; using namespace std;
#ifndef _WIN32
# define STAT stat
# define RMDIR rmdir
#else
# include <direct.h>
# define STAT _stat
# define RMDIR _rmdir
#endif
namespace pdf2htmlEX { namespace pdf2htmlEX {
TmpFiles::TmpFiles( const Param& param ) TmpFiles::TmpFiles( const Param& param )
: param( param ) : param( param )
{ } { }
TmpFiles::~TmpFiles() TmpFiles::~TmpFiles()
{ {
clean(); clean();
} }
@ -49,10 +59,23 @@ void TmpFiles::clean()
cerr << "Remove temporary file: " << fn << endl; cerr << "Remove temporary file: " << fn << endl;
} }
remove(param.tmp_dir.c_str()); RMDIR(param.tmp_dir.c_str());
if(param.debug) if(param.debug)
cerr << "Remove temporary directory: " << param.tmp_dir << endl; cerr << "Remove temporary directory: " << param.tmp_dir << endl;
} }
// Return the total size, in bytes, of the temporary files registered so far.
//
// Files are registered through add() before they are necessarily created on
// disk, so a registered name may not exist (yet) or may not be readable.
// Such entries are skipped: when STAT fails the contents of `st` are
// indeterminate and must not be added to the total.
double TmpFiles::get_total_size() const
{
    double total_size = 0;
    struct STAT st;
    for(const auto & fn : tmp_files)
    {
        if(STAT(fn.c_str(), &st) != 0)
            continue; // missing or inaccessible file contributes nothing

        total_size += st.st_size;
    }

    return total_size;
}
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -7,19 +7,20 @@
namespace pdf2htmlEX { namespace pdf2htmlEX {
class TmpFiles class TmpFiles
{ {
public: public:
explicit TmpFiles( const Param& param ); explicit TmpFiles( const Param& param );
~TmpFiles(); ~TmpFiles();
void add( const std::string& fn); void add( const std::string& fn);
double get_total_size() const;
private: private:
void clean(); void clean();
const Param& param; const Param& param;
std::set<std::string> tmp_files; std::set<std::string> tmp_files;
}; };
} // namespace pdf2htmlEX } // namespace pdf2htmlEX

View File

@ -42,6 +42,12 @@ using namespace pdf2htmlEX;
Param param; Param param;
ArgParser argparser; ArgParser argparser;
#ifdef _WIN32
# include <iomanip>
# include <libgen.h>
# include <direct.h>
#endif
void deprecated_font_suffix(const char * dummy = nullptr) void deprecated_font_suffix(const char * dummy = nullptr)
{ {
cerr << "--font-suffix is deprecated. Use `--font-format` instead." << endl; cerr << "--font-suffix is deprecated. Use `--font-format` instead." << endl;
@ -65,7 +71,7 @@ void show_version_and_exit(const char * dummy = nullptr)
#if ENABLE_SVG #if ENABLE_SVG
cerr << " cairo " << cairo_version_string() << endl; cerr << " cairo " << cairo_version_string() << endl;
#endif #endif
cerr << "Default data-dir: " << PDF2HTMLEX_DATA_PATH << endl; cerr << "Default data-dir: " << param.data_dir << endl;
cerr << "Supported image format:"; cerr << "Supported image format:";
#ifdef ENABLE_LIBPNG #ifdef ENABLE_LIBPNG
cerr << " png"; cerr << " png";
@ -106,21 +112,56 @@ void embed_parser (const char * str)
} }
} }
// Create a unique working directory below param.tmp_dir and store its path
// back into param.tmp_dir, so that all temporary files go into it.
//
// On POSIX systems the directory is created with mkdtemp(), which replaces
// the trailing "XXXXXX" of the template in place. On Windows the template
// suffix is replaced by a six-digit pseudo-random number and the directory
// is created with mkdir().
//
// On failure an error message is printed to stderr and the process exits
// with EXIT_FAILURE.
void prepare_directories()
{
    std::string tmp_dir = param.tmp_dir + "/pdf2htmlEX-XXXXXX";

#ifndef _WIN32
    errno = 0;

    // mkdtemp() modifies its argument, so a writable copy is needed.
    // The buffer comes from new[], hence the array form of unique_ptr:
    // unique_ptr<char> would release it with plain delete.
    unique_ptr<char[]> pBuf(new char[tmp_dir.size() + 1]);
    strcpy(pBuf.get(), tmp_dir.c_str());

    auto p = mkdtemp(pBuf.get());
    if(p == nullptr)
    {
        const char * errmsg = strerror(errno);
        if(!errmsg)
        {
            errmsg = "unknown error";
        }
        cerr << "Cannot create temp directory: " << errmsg << endl;
        exit(EXIT_FAILURE);
    }
    param.tmp_dir = pBuf.get();
#else
    srand((unsigned)time(0));
    // Scale rand() into [0, 1e6) to obtain at most six digits.
    int rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);

    // Zero-pad to exactly six characters; the default fill character is a
    // space, which would otherwise end up inside the directory name.
    stringstream ss;
    ss << setfill('0') << setw(6) << rand_value;

    // Drop the "XXXXXX" placeholder and append the generated suffix.
    tmp_dir.erase(tmp_dir.size() - 6);
    param.tmp_dir = tmp_dir + ss.str();

    if (mkdir(param.tmp_dir.c_str())) {
        cerr << "Cannot create temp directory (" << param.tmp_dir << "): " << strerror(errno) << endl;
        exit(EXIT_FAILURE);
    }
#endif
}
void parse_options (int argc, char **argv) void parse_options (int argc, char **argv)
{ {
argparser argparser
// pages // pages
.add("first-page,f", &param.first_page, 1, "first page to convert") .add("first-page,f", &param.first_page, 1, "first page to convert")
.add("last-page,l", &param.last_page, numeric_limits<int>::max(), "last page to convert") .add("last-page,l", &param.last_page, numeric_limits<int>::max(), "last page to convert")
// dimensions // dimensions
.add("zoom", &param.zoom, 0, "zoom ratio", true) .add("zoom", &param.zoom, 0, "zoom ratio", true)
.add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true) .add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", true)
.add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true) .add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", true)
.add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox") .add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox")
.add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI")
.add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI") .add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI")
// output files // output files
.add("embed", "specify which elements should be embedded into output", embed_parser, true) .add("embed", "specify which elements should be embedded into output", embed_parser, true)
.add("embed-css", &param.embed_css, 1, "embed CSS files into output") .add("embed-css", &param.embed_css, 1, "embed CSS files into output")
@ -128,6 +169,7 @@ void parse_options (int argc, char **argv)
.add("embed-image", &param.embed_image, 1, "embed image files into output") .add("embed-image", &param.embed_image, 1, "embed image files into output")
.add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output") .add("embed-javascript", &param.embed_javascript, 1, "embed JavaScript files into output")
.add("embed-outline", &param.embed_outline, 1, "embed outlines into output") .add("embed-outline", &param.embed_outline, 1, "embed outlines into output")
.add("tmp-file-size-limit", &param.tmp_file_size_limit, -1, "Limit the temporary file output size, in KB (-1 for no limit). This is only an estimate, the output may be bigger")
.add("split-pages", &param.split_pages, 0, "split pages into separate files") .add("split-pages", &param.split_pages, 0, "split pages into separate files")
.add("dest-dir", &param.dest_dir, ".", "specify destination directory") .add("dest-dir", &param.dest_dir, ".", "specify destination directory")
.add("css-filename", &param.css_filename, "", "filename of the generated css file") .add("css-filename", &param.css_filename, "", "filename of the generated css file")
@ -137,7 +179,7 @@ void parse_options (int argc, char **argv)
.add("process-outline", &param.process_outline, 1, "show outline in HTML") .add("process-outline", &param.process_outline, 1, "show outline in HTML")
.add("printing", &param.printing, 1, "enable printing support") .add("printing", &param.printing, 1, "enable printing support")
.add("fallback", &param.fallback, 0, "output in fallback mode") .add("fallback", &param.fallback, 0, "output in fallback mode")
// fonts // fonts
.add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts") .add("embed-external-font", &param.embed_external_font, 1, "embed local match for external fonts")
.add("font-format", &param.font_format, "woff", "suffix for embedded font files (ttf,otf,woff,svg)") .add("font-format", &param.font_format, "woff", "suffix for embedded font files (ttf,otf,woff,svg)")
@ -148,7 +190,7 @@ void parse_options (int argc, char **argv)
.add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them") .add("squeeze-wide-glyph", &param.squeeze_wide_glyph, 1, "shrink wide glyphs instead of truncating them")
.add("override-fstype", &param.override_fstype, 0, "clear the fstype bits in TTF/OTF fonts") .add("override-fstype", &param.override_fstype, 0, "clear the fstype bits in TTF/OTF fonts")
.add("process-type3", &param.process_type3, 0, "convert Type 3 fonts for web (experimental)") .add("process-type3", &param.process_type3, 0, "convert Type 3 fonts for web (experimental)")
// text // text
.add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels") .add("heps", &param.h_eps, 1.0, "horizontal threshold for merging text, in pixels")
.add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels") .add("veps", &param.v_eps, 1.0, "vertical threshold for merging text, in pixels")
@ -160,19 +202,20 @@ void parse_options (int argc, char **argv)
// background image // background image
.add("bg-format", &param.bg_format, "png", "specify background image format") .add("bg-format", &param.bg_format, "png", "specify background image format")
// encryption // encryption
.add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true) .add("owner-password,o", &param.owner_password, "", "owner password (for encrypted files)", true)
.add("user-password,u", &param.user_password, "", "user password (for encrypted files)", true) .add("user-password,u", &param.user_password, "", "user password (for encrypted files)", true)
.add("no-drm", &param.no_drm, 0, "override document DRM settings") .add("no-drm", &param.no_drm, 0, "override document DRM settings")
// misc. // misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") .add("tmp-dir", &param.tmp_dir, param.tmp_dir, "specify the location of tempory directory.")
.add("data-dir", &param.data_dir, param.data_dir, "specify data directory")
// TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
// .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") // .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing")
.add("debug", &param.debug, 0, "print debugging information") .add("debug", &param.debug, 0, "print debugging information")
// meta // meta
.add("version,v", "print copyright and version info", &show_version_and_exit) .add("version,v", "print copyright and version info", &show_version_and_exit)
.add("help,h", "print usage information", &show_usage_and_exit) .add("help,h", "print usage information", &show_usage_and_exit)
@ -317,29 +360,33 @@ void check_param()
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
// We need to adjust these directories before parsing the options.
#ifndef _WIN32
param.tmp_dir = "/tmp";
param.data_dir = PDF2HTMLEX_DATA_PATH;
#else
{
// Under Windows, the default data_dir is under /data in the pdf2htmlEX directory
stringstream ss;
ss << dirname(argv[0]) << "/data";
param.data_dir = ss.str();
// Under Windows, the temp path is not under /tmp, find it.
char temppath[MAX_PATH];
::GetTempPath(MAX_PATH, temppath);
param.tmp_dir = temppath;
}
#endif
parse_options(argc, argv); parse_options(argc, argv);
check_param(); check_param();
//prepare the directories //prepare the directories
{ prepare_directories();
char buf[] = "/tmp/pdf2htmlEX-XXXXXX";
errno = 0;
auto p = mkdtemp(buf);
if(p == nullptr)
{
const char * errmsg = strerror(errno);
if(!errmsg)
{
errmsg = "unknown error";
}
cerr << "Cannot create temp directory: " << errmsg << endl;
exit(EXIT_FAILURE);
}
param.tmp_dir = buf;
}
if(param.debug) if(param.debug) {
cerr << "temporary dir: " << (param.tmp_dir) << endl; cerr << "temporary dir: " << (param.tmp_dir) << endl;
}
try try
{ {
@ -369,11 +416,11 @@ int main(int argc, char **argv)
delete ownerPW; delete ownerPW;
} }
if (!doc->isOk()) if (!doc->isOk())
throw "Cannot read the file"; throw "Cannot read the file";
// check for copy permission // check for copy permission
if (!doc->okToCopy()) if (!doc->okToCopy())
{ {
if (param.no_drm == 0) if (param.no_drm == 0)
throw "Copying of text from this document is not allowed."; throw "Copying of text from this document is not allowed.";

View File

@ -59,7 +59,7 @@ void ffw_init(int debug)
if ( default_encoding==NULL ) if ( default_encoding==NULL )
default_encoding=FindOrMakeEncoding("ISO8859-1"); default_encoding=FindOrMakeEncoding("ISO8859-1");
if ( default_encoding==NULL ) if ( default_encoding==NULL )
default_encoding=&custom; /* In case iconv is broken */ default_encoding=&custom; /* In case iconv is broken */
if(!debug) if(!debug)
{ {
@ -167,13 +167,13 @@ void ffw_save(const char * filename)
int r = GenerateScript(cur_fv->sf, _filename int r = GenerateScript(cur_fv->sf, _filename
, _, -1, -1, NULL, NULL, cur_fv->map, NULL, ly_fore); , _, -1, -1, NULL, NULL, cur_fv->map, NULL, ly_fore);
free(_); free(_);
free(_filename); free(_filename);
if(!r) if(!r)
err("Cannot save font to %s\n", filename); err("Cannot save font to %s\n", filename);
} }
void ffw_close(void) void ffw_close(void)
{ {
FontViewClose(cur_fv); FontViewClose(cur_fv);
@ -290,8 +290,8 @@ void ffw_add_empty_char(int32_t unicode, int width)
{ {
SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount); SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount);
char buffer[400]; char buffer[400];
SCSetMetaData(sc, SCSetMetaData(sc,
strcopy(StdGlyphName(buffer, unicode, strcopy(StdGlyphName(buffer, unicode,
cur_fv->sf->uni_interp, cur_fv->sf->for_new_glyphs)), cur_fv->sf->uni_interp, cur_fv->sf->for_new_glyphs)),
unicode, sc->comment); unicode, sc->comment);
SCSynchronizeWidth(sc, width, sc->width, cur_fv); SCSynchronizeWidth(sc, width, sc->width, cur_fv);
@ -377,13 +377,13 @@ void ffw_set_metric(double ascent, double descent)
/* /*
* TODO:bitmap, reference have not been considered in this function * TODO:bitmap, reference have not been considered in this function
*/ */
void ffw_set_widths(int * width_list, int mapping_len, void ffw_set_widths(int * width_list, int mapping_len,
int stretch_narrow, int squeeze_wide) int stretch_narrow, int squeeze_wide)
{ {
SplineFont * sf = cur_fv->sf; SplineFont * sf = cur_fv->sf;
if(sf->onlybitmaps if(sf->onlybitmaps
&& cur_fv->active_bitmap != NULL && cur_fv->active_bitmap != NULL
&& sf->bitmaps != NULL) && sf->bitmaps != NULL)
{ {
printf("TODO: width vs bitmap\n"); printf("TODO: width vs bitmap\n");
@ -397,7 +397,7 @@ void ffw_set_widths(int * width_list, int mapping_len,
/* /*
* Don't mess with it if the glyphs is not used. * Don't mess with it if the glyphs is not used.
*/ */
if(width_list[i] == -1) if(width_list[i] == -1)
{ {
continue; continue;
} }
@ -412,9 +412,9 @@ void ffw_set_widths(int * width_list, int mapping_len,
} }
else if(((sc->width > EPS) else if(((sc->width > EPS)
&& (((sc->width > width_list[i] + EPS) && (squeeze_wide)) && (((sc->width > width_list[i] + EPS) && (squeeze_wide))
|| ((sc->width < width_list[i] - EPS) && (stretch_narrow))))) || ((sc->width < width_list[i] - EPS) && (stretch_narrow)))))
{ {
real transform[6]; real transform[6];
transform[0] = ((double)width_list[i]) / (sc->width); transform[0] = ((double)width_list[i]) / (sc->width);
transform[3] = 1.0; transform[3] = 1.0;
transform[1] = transform[2] = transform[4] = transform[5] = 0; transform[1] = transform[2] = transform[4] = transform[5] = 0;
@ -443,7 +443,7 @@ void ffw_import_svg_glyph(int code, const char * filename, double ox, double oy,
{ {
int a = cur_fv->sf->ascent; int a = cur_fv->sf->ascent;
int d = cur_fv->sf->descent; int d = cur_fv->sf->descent;
real transform[6]; real transform[6];
transform[0] = 1.0; transform[0] = 1.0;
transform[3] = 1.0; transform[3] = 1.0;
transform[1] = transform[2] = 0.0; transform[1] = transform[2] = 0.0;

View File

@ -12,6 +12,15 @@
#include "path.h" #include "path.h"
#ifdef _WIN32
# include <direct.h>
# define STAT _stat
# define MKDIR(A, B) _mkdir(A)
#else
# define STAT stat
# define MKDIR(A, B) mkdir(A, B)
#endif
using std::string; using std::string;
namespace pdf2htmlEX { namespace pdf2htmlEX {
@ -25,14 +34,14 @@ void create_directories(const string & path)
{ {
create_directories(path.substr(0, idx)); create_directories(path.substr(0, idx));
} }
int r = mkdir(path.c_str(), S_IRWXU); int r = MKDIR(path.c_str(), S_IRWXU);
if(r != 0) if(r != 0)
{ {
if(errno == EEXIST) if(errno == EEXIST)
{ {
struct stat stat_buf; struct STAT stat_buf;
if((stat(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode)) if((STAT(path.c_str(), &stat_buf) == 0) && S_ISDIR(stat_buf.st_mode))
return; return;
} }
@ -44,8 +53,8 @@ bool sanitize_filename(string & filename)
{ {
string sanitized; string sanitized;
bool format_specifier_found = false; bool format_specifier_found = false;
for(size_t i = 0; i < filename.size(); i++) for(size_t i = 0; i < filename.size(); i++)
{ {
if('%' == filename[i]) if('%' == filename[i])
{ {
@ -54,7 +63,7 @@ bool sanitize_filename(string & filename)
sanitized.push_back('%'); sanitized.push_back('%');
sanitized.push_back('%'); sanitized.push_back('%');
} }
else else
{ {
// We haven't found the format specifier yet, so see if we can use this one as a valid formatter // We haven't found the format specifier yet, so see if we can use this one as a valid formatter
size_t original_i = i; size_t original_i = i;
@ -63,14 +72,14 @@ bool sanitize_filename(string & filename)
while(++i < filename.size()) while(++i < filename.size())
{ {
tmp.push_back(filename[i]); tmp.push_back(filename[i]);
// If we aren't still in option specifiers, stop looking // If we aren't still in option specifiers, stop looking
if(!strchr("0123456789", filename[i])) if(!strchr("0123456789", filename[i]))
{ {
break; break;
} }
} }
// Check to see if we yielded a valid format specifier // Check to see if we yielded a valid format specifier
if('d' == tmp[tmp.size()-1]) if('d' == tmp[tmp.size()-1])
{ {
@ -88,7 +97,7 @@ bool sanitize_filename(string & filename)
} }
} }
} }
else else
{ {
sanitized.push_back(filename[i]); sanitized.push_back(filename[i]);
} }
@ -97,7 +106,7 @@ bool sanitize_filename(string & filename)
// Only sanitize if it is a valid format. // Only sanitize if it is a valid format.
if(format_specifier_found) if(format_specifier_found)
{ {
filename.assign(sanitized); filename.assign(sanitized);
} }
return format_specifier_found; return format_specifier_found;
@ -111,7 +120,7 @@ bool is_truetype_suffix(const string & suffix)
string get_filename (const string & path) string get_filename (const string & path)
{ {
size_t idx = path.rfind('/'); size_t idx = path.rfind('/');
if(idx == string::npos) if(idx == string::npos)
return path; return path;
else if (idx == path.size() - 1) else if (idx == path.size() - 1)
return ""; return "";
@ -134,4 +143,4 @@ string get_suffix(const string & path)
} }
} //namespace pdf2htmlEX } //namespace pdf2htmlEX