mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 04:50:09 +00:00
resolved conflicts
This commit is contained in:
commit
79c31f2195
@ -11,10 +11,9 @@ link_directories(${POPPLER_LIBRARY_DIRS})
|
||||
find_path(FF_INCLUDE_PATH fontforge/fontforge.h)
|
||||
if(FF_INCLUDE_PATH)
|
||||
message("Found fontforge.h: ${FF_INCLUDE_PATH}/fontforge/fontforge.h")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${FF_INCLUDE_PATH}/fontforge/config.h")
|
||||
include_directories(${FF_INCLUDE_PATH}/fontforge)
|
||||
# MacOSX gettext is in /opt/local/include - strange
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
|
||||
find_path(GETTEXT_INCLUDE_PATH libintl.h)
|
||||
if(GETTEXT_INCLUDE_PATH)
|
||||
include_directories(${GETTEXT_INCLUDE_PATH})
|
||||
@ -27,6 +26,11 @@ else()
|
||||
message(FATAL_ERROR "Error: cannot locate fontforge.h")
|
||||
endif()
|
||||
|
||||
find_path(FF_CONFIG_INCLUDE_PATH fontforge/config.h)
|
||||
if(FF_CONFIG_INCLUDE_PATH)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${FF_INCLUDE_PATH}/fontforge/config.h")
|
||||
endif()
|
||||
|
||||
foreach(FF_LIB_NAME
|
||||
${CMAKE_IMPORT_LIBRARY_PREFIX}fontforge${CMAKE_IMPORT_LIBRARY_SUFFIX}
|
||||
${CMAKE_SHARED_LIBRARY_PREFIX}fontforge${CMAKE_SHARED_LIBRARY_SUFFIX}
|
||||
@ -63,11 +67,18 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wunused-function")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++0x")
|
||||
# clang compiler need c++11 flag
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||
endif()
|
||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
|
||||
|
||||
# CYGWIN bug
|
||||
if(CYGWIN)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=gnu++0x")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++0x")
|
||||
endif()
|
||||
|
||||
configure_file (${CMAKE_SOURCE_DIR}/src/include/pdf2htmlEX-config.h.in ${CMAKE_SOURCE_DIR}/src/include/pdf2htmlEX-config.h)
|
||||
|
||||
add_executable(pdf2htmlEX
|
||||
|
136
pdf2htmlEX.1
Normal file
136
pdf2htmlEX.1
Normal file
@ -0,0 +1,136 @@
|
||||
.TH pdf2htmlEX 1 "Aug 31, 2012" "pdf2htmlEX 0.1"
|
||||
.SH NAME
|
||||
.PP
|
||||
.nf
|
||||
pdf2htmlEX \- converts PDF to HTML without losing text and format.
|
||||
.fi
|
||||
|
||||
.SH USAGE
|
||||
.PP
|
||||
.nf
|
||||
pdf2htmlEX [options] <input\-filename> [<output\-filename>]
|
||||
.fi
|
||||
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
pdf2htmlEX is a utility that converts PDF files to HTML files.
|
||||
|
||||
pdf2htmlEX tries its best to render the PDF precisely and maintain proper styling, while retaining text and optimizing for the Web.
|
||||
|
||||
Fonts are extracted from the PDF and then embedded into the HTML (Type 3 fonts are not supported). Text in the converted HTML file is usually selectable and copyable.
|
||||
|
||||
Other objects are rendered as images and also embedded.
|
||||
|
||||
.SH OPTIONS
|
||||
.TP
|
||||
.B --help
|
||||
Show all options
|
||||
.TP
|
||||
.B -v, --version
|
||||
Show copyright and version
|
||||
.TP
|
||||
.B -o, --owner-password <password>
|
||||
Specify owner password
|
||||
.TP
|
||||
.B -u, --user-password <password>
|
||||
Specify user password
|
||||
.TP
|
||||
.B --dest-dir <dir> (Default: ".")
|
||||
Specify destination folder
|
||||
.TP
|
||||
.B -f, --first-page <num> (Default: 1)
|
||||
Specify the first page to process
|
||||
.TP
|
||||
.B -l, --last-page <num> (Default: last page)
|
||||
Specify the last page to process
|
||||
.TP
|
||||
.B --zoom <ratio> (Default: 1.0)
|
||||
Specify the zoom ratio of the HTML file
|
||||
.TP
|
||||
.B --hdpi <dpi>, --vdpi <dpi> (Default: 144)
|
||||
Specify the horizontal and vertical DPI for images
|
||||
.TP
|
||||
.B --process-nontext <0|1> (Default: 1)
|
||||
Whether to process non-text objects (as images)
|
||||
.TP
|
||||
.B --single-html <0|1> (Default: 1)
|
||||
Whether to embed everything into one HTML file.
|
||||
|
||||
If switched off, several files will be generated along with the HTML file, including files for fonts, CSS and images.
|
||||
.TP
|
||||
.B --embed-base-font <0|1> (Default: 1)
|
||||
Whether to embed base 14 fonts.
|
||||
|
||||
There are several base fonts defined in the PDF standards, which are supposed to be provided by the PDF reader.
|
||||
|
||||
If this switch is on, locally matched fonts will be used and embedded; otherwise only font names are exported, so that web browsers may try to find proper fonts themselves.
|
||||
.TP
|
||||
.B --embed-external-font <0|1> (Default: 0)
|
||||
Same as above, but for non-base fonts.
|
||||
.TP
|
||||
.B --decompose-ligature <0|1> (Default: 0)
|
||||
Decompose ligatures. For example 'fi' -> 'f''i'.
|
||||
.TP
|
||||
.B --heps <len>, --veps <len> (Default: 1)
|
||||
Specify the maximum tolerable horizontal/vertical offset (in pixels).
|
||||
|
||||
pdf2htmlEX will try to optimize the generated HTML file by moving text within this distance.
|
||||
.TP
|
||||
.B --space-threshold <ratio> (Default: 1.0/6)
|
||||
pdf2htmlEX would insert a whitespace character ' ' if the distance between two consecutive letters in the same line is wider than ratio * font_size.
|
||||
.TP
|
||||
.B --font-size-multiplier <ratio> (Default: 10)
|
||||
Many web browsers limit the minimum font size, and many would round the given font size, which results in incorrect rendering.
|
||||
|
||||
Specifying a ratio greater than 1 resolves this issue.
|
||||
|
||||
For some versions of Firefox, however, there will be a problem when the font size is too large, in which case a smaller value should be specified here.
|
||||
.TP
|
||||
.B --tounicode <-1|0|1> (Default: 0)
|
||||
A ToUnicode map may be provided for each font in PDF which indicates the 'meaning' of the characters. However often there is better "ToUnicode" info in Type 0/1 fonts, and sometimes the ToUnicode map provided is wrong.
|
||||
|
||||
If this value is set to 1, the ToUnicode Map is always applied, if provided in PDF, and characters may not render correctly in HTML if there are collisions.
|
||||
|
||||
If set to -1, a customized map is used such that rendering will be correct in HTML (visually the same), but you may not get correct characters by select & copy & paste.
|
||||
|
||||
If set to 0, pdf2htmlEX will try its best to balance the two methods above.
|
||||
.TP
|
||||
.B --space-as-offset <0|1> (Default: 0)
|
||||
Treat space characters as offsets, which may increase the size of the output.
|
||||
|
||||
Turn it on if space characters are not displayed correctly, or you want to remove positional spaces.
|
||||
.TP
|
||||
.B --font-suffix <suffix> (Default: ".ttf"), --font-format <format> (Default: "truetype")
|
||||
Specify the suffix and format of fonts extracted from the PDF file. They should be consistent.
|
||||
.TP
|
||||
.B --debug <0|1> (Default: 0)
|
||||
Show debug information.
|
||||
.TP
|
||||
.B --clean-tmp <0|1> (Default: 1)
|
||||
If switched off, intermediate files won't be cleaned in the end.
|
||||
|
||||
.SH EXAMPLE
|
||||
.TP
|
||||
.B pdf2htmlEX /path/to/file.pdf
|
||||
Convert file.pdf into file.html
|
||||
.TP
|
||||
.B pdf2htmlEX --tmp-dir tmp --clean-tmp 0 --debug 1 /path/to/file.pdf
|
||||
Convert file.pdf and leave all intermediate files.
|
||||
.TP
|
||||
.B pdf2htmlEX --dest-dir out --single-html 0 --debug 1 /path/to/file.pdf
|
||||
Convert file.pdf into out/file.html and leave font/image files separated.
|
||||
|
||||
.SH COPYRIGHT
|
||||
.PP
|
||||
Copyright 2012 Lu Wang <coolwanglu@gmail.com>
|
||||
|
||||
pdf2htmlEX is GPLv2 & GPLv3 dual licensed
|
||||
|
||||
.SH AUTHOR
|
||||
.PP
|
||||
pdf2htmlEX is written by Lu Wang <coolwanglu@gmail.com>
|
||||
|
||||
.SH SEE ALSO
|
||||
.TP
|
||||
Home page
|
||||
http://github.com/coolwanglu/pdf2htmlEX
|
@ -64,7 +64,14 @@ void ArgParser::parse(int argc, char ** argv) const
|
||||
if(p->name != "")
|
||||
{
|
||||
int v = (256 + (iter - arg_entries.begin()));
|
||||
longopts.push_back({p->name.c_str(), ((p->need_arg) ? required_argument : no_argument), nullptr, v});
|
||||
longopts.resize(longopts.size() + 1);
|
||||
{
|
||||
auto & cur = longopts.back();
|
||||
cur.name = p->name.c_str();
|
||||
cur.has_arg = ((p->need_arg) ? required_argument : no_argument);
|
||||
cur.flag = nullptr;
|
||||
cur.val = v;
|
||||
}
|
||||
if(!(opt_map.insert(make_pair(v, p)).second))
|
||||
{
|
||||
cerr << "Warning: duplicated shortname '" << v << "' used by --" << (p->name) << " and --" << (opt_map[p->shortname]->name) << endl;
|
||||
@ -73,7 +80,14 @@ void ArgParser::parse(int argc, char ** argv) const
|
||||
}
|
||||
|
||||
optstring.push_back(0);
|
||||
longopts.push_back({0,0,0,0});
|
||||
longopts.resize(longopts.size() + 1);
|
||||
{
|
||||
auto & cur = longopts.back();
|
||||
cur.name = 0;
|
||||
cur.has_arg = 0;
|
||||
cur.flag = 0;
|
||||
cur.val = 0;
|
||||
}
|
||||
|
||||
{
|
||||
opterr = 1;
|
||||
|
@ -15,6 +15,7 @@
|
||||
using std::min;
|
||||
using std::max;
|
||||
using std::vector;
|
||||
using std::ostream;
|
||||
|
||||
void HTMLRenderer::LineBuffer::reset(GfxState * state)
|
||||
{
|
||||
|
1
src/ff.c
1
src/ff.c
@ -13,7 +13,6 @@
|
||||
#include <stdarg.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <fontforge/config.h>
|
||||
#include <fontforge.h>
|
||||
#include <baseviews.h>
|
||||
|
||||
|
@ -14,6 +14,10 @@
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
|
||||
#ifndef nullptr
|
||||
#define nullptr (NULL)
|
||||
#endif
|
||||
|
||||
class ArgParser
|
||||
{
|
||||
public:
|
||||
|
@ -1,19 +0,0 @@
|
||||
/*
|
||||
* config.h
|
||||
* Compile time constants
|
||||
*
|
||||
* by WangLu
|
||||
*/
|
||||
|
||||
|
||||
#ifndef PDF2HTMLEX_CONFIG_H__
|
||||
#define PDF2HTMLEX_CONFIG_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
static const std::string PDF2HTMLEX_VERSION = "0.3";
|
||||
static const std::string PDF2HTMLEX_PREFIX = "/usr/local";
|
||||
static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX";
|
||||
|
||||
|
||||
#endif //PDF2HTMLEX_CONFIG_H__
|
@ -10,6 +10,7 @@
|
||||
#ifndef UTIL_H__
|
||||
#define UTIL_H__
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
@ -19,11 +20,6 @@
|
||||
|
||||
#include <UTF8.h>
|
||||
|
||||
using std::istream;
|
||||
using std::ostream;
|
||||
using std::max;
|
||||
using std::abs;
|
||||
|
||||
#ifndef nullptr
|
||||
#define nullptr (NULL)
|
||||
#endif
|
||||
@ -133,10 +129,10 @@ class base64stream
|
||||
{
|
||||
public:
|
||||
|
||||
base64stream(istream & in) : in(&in) { }
|
||||
base64stream(istream && in) : in(&in) { }
|
||||
base64stream(std::istream & in) : in(&in) { }
|
||||
base64stream(std::istream && in) : in(&in) { }
|
||||
|
||||
ostream & dumpto(ostream & out)
|
||||
std::ostream & dumpto(std::ostream & out)
|
||||
{
|
||||
unsigned char buf[3];
|
||||
while(in->read((char*)buf, 3))
|
||||
@ -170,12 +166,12 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
istream * in;
|
||||
std::istream * in;
|
||||
static const char * base64_encoding;
|
||||
};
|
||||
|
||||
static inline ostream & operator << (ostream & out, base64stream & bf) { return bf.dumpto(out); }
|
||||
static inline ostream & operator << (ostream & out, base64stream && bf) { return bf.dumpto(out); }
|
||||
static inline std::ostream & operator << (std::ostream & out, base64stream & bf) { return bf.dumpto(out); }
|
||||
static inline std::ostream & operator << (std::ostream & out, base64stream && bf) { return bf.dumpto(out); }
|
||||
|
||||
class string_formatter
|
||||
{
|
||||
@ -204,7 +200,7 @@ public:
|
||||
va_end(vlist);
|
||||
if(l >= (int)buf.capacity())
|
||||
{
|
||||
buf.reserve(max((long)(l+1), (long)buf.capacity() * 2));
|
||||
buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2));
|
||||
va_start(vlist, format);
|
||||
l = vsnprintf(&buf.front(), buf.capacity(), format, vlist);
|
||||
va_end(vlist);
|
||||
|
@ -6,6 +6,8 @@
|
||||
* 2012.08.10
|
||||
*/
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#include <GfxState.h>
|
||||
#include <GfxFont.h>
|
||||
#include <CharTypes.h>
|
||||
@ -22,7 +24,7 @@ using std::cerr;
|
||||
using std::endl;
|
||||
using std::string;
|
||||
using std::map;
|
||||
|
||||
using std::ostream;
|
||||
|
||||
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user