1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Ryan Morlok 2013-03-16 13:30:42 -05:00
commit af8e9c10ae
21 changed files with 189 additions and 91 deletions

View File

@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 2.6.0 FATAL_ERROR)
include_directories(${CMAKE_SOURCE_DIR}/src) include_directories(${CMAKE_SOURCE_DIR}/src)
set(PDF2HTMLEX_VERSION "0.7") set(PDF2HTMLEX_VERSION "0.8")
set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION}) set(ARCHIVE_NAME pdf2htmlex-${PDF2HTMLEX_VERSION})
add_custom_target(dist add_custom_target(dist
COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD COMMAND git archive --prefix=${ARCHIVE_NAME}/ HEAD
@ -43,7 +43,7 @@ if(FONTFORGE_FOUND)
link_directories(${FONTFORGE_LIBRARY_DIRS}) link_directories(${FONTFORGE_LIBRARY_DIRS})
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${FONTFORGE_LIBRARIES}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${FONTFORGE_LIBRARIES})
else() else()
message("Trying to locate fontforge...") message("Trying to locate old versions of fontforge...")
find_path(FF_INCLUDE_PATH fontforge/fontforge.h) find_path(FF_INCLUDE_PATH fontforge/fontforge.h)
if(FF_INCLUDE_PATH) if(FF_INCLUDE_PATH)
message("Found fontforge.h: ${FF_INCLUDE_PATH}/fontforge/fontforge.h") message("Found fontforge.h: ${FF_INCLUDE_PATH}/fontforge/fontforge.h")
@ -61,6 +61,14 @@ else()
else() else()
message(FATAL_ERROR "Error: cannot locate fontforge.h") message(FATAL_ERROR "Error: cannot locate fontforge.h")
endif() endif()
# Look for fontforge's config.h. Only the directories listed in
# FONTFORGE_INCLUDE_DIRS are searched: NO_DEFAULT_PATH turns off the
# default system search locations, so an unrelated config.h is not picked up.
find_path(FF_CONFIG_INCLUDE_PATH config.h PATHS
${FONTFORGE_INCLUDE_DIRS} NO_DEFAULT_PATH)
if(FF_CONFIG_INCLUDE_PATH)
# Append "-include <dir>/config.h" to the global C compiler flags, so the
# header is passed to the compiler for every C source built after this point.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${FF_CONFIG_INCLUDE_PATH}/config.h")
message("Found config.h: ${FF_CONFIG_INCLUDE_PATH}/config.h")
else()
# Not fatal: the situation is only reported and configuration continues
# without the extra -include flag.
message("Cannot locate config.h for fontforge")
endif()
macro(wl_find_library LIB_NAME RESULT) macro(wl_find_library LIB_NAME RESULT)
unset(${RESULT}) unset(${RESULT})
@ -98,14 +106,6 @@ else()
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${PYTHON_LIBRARIES})
endif() endif()
find_path(FF_CONFIG_INCLUDE_PATH config.h PATHS
${FONTFORGE_INCLUDE_DIRS} NO_DEFAULT_PATH)
if(FF_CONFIG_INCLUDE_PATH)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${FF_CONFIG_INCLUDE_PATH}/config.h")
message("Found config.h: ${FF_CONFIG_INCLUDE_PATH}/config.h")
else()
message("Cannot locate config.h for fontforge")
endif()
# debug build flags (overwrite default cmake debug flags) # debug build flags (overwrite default cmake debug flags)
set(CMAKE_C_FLAGS_DEBUG "-ggdb") set(CMAKE_C_FLAGS_DEBUG "-ggdb")

View File

@ -1,8 +1,13 @@
Latest v0.7 Latest v0.8
v0.7
2013.03.01
* Process outline * Process outline
* Fix build with poppler * Fix build with poppler
* Many code cleaning jobs [John Hewson] * Many code cleaning jobs [John Hewson]
* Experimental printing support
* Lots of code refinements
v0.6 v0.6
2013.01.26 2013.01.26

View File

@ -9,8 +9,8 @@ A beautiful demo is worth a thousand words:
- **Scientific Paper**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/demo.html) / [MediaFire](http://www.mediafire.com/view/?6po429kz9czcga2) / [Original](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.148.349&rep=rep1&type=pdf) - **Scientific Paper**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/demo.html) / [MediaFire](http://www.mediafire.com/view/?6po429kz9czcga2) / [Original](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.148.349&rep=rep1&type=pdf)
- **Full Circle Magazine**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/issue65_en.html) / [MediaFire](http://www.mediafire.com/view/?6hxmt94k2vppnpb) / [Original](http://dl.fullcirclemagazine.org/issue65_en.pdf) <sub>The 1st link might be slow</sub> - **Full Circle Magazine**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/issue65_en.html) / [MediaFire](http://www.mediafire.com/view/?6hxmt94k2vppnpb) / [Original](http://dl.fullcirclemagazine.org/issue65_en.pdf) <sub>The 1st link might be slow</sub>
- **Chinese**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/chn.html) / [MediaFire](http://www.mediafire.com/view/?6550ldag9w0uuq3) / [Original](http://files.cnblogs.com/phphuaibei/git%E6%90%AD%E5%BB%BA.pdf) - **Chinese**: [Default](http://coolwanglu.github.com/pdf2htmlEX/demo/chn.html) / [MediaFire](http://www.mediafire.com/view/?6550ldag9w0uuq3) / [Original](http://files.cnblogs.com/phphuaibei/git%E6%90%AD%E5%BB%BA.pdf)
- Try your own files on [MediaFire](http://www.mediafire.com), which uses pdf2htmlEX for its PDF preview feature. - [Try your own files](https://github.com/coolwanglu/pdf2htmlEX/wiki/UploadDemo)
## Introduction ## Introduction
pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies. pdf2htmlEX renders PDF files in HTML, utilizing modern Web technologies.
@ -29,6 +29,7 @@ The generated HTML file is static, Javascript is not required.
- Correct font & position & styles - Correct font & position & styles
- Proper reencoding - Proper reencoding
- Generated HTML file is of similar size as the original (uncompressed) PDF file - Generated HTML file is of similar size as the original (uncompressed) PDF file
- Fallback (image + hidden text) - better accuracy and compatibility
* Output modes * Output modes
- Normal HTML - Normal HTML
- All-in-one HTML - portable & easy to share - All-in-one HTML - portable & easy to share
@ -36,6 +37,7 @@ The generated HTML file is static, Javascript is not required.
* More PDF stuffs that you love * More PDF stuffs that you love
- Links - Links
- Outline - Outline
- Printing (experimental)
[Full list](https://github.com/coolwanglu/pdf2htmlEX/wiki/Feature-List) [Full list](https://github.com/coolwanglu/pdf2htmlEX/wiki/Feature-List)
[Compare with others](https://github.com/coolwanglu/pdf2htmlEX/wiki/Comparison) [Compare with others](https://github.com/coolwanglu/pdf2htmlEX/wiki/Comparison)
@ -83,15 +85,15 @@ Thanks to all packagers!
## Usage ## Usage
pdf2htmlEX /path/to/foobar.pdf pdf2htmlEX /path/to/foobar.pdf
pdf2htmlEX --help
man pdf2htmlEX [Quick Start](https://github.com/coolwanglu/pdf2htmlEX/wiki/QuickStart)
## FAQ ## FAQ
* [Troubleshooting compilation errors](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-compile) * [Troubleshooting compilation errors](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-compile)
* [How can I help](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-help) * [How can I help](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-help)
* [I want more features](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission) * [I want more features](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ#wiki-feature_commission)
* [More about pdf2htmlEX](https://github.com/coolwanglu/pdf2htmlEX/wiki/) * [More...](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ)
## LICENSE ## LICENSE
@ -114,7 +116,7 @@ pdf2htmlEX is maintained by one person in spare time, and it needs your help!
* Lu Wang <coolwanglu@gmail.com> * Lu Wang <coolwanglu@gmail.com>
* For personal enquiries only * For personal enquiries only
* Accepting messages in **Chinese**, **English** or **Japanese**. * Accepting messages in **中文**, **English** or **日本語**.
## Acknowledge ## Acknowledge

11
TODO
View File

@ -1,19 +1,15 @@
clean css class names
print css for draw/link/image...
== Future: == == Future: ==
Too difficult/complicated to implement: Too difficult/complicated to implement:
- integrate splash/cairo - integrate splash/cairo
- naive support for image/drawing (SVG?) - naive image/drawing (SVG?)
- type 3 fonts (convert to SVG fonts?) - type 3 fonts (convert to SVG fonts?)
- reflowable text/combine lines/unwrapping - reflowable text/combine lines/unwrapping
- Printing - multi-thread
Not enough motivated/Lazy Not enough motivated/Lazy
- argument auto-completion - argument auto-completion
- use absolute positioning for long whitespace - use absolute positioning for long whitespace
- color invert
- detect duplicate base fonts when embedding - detect duplicate base fonts when embedding
- disable selection if we know unicode is wrong - disable selection if we know unicode is wrong
- check if we can add information to the font, and let browsers show ligatures automatically - check if we can add information to the font, and let browsers show ligatures automatically
@ -23,6 +19,7 @@ Not enough motivated/Lazy
- merge sub/sup into one line - merge sub/sup into one line
- precise link dest: zoom - precise link dest: zoom
- multiple charcode mapped to a same glyph - multiple charcode mapped to a same glyph
- don't dump image when there is nothing - don't dump image when it is empty
- minimum line width of css drawing - minimum line width of css drawing
- ajax in pdf2htmlEX for separated pages - ajax in pdf2htmlEX for separated pages
- separate classes for annotations (such that we don't have to hide all css drawings for printing)

13
debian/changelog vendored
View File

@ -1,3 +1,16 @@
pdf2htmlex (0.8-1~git201303011406r3bc73-0ubuntu1) quantal; urgency=low
* Experimental printing support
* New version
-- WANG Lu <coolwanglu@gmail.com> Fri, 01 Mar 2013 14:06:42 +0800
pdf2htmlex (0.7-1~git201302282259r3bc73-0ubuntu1) quantal; urgency=low
* suggests ttfautohint
-- WANG Lu <coolwanglu@gmail.com> Thu, 28 Feb 2013 22:59:45 +0800
pdf2htmlex (0.7-1~git201302271054r3bc73-0ubuntu1) precise; urgency=low pdf2htmlex (0.7-1~git201302271054r3bc73-0ubuntu1) precise; urgency=low
* Packaging for 12.04 * Packaging for 12.04

1
debian/control vendored
View File

@ -9,5 +9,6 @@ Homepage: http://github.com/coolwanglu/pdf2htmlEX
Package: pdf2htmlex Package: pdf2htmlex
Architecture: any Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, libpoppler27 (>= 0.20.3) | libpoppler28, libpng12-0, libfontforge1 Depends: ${shlibs:Depends}, ${misc:Depends}, libpoppler27 (>= 0.20.3) | libpoppler28, libpng12-0, libfontforge1
Suggests: ttfautohint
Description: Converts PDF to HTML without losing format Description: Converts PDF to HTML without losing format
pdf2htmlEX converts PDF to HTML while retaining text, format & style as much as possible pdf2htmlEX converts PDF to HTML while retaining text, format & style as much as possible

View File

@ -87,6 +87,10 @@ Specify the filename of the generated outline file, if not embedded.
If it's empty, the file name will be determined automatically. If it's empty, the file name will be determined automatically.
.TP
.B --fallback <0|1> (Default: 0)
Output in fallback mode, for better accuracy and browser compatibility, but the size becomes larger.
.TP .TP
.B --process-nontext <0|1> (Default: 1) .B --process-nontext <0|1> (Default: 1)
Whether to process non-text objects (as images) Whether to process non-text objects (as images)

View File

@ -72,6 +72,9 @@
overflow:visible; overflow:visible;
background-color:transparent; background-color:transparent;
} }
.@CSS_CSS_DRAW_CN@ {
display:none;
}
} }
/* Part 2: Page Elements: Modify with caution /* Part 2: Page Elements: Modify with caution
* The followings are base classes, which are meant to be override by PDF specific classes * The followings are base classes, which are meant to be override by PDF specific classes
@ -113,6 +116,17 @@
.@CSS_PAGE_CONTENT_BOX_CN@.opened { /* used by pdf2htmlEX.js, to show/hide pages */ .@CSS_PAGE_CONTENT_BOX_CN@.opened { /* used by pdf2htmlEX.js, to show/hide pages */
display:block; display:block;
} }
.@CSS_BACKGROUND_IMAGE_CN@ {
position:absolute;
left:0;
top:0;
width:100%;
height:100%;
-ms-user-select:none;
-moz-user-select:none;
-webkit-user-select:none;
user-select:none;
}
@media print { @media print {
.@CSS_PAGE_DECORATION_CN@ { .@CSS_PAGE_DECORATION_CN@ {
margin:0; margin:0;
@ -150,11 +164,12 @@ span { /* text blocks within a line */
color:transparent; color:transparent;
z-index:-1; z-index:-1;
} }
/* selection background should not be opaque, for fallback mode */
::selection{ ::selection{
background: rgba(127,255,255,1); background: rgba(127,255,255,0.4);
} }
::-moz-selection{ ::-moz-selection{
background: rgba(127,255,255,1); background: rgba(127,255,255,0.4);
} }
.@CSS_PAGE_DATA_CN@ { /* info for Javascript */ .@CSS_PAGE_DATA_CN@ { /* info for Javascript */
display:none; display:none;

View File

@ -15,6 +15,7 @@ var pdf2htmlEX = (function(){
page_decoration : '@CSS_PAGE_DECORATION_CN@', page_decoration : '@CSS_PAGE_DECORATION_CN@',
page_content_box : '@CSS_PAGE_CONTENT_BOX_CN@', page_content_box : '@CSS_PAGE_CONTENT_BOX_CN@',
page_data : '@CSS_PAGE_DATA_CN@', page_data : '@CSS_PAGE_DATA_CN@',
background_image : '@CSS_BACKGROUND_IMAGE_CN@',
link : '@CSS_LINK_CN@', link : '@CSS_LINK_CN@',
__dummy__ : 'no comma' __dummy__ : 'no comma'
}; };
@ -126,11 +127,12 @@ var pdf2htmlEX = (function(){
this.outline = $('#'+this.outline_id); this.outline = $('#'+this.outline_id);
this.container = $('#'+this.container_id); this.container = $('#'+this.container_id);
// need a better design // Open the outline if nonempty
if(this.outline.children().length > 0) { if(this.outline.children().length > 0) {
this.outline.addClass('opened'); this.outline.addClass('opened');
} }
// collect pages
var new_pages = new Array(); var new_pages = new Array();
var pl= $('.'+CSS_CLASS_NAMES['page_frame'], this.container); var pl= $('.'+CSS_CLASS_NAMES['page_frame'], this.container);
/* don't use for(..in..) */ /* don't use for(..in..) */
@ -140,14 +142,18 @@ var pdf2htmlEX = (function(){
} }
this.pages = new_pages; this.pages = new_pages;
// register schedule rendering
var _ = this; var _ = this;
this.container.scroll(function(){ _.schedule_render(); }); this.container.scroll(function(){ _.schedule_render(); });
//this.zoom_fixer(); //this.zoom_fixer();
// used by outline/annot_link etc // handle links
this.container.add(this.outline).on('click', '.'+CSS_CLASS_NAMES['link'], this, this.link_handler); this.container.add(this.outline).on('click', '.'+CSS_CLASS_NAMES['link'], this, this.link_handler);
// disable background image dragging
$('.'+CSS_CLASS_NAMES['background_image'], this.container).on('dragstart', function(e){return false;});
this.render(); this.render();
}, },
pre_hide_pages : function() { pre_hide_pages : function() {

View File

@ -20,17 +20,19 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
CharCode code, int nBytes, Unicode *u, int uLen) CharCode code, int nBytes, Unicode *u, int uLen)
{ {
// draw characters as image when // draw characters as image when
// - there is special filling method // - in fallback mode
// - OR there is special filling method
// - OR using a writing mode font // - OR using a writing mode font
// - OR using a Type 3 font // - OR using a Type 3 font
if(( (state->getFont()) if((param->fallback)
&& ( (state->getFont()->getWMode()) || ( (state->getFont())
|| (state->getFont()->getType() == fontType3) && ( (state->getFont()->getWMode())
) || (state->getFont()->getType() == fontType3)
) )
)
) )
{ {
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
} }
} }
@ -42,7 +44,7 @@ void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const strin
{ {
doc->displayPage(this, pageno, param->h_dpi, param->v_dpi, doc->displayPage(this, pageno, param->h_dpi, param->v_dpi,
0, 0,
(param->use_cropbox == 0), (!(param->use_cropbox)),
false, false, false, false,
nullptr, nullptr, &annot_cb, nullptr); nullptr, nullptr, &annot_cb, nullptr);

View File

@ -241,6 +241,9 @@ protected:
double text_scale_factor1; double text_scale_factor1;
double text_scale_factor2; double text_scale_factor2;
// 1px on screen should be printed as print_scale()pt
double print_scale (void) const { return 96.0 / DEFAULT_DPI / text_zoom_factor(); }
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
// states // states
@ -294,6 +297,7 @@ protected:
RiseManager rise_manager; RiseManager rise_manager;
LeftManager left_manager; LeftManager left_manager;
//////////////////////////////////////////////// ////////////////////////////////////////////////
BGImageSizeManager bgimage_size_manager;
// optimize for web // optimize for web
// we try to render the final font size directly // we try to render the final font size directly

View File

@ -275,7 +275,7 @@ int HTMLRenderer::TextLineBuffer::State::diff(const State & s) const
// the order should be the same as in the enum // the order should be the same as in the enum
const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = { const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
CSS::FONT_NAME_CN, CSS::FONT_FAMILY_CN,
CSS::FONT_SIZE_CN, CSS::FONT_SIZE_CN,
CSS::FILL_COLOR_CN, CSS::FILL_COLOR_CN,
CSS::STROKE_COLOR_CN, CSS::STROKE_COLOR_CN,

View File

@ -727,7 +727,7 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
} }
f_css.fs << "@font-face{" f_css.fs << "@font-face{"
<< "font-family:" << CSS::FONT_NAME_CN << info.id << ";" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"
<< "src:url("; << "src:url(";
{ {
@ -749,8 +749,8 @@ void HTMLRenderer::export_remote_font(const FontInfo & info, const string & suff
f_css.fs << ")" f_css.fs << ")"
<< "format(\"" << format << "\");" << "format(\"" << format << "\");"
<< "}" // end of @font-face << "}" // end of @font-face
<< "." << CSS::FONT_NAME_CN << info.id << "{" << "." << CSS::FONT_FAMILY_CN << info.id << "{"
<< "font-family:" << CSS::FONT_NAME_CN << info.id << ";" << "font-family:" << CSS::FONT_FAMILY_CN << info.id << ";"
<< "line-height:" << round(info.ascent - info.descent) << ";" << "line-height:" << round(info.ascent - info.descent) << ";"
<< "font-style:normal;" << "font-style:normal;"
<< "font-weight:normal;" << "font-weight:normal;"
@ -772,12 +772,12 @@ static string general_font_family(GfxFont * font)
// TODO: this function is called when some font is unable to process, may use the name there as a hint // TODO: this function is called when some font is unable to process, may use the name there as a hint
void HTMLRenderer::export_remote_default_font(long long fn_id) void HTMLRenderer::export_remote_default_font(long long fn_id)
{ {
f_css.fs << "." << CSS::FONT_NAME_CN << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl; f_css.fs << "." << CSS::FONT_FAMILY_CN << fn_id << "{font-family:sans-serif;visibility:hidden;}" << endl;
} }
void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont) void HTMLRenderer::export_local_font(const FontInfo & info, GfxFont * font, const string & original_font_name, const string & cssfont)
{ {
f_css.fs << "." << CSS::FONT_NAME_CN << info.id << "{"; f_css.fs << "." << CSS::FONT_FAMILY_CN << info.id << "{";
f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";"; f_css.fs << "font-family:" << ((cssfont == "") ? (original_font_name + "," + general_font_family(font)) : cssfont) << ";";
string fn = original_font_name; string fn = original_font_name;

View File

@ -111,7 +111,7 @@ void HTMLRenderer::process(PDFDoc *doc)
if(param->process_nontext) if(param->process_nontext)
{ {
auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i); auto fn = str_fmt("%s/bg%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i);
if(param->single_html) if(param->single_html)
tmp_files.add((char*)fn); tmp_files.add((char*)fn);
@ -121,7 +121,7 @@ void HTMLRenderer::process(PDFDoc *doc)
doc->displayPage(this, i, doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
0, 0,
(param->use_cropbox == 0), (!(param->use_cropbox)),
false, false, false, false,
nullptr, nullptr, nullptr, nullptr); nullptr, nullptr, nullptr, nullptr);
@ -173,35 +173,29 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum << "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
<< "\" class=\"" << CSS::PAGE_FRAME_CN << "\" class=\"" << CSS::PAGE_FRAME_CN
<< "\" data-page-no=\"" << pageNum << "\">" << "\" data-page-no=\"" << pageNum << "\">"
<< "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN << "\" style=\""; << "<div class=\"" << CSS::PAGE_CONTENT_BOX_CN
<< " " << CSS::PAGE_CONTENT_BOX_CN << pageNum
<< "\">";
if(param->process_nontext) if(param->process_nontext)
{ {
f_pages.fs << "background-image:url("; f_pages.fs << "<img class=\"" << CSS::BACKGROUND_IMAGE_CN
<< "\" alt=\"\" src=\"";
if(param->single_html)
{ {
if(param->single_html) auto path = str_fmt("%s/bg%x.png", param->tmp_dir.c_str(), pageNum);
{ ifstream fin((char*)path, ifstream::binary);
auto path = str_fmt("%s/p%x.png", param->tmp_dir.c_str(), pageNum); if(!fin)
ifstream fin((char*)path, ifstream::binary); throw string("Cannot read background image ") + (char*)path;
if(!fin) f_pages.fs << "data:image/png;base64," << base64stream(fin);
throw string("Cannot read background image ") + (char*)path;
f_pages.fs << "'data:image/png;base64," << base64stream(fin) << "'";
}
else
{
f_pages.fs << str_fmt("p%x.png", pageNum);
}
} }
else
// TODO print css {
f_pages.fs << ");background-position:0 0;background-size:" f_pages.fs << str_fmt("bg%x.png", pageNum);
<< state->getPageWidth() << "px " }
<< state->getPageHeight() << "px;background-repeat:no-repeat;"; f_pages.fs << "\"/>";
} }
f_pages.fs << "\">";
reset_state(); reset_state();
} }
@ -462,22 +456,24 @@ void HTMLRenderer::dump_css (void)
width_manager .dump_css(f_css.fs); width_manager .dump_css(f_css.fs);
rise_manager .dump_css(f_css.fs); rise_manager .dump_css(f_css.fs);
left_manager .dump_css(f_css.fs); left_manager .dump_css(f_css.fs);
bgimage_size_manager.dump_css(f_css.fs);
// print css // print css
double print_scale = 96.0 / DEFAULT_DPI / text_zoom_factor(); double ps = print_scale();
f_css.fs << CSS::PRINT_ONLY << "{" << endl; f_css.fs << CSS::PRINT_ONLY << "{" << endl;
transform_matrix_manager.dump_print_css(f_css.fs, print_scale); transform_matrix_manager.dump_print_css(f_css.fs, ps);
letter_space_manager .dump_print_css(f_css.fs, print_scale); letter_space_manager .dump_print_css(f_css.fs, ps);
stroke_color_manager .dump_print_css(f_css.fs, print_scale); stroke_color_manager .dump_print_css(f_css.fs, ps);
word_space_manager .dump_print_css(f_css.fs, print_scale); word_space_manager .dump_print_css(f_css.fs, ps);
whitespace_manager .dump_print_css(f_css.fs, print_scale); whitespace_manager .dump_print_css(f_css.fs, ps);
fill_color_manager .dump_print_css(f_css.fs, print_scale); fill_color_manager .dump_print_css(f_css.fs, ps);
font_size_manager .dump_print_css(f_css.fs, print_scale); font_size_manager .dump_print_css(f_css.fs, ps);
bottom_manager .dump_print_css(f_css.fs, print_scale); bottom_manager .dump_print_css(f_css.fs, ps);
height_manager .dump_print_css(f_css.fs, print_scale); height_manager .dump_print_css(f_css.fs, ps);
width_manager .dump_print_css(f_css.fs, print_scale); width_manager .dump_print_css(f_css.fs, ps);
rise_manager .dump_print_css(f_css.fs, print_scale); rise_manager .dump_print_css(f_css.fs, ps);
left_manager .dump_print_css(f_css.fs, print_scale); left_manager .dump_print_css(f_css.fs, ps);
bgimage_size_manager.dump_print_css(f_css.fs, ps);
f_css.fs << "}" << endl; f_css.fs << "}" << endl;
} }

View File

@ -346,7 +346,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
} }
// fill color // fill color
if(all_changed || fill_color_changed) if((!(param->fallback)) && (all_changed || fill_color_changed))
{ {
// * PDF Spec. Table 106 Text rendering modes // * PDF Spec. Table 106 Text rendering modes
static const char FILL[8] = { true, false, true, false, true, false, true, false }; static const char FILL[8] = { true, false, true, false, true, false, true, false };
@ -369,7 +369,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
} }
// stroke color // stroke color
if(all_changed || stroke_color_changed) if((!(param->fallback)) && (all_changed || stroke_color_changed))
{ {
// * PDF Spec. Table 106 Text rendering modes // * PDF Spec. Table 106 Text rendering modes
static const char STROKE[8] = { false, true, true, false, false, true, true, false }; static const char STROKE[8] = { false, true, true, false, false, true, true, false };

View File

@ -32,6 +32,7 @@ struct Param
std::string outline_filename; std::string outline_filename;
int process_nontext; int process_nontext;
int process_outline; int process_outline;
int fallback;
// fonts // fonts
int embed_base_font; int embed_base_font;

View File

@ -2,19 +2,21 @@
# Note # Note
# don't use: (otherwise conflicted with others when there is an ID suffix) # don't use: (otherwise conflicted with others when there is an ID suffix)
# p f # p f s
set(CSS_INVALID_ID "_") set(CSS_INVALID_ID "_")
set(CSS_LINE_CN "tl") # text line set(CSS_LINE_CN "t") # text
set(CSS_TRANSFORM_MATRIX_CN "tm") # transform matrix set(CSS_TRANSFORM_MATRIX_CN "m") # matrix
set(CSS_PAGE_DECORATION_CN "pd") # page decoration set(CSS_PAGE_DECORATION_CN "pd") # page decoration
set(CSS_PAGE_FRAME_CN "pf") # page frame set(CSS_PAGE_FRAME_CN "pf") # page frame
set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content set(CSS_PAGE_CONTENT_BOX_CN "pc") # page content
set(CSS_PAGE_DATA_CN "pi") # page info set(CSS_PAGE_DATA_CN "pi") # page info
set(CSS_FONT_NAME_CN "fn") # font name set(CSS_BACKGROUND_IMAGE_CN "bi") # background image
set(CSS_FONT_FAMILY_CN "ff") # font family
set(CSS_FONT_SIZE_CN "fs") # font size set(CSS_FONT_SIZE_CN "fs") # font size
set(CSS_FILL_COLOR_CN "fc") # fill color set(CSS_FILL_COLOR_CN "fc") # fill color

View File

@ -64,7 +64,7 @@ void parse_options (int argc, char **argv)
.add("zoom", &param.zoom, 0, "zoom ratio", nullptr, true) .add("zoom", &param.zoom, 0, "zoom ratio", nullptr, true)
.add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", nullptr, true) .add("fit-width", &param.fit_width, 0, "fit width to <fp> pixels", nullptr, true)
.add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", nullptr, true) .add("fit-height", &param.fit_height, 0, "fit height to <fp> pixels", nullptr, true)
.add("use-cropbox", &param.use_cropbox, 0, "use CropBox instead of MediaBox") .add("use-cropbox", &param.use_cropbox, 1, "use CropBox instead of MediaBox")
.add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI") .add("hdpi", &param.h_dpi, 144.0, "horizontal resolution for graphics in DPI")
.add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI") .add("vdpi", &param.v_dpi, 144.0, "vertical resolution for graphics in DPI")
@ -76,6 +76,7 @@ void parse_options (int argc, char **argv)
.add("outline-filename", &param.outline_filename, "", "filename of the generated outline file") .add("outline-filename", &param.outline_filename, "", "filename of the generated outline file")
.add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text") .add("process-nontext", &param.process_nontext, 1, "render graphics in addition to text")
.add("process-outline", &param.process_outline, 1, "show outline in HTML") .add("process-outline", &param.process_outline, 1, "show outline in HTML")
.add("fallback", &param.fallback, 0, "output in fallback mode")
// fonts // fonts
.add("embed-base-font", &param.embed_base_font, 0, "embed local match for standard 14 fonts") .add("embed-base-font", &param.embed_base_font, 0, "embed local match for standard 14 fonts")
@ -104,7 +105,8 @@ void parse_options (int argc, char **argv)
// misc. // misc.
.add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion") .add("clean-tmp", &param.clean_tmp, 1, "remove temporary files after conversion")
.add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory") .add("data-dir", &param.data_dir, PDF2HTMLEX_DATA_PATH, "specify data directory")
.add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing") // TODO: css drawings are hidden on print, for annot links, need to fix it for other drawings
// .add("css-draw", &param.css_draw, 0, "[experimental and unsupported] CSS drawing")
.add("debug", &param.debug, 0, "print debugging information") .add("debug", &param.debug, 0, "print debugging information")
// meta // meta

View File

@ -453,6 +453,43 @@ public:
} }
}; };
/////////////////////////////////////
/*
* Manage the background image sizes
* Kind of similar with StateManager, but not exactly the same
* anyway, temporarily leave it here
*/
class BGImageSizeManager
{
public:
    // Record the background image size (width x height) for a page.
    // If the page number has already been recorded, the earlier entry is
    // kept and this call has no effect.
    void install(int page_no, double width, double height) {
        value_map.emplace(page_no, std::make_pair(width, height));
    }

    // Write one rule per recorded page, selecting the page content box
    // class suffixed with the page number, with the size expressed in px.
    void dump_css(std::ostream & out) {
        for(const auto & entry : value_map)
        {
            const int page_no = entry.first;
            const std::pair<double,double> & size = entry.second;
            out << "." << CSS::PAGE_CONTENT_BOX_CN << page_no << "{"
                << "background-size:" << round(size.first) << "px " << round(size.second) << "px;"
                << "}" << std::endl;
        }
    }

    // Same rules as dump_css(), but each dimension is multiplied by `scale`
    // before rounding and is expressed in pt.
    void dump_print_css(std::ostream & out, double scale) {
        for(const auto & entry : value_map)
        {
            const int page_no = entry.first;
            const std::pair<double,double> & size = entry.second;
            out << "." << CSS::PAGE_CONTENT_BOX_CN << page_no << "{"
                << "background-size:" << round(size.first * scale) << "pt " << round(size.second * scale) << "pt;"
                << "}" << std::endl;
        }
    }

private:
    // page number -> (width, height) of that page's background image
    std::unordered_map<int, std::pair<double,double>> value_map;
};
} // namespace pdf2htmlEX } // namespace pdf2htmlEX
#endif //STATEMANAGER_H__ #endif //STATEMANAGER_H__

View File

@ -35,7 +35,9 @@ const char * const PAGE_FRAME_CN = "@CSS_PAGE_FRAME_CN@";
const char * const PAGE_CONTENT_BOX_CN = "@CSS_PAGE_CONTENT_BOX_CN@"; const char * const PAGE_CONTENT_BOX_CN = "@CSS_PAGE_CONTENT_BOX_CN@";
const char * const PAGE_DATA_CN = "@CSS_PAGE_DATA_CN@"; const char * const PAGE_DATA_CN = "@CSS_PAGE_DATA_CN@";
const char * const FONT_NAME_CN = "@CSS_FONT_NAME_CN@"; const char * const BACKGROUND_IMAGE_CN = "@CSS_BACKGROUND_IMAGE_CN@";
const char * const FONT_FAMILY_CN = "@CSS_FONT_FAMILY_CN@";
const char * const FONT_SIZE_CN = "@CSS_FONT_SIZE_CN@"; const char * const FONT_SIZE_CN = "@CSS_FONT_SIZE_CN@";
const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@"; const char * const FILL_COLOR_CN = "@CSS_FILL_COLOR_CN@";
const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@"; const char * const STROKE_COLOR_CN = "@CSS_STROKE_COLOR_CN@";

View File

@ -260,12 +260,14 @@ void ffw_cidflatten(void)
SFFlatten(cur_fv->sf->cidmaster); SFFlatten(cur_fv->sf->cidmaster);
} }
/*
* There is no check if a glyph with the same unicode exists!
*/
void ffw_add_empty_char(int32_t unicode, int width) void ffw_add_empty_char(int32_t unicode, int width)
{ {
// append the new char to Enc
SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount); SplineChar * sc = SFMakeChar(cur_fv->sf, cur_fv->map, cur_fv->map->enccount);
sc->unicodeenc = unicode; SCSetMetaData(sc, sc->name, unicode, sc->comment);
sc->width = width; SCSynchronizeWidth(sc, width, sc->width, cur_fv);
} }
int ffw_get_em_size(void) int ffw_get_em_size(void)
@ -319,6 +321,13 @@ void ffw_metric(double * ascent, double * descent)
sf->descent = em - bb.maxy; sf->descent = em - bb.maxy;
*/ */
/*
* The embedded fonts are likely to have inconsistent values for the 3 sets of ascent/descent
* PDF viewers don't care, since they don't even use these values
* But have to unify them, for different browsers on different platforms
* Things may become easier when there are CSS rules for baseline-based positioning.
*/
info->os2_winascent = a; info->os2_winascent = a;
info->os2_typoascent = a; info->os2_typoascent = a;
info->hhead_ascent = a; info->hhead_ascent = a;