mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
merge README
This commit is contained in:
commit
37f2082817
@ -17,7 +17,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
|
|||||||
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
|
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb")
|
||||||
|
|
||||||
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h)
|
add_executable(pdftohtmlEX src/pdftohtmlEX.cc src/HTMLRenderer.cc src/HTMLRenderer.h src/BackgroundRenderer.cc src/BackgroundRenderer.h src/Consts.h)
|
||||||
target_link_libraries(pdftohtmlEX poppler boost_program_options)
|
target_link_libraries(pdftohtmlEX poppler boost_program_options)
|
||||||
|
|
||||||
|
|
||||||
|
@ -6,13 +6,9 @@ pdf2html**EX**
|
|||||||
|
|
||||||
Introduction
|
Introduction
|
||||||
-----------------------------
|
-----------------------------
|
||||||
Traditional pdf -> html conversion tools are more likely pdf -> text tools.
|
pdf2htmlEX renders PDF files in HTML, utilizing modern HTML/CSS technologies; it aims to provide accurate rendering while staying optimized for Web display.
|
||||||
|
|
||||||
For those who are not satisfied with them, this might be the right one for you.
|
pdf2htmlEX is optimized for recent versions of modern web browsers such as Mozilla Firefox & Google Chrome.
|
||||||
|
|
||||||
pdf2htmlEX utilizes latest technologies of html/css, aims to provide an accuracy rendering, while keeping optimized for Web display.
|
|
||||||
|
|
||||||
pdf2htmlEX is optimized for recent versions of moderm web browsers such as Mozilla Firefox & Google Chrome.
|
|
||||||
|
|
||||||
Features
|
Features
|
||||||
----------------------------
|
----------------------------
|
||||||
|
@ -14,7 +14,7 @@ void BackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
CharCode code, int nBytes, Unicode *u, int uLen)
|
CharCode code, int nBytes, Unicode *u, int uLen)
|
||||||
{
|
{
|
||||||
auto font = state->getFont();
|
auto font = state->getFont();
|
||||||
if((font->getType() == fontType3) || (font->getWMode()))
|
// if((font->getType() == fontType3) || (font->getWMode()))
|
||||||
{
|
{
|
||||||
SplashOutputDev::drawChar(state, x, y, dx, dy, originX, originY, code, nBytes, u, uLen);
|
SplashOutputDev::drawChar(state, x, y, dx, dy, originX, originY, code, nBytes, u, uLen);
|
||||||
}
|
}
|
||||||
|
79
src/Consts.h
Normal file
79
src/Consts.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/*
|
||||||
|
* Constants
|
||||||
|
*
|
||||||
|
* by WangLu
|
||||||
|
* 2012.08.07
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CONSTS_H__
|
||||||
|
#define CONSTS_H__
|
||||||
|
#include <string>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
const char * HTML_HEAD = "<!DOCTYPE html>\n\
|
||||||
|
<html><head>\
|
||||||
|
<meta charset=\"utf-8\">\
|
||||||
|
<style type=\"text/css\">\
|
||||||
|
#pdf-main {\
|
||||||
|
font-family: sans-serif;\
|
||||||
|
position:absolute;\
|
||||||
|
top:0;\
|
||||||
|
left:0;\
|
||||||
|
bottom:0;\
|
||||||
|
right:0;\
|
||||||
|
overflow:auto;\
|
||||||
|
background-color:grey;\
|
||||||
|
}\
|
||||||
|
#pdf-main > .p {\
|
||||||
|
position:relative;\
|
||||||
|
margin:13px auto;\
|
||||||
|
background-color:white;\
|
||||||
|
overflow:hidden;\
|
||||||
|
display:none;\
|
||||||
|
}\
|
||||||
|
.p > .l {\
|
||||||
|
position:absolute; \
|
||||||
|
white-space:pre;\
|
||||||
|
}\
|
||||||
|
.l > .w {\
|
||||||
|
display:inline-block;\
|
||||||
|
}\
|
||||||
|
::selection{\
|
||||||
|
background: rgba(168,209,255,0.5);\
|
||||||
|
}\
|
||||||
|
::-moz-selection{\
|
||||||
|
background: rgba(168,209,255,0.5);\
|
||||||
|
}\
|
||||||
|
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
|
||||||
|
<script type=\"text/javascript\">\
|
||||||
|
function show_pages()\
|
||||||
|
{\
|
||||||
|
var pages = document.getElementById('pdf-main').childNodes;\
|
||||||
|
var idx = 0;\
|
||||||
|
var f = function(){\
|
||||||
|
if (idx < pages.length) {\
|
||||||
|
try{\
|
||||||
|
pages[idx].style.display='block';\
|
||||||
|
}catch(e){}\
|
||||||
|
++idx;\
|
||||||
|
setTimeout(f,100);\
|
||||||
|
}\
|
||||||
|
};\
|
||||||
|
f();\
|
||||||
|
};\
|
||||||
|
</script>\
|
||||||
|
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
|
||||||
|
|
||||||
|
const char * HTML_TAIL = "</div></body></html>";
|
||||||
|
|
||||||
|
const std::map<string, string> BASE_14_FONT_CSS_FONT_MAP({\
|
||||||
|
{ "Courier", "Courier,monospace" },\
|
||||||
|
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
|
||||||
|
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
|
||||||
|
{ "Symbol", "Symbol,\"Standard Symbols L\"" },\
|
||||||
|
{ "ZapfDingbats", "ZapfDingbats,\"Dingbats\"" },\
|
||||||
|
});
|
||||||
|
|
||||||
|
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
|
||||||
|
|
||||||
|
#endif //CONSTS_H__
|
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
#include "HTMLRenderer.h"
|
#include "HTMLRenderer.h"
|
||||||
#include "BackgroundRenderer.h"
|
#include "BackgroundRenderer.h"
|
||||||
|
#include "Consts.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* CSS classes
|
* CSS classes
|
||||||
@ -41,74 +42,9 @@
|
|||||||
* w<hex> - White space
|
* w<hex> - White space
|
||||||
* t<hex> - Transform matrix
|
* t<hex> - Transform matrix
|
||||||
* c<hex> - Color
|
* c<hex> - Color
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
const char * HTML_HEAD = "<!DOCTYPE html>\n\
|
|
||||||
<html><head>\
|
|
||||||
<meta charset=\"utf-8\">\
|
|
||||||
<style type=\"text/css\">\
|
|
||||||
#pdf-main {\
|
|
||||||
font-family: sans-serif;\
|
|
||||||
position:absolute;\
|
|
||||||
top:0;\
|
|
||||||
left:0;\
|
|
||||||
bottom:0;\
|
|
||||||
right:0;\
|
|
||||||
overflow:auto;\
|
|
||||||
background-color:grey;\
|
|
||||||
}\
|
|
||||||
#pdf-main > .p {\
|
|
||||||
position:relative;\
|
|
||||||
margin:13px auto;\
|
|
||||||
background-color:white;\
|
|
||||||
overflow:hidden;\
|
|
||||||
display:none;\
|
|
||||||
}\
|
|
||||||
.p > .l {\
|
|
||||||
position:absolute; \
|
|
||||||
white-space:pre;\
|
|
||||||
}\
|
|
||||||
.l > .w {\
|
|
||||||
display:inline-block;\
|
|
||||||
}\
|
|
||||||
::selection{\
|
|
||||||
background: rgba(168,209,255,0.5);\
|
|
||||||
}\
|
|
||||||
::-moz-selection{\
|
|
||||||
background: rgba(168,209,255,0.5);\
|
|
||||||
}\
|
|
||||||
</style><link rel=\"stylesheet\" type=\"text/css\" href=\"all.css\" />\
|
|
||||||
<script type=\"text/javascript\">\
|
|
||||||
function show_pages()\
|
|
||||||
{\
|
|
||||||
var pages = document.getElementById('pdf-main').childNodes;\
|
|
||||||
var idx = 0;\
|
|
||||||
var f = function(){\
|
|
||||||
if (idx < pages.length) {\
|
|
||||||
try{\
|
|
||||||
pages[idx].style.display='block';\
|
|
||||||
}catch(e){}\
|
|
||||||
++idx;\
|
|
||||||
setTimeout(f,100);\
|
|
||||||
}\
|
|
||||||
};\
|
|
||||||
f();\
|
|
||||||
};\
|
|
||||||
</script>\
|
|
||||||
</head><body onload=\"show_pages();\"><div id=\"pdf-main\">";
|
|
||||||
|
|
||||||
const char * HTML_TAIL = "</div></body></html>";
|
|
||||||
|
|
||||||
const std::map<string, string> BASE_14_FONT_CSS_FONT_MAP({\
|
|
||||||
{ "Courier", "Courier,monospace" },\
|
|
||||||
{ "Helvetica", "Helvetica,Arial,\"Nimbus Sans L\",sans-serif" },\
|
|
||||||
{ "Times", "Times,\"Time New Roman\",\"Nimbus Roman No9 L\",serif" },\
|
|
||||||
{ "Symbol", "Symbol" },\
|
|
||||||
{ "ZapfDingbats", "ZapfDingbats" },\
|
|
||||||
});
|
|
||||||
|
|
||||||
const double id_matrix[6] = {1.0, 0.0, 0.0, 1.0, 0.0, 0.0};
|
|
||||||
|
|
||||||
TextString::TextString(GfxState *state)
|
TextString::TextString(GfxState *state)
|
||||||
:unicodes()
|
:unicodes()
|
||||||
,x(state->getCurX())
|
,x(state->getCurX())
|
||||||
@ -242,7 +178,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
|
|
||||||
cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0;
|
cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0;
|
||||||
cur_line_x_offset = 0;
|
cur_line_x_offset = 0;
|
||||||
cur_line_y = 0;
|
cur_tx = cur_ty = 0;
|
||||||
cur_font_size = 0;
|
cur_font_size = 0;
|
||||||
|
|
||||||
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
|
memcpy(draw_ctm, id_matrix, sizeof(draw_ctm));
|
||||||
@ -370,6 +306,7 @@ void HTMLRenderer::beginString(GfxState *state, GooString *s) {
|
|||||||
void HTMLRenderer::endString(GfxState *state) {
|
void HTMLRenderer::endString(GfxState *state) {
|
||||||
if (cur_string->getSize() == 0) {
|
if (cur_string->getSize() == 0) {
|
||||||
delete cur_string ;
|
delete cur_string ;
|
||||||
|
cur_string = nullptr;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -378,8 +315,9 @@ void HTMLRenderer::endString(GfxState *state) {
|
|||||||
{
|
{
|
||||||
if(at_same_line(cur_line, cur_string))
|
if(at_same_line(cur_line, cur_string))
|
||||||
{
|
{
|
||||||
double x1 = cur_line->getX() + cur_line->getWidth();
|
// TODO: this is not correct
|
||||||
double x2 = cur_string->getX();
|
double x1 = cur_line->getState()->getLineX() + cur_line->getWidth();
|
||||||
|
double x2 = cur_string->getState()->getLineX();
|
||||||
double target = (x2-x1-cur_line_x_offset) * draw_scale;
|
double target = (x2-x1-cur_line_x_offset) * draw_scale;
|
||||||
|
|
||||||
if(target > -param->h_eps)
|
if(target > -param->h_eps)
|
||||||
@ -437,7 +375,8 @@ void HTMLRenderer::endString(GfxState *state) {
|
|||||||
html_fout << "\"";
|
html_fout << "\"";
|
||||||
double x,y;
|
double x,y;
|
||||||
cur_state->transform(cur_state->getCurX(), cur_state->getCurY(), &x, &y);
|
cur_state->transform(cur_state->getCurX(), cur_state->getCurY(), &x, &y);
|
||||||
html_fout << boost::format(" data-x=\"%1%\" data-y=\"%2%\" hs=\"%3%")%x%y%(cur_state->getHorizScaling());
|
html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-scale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%")
|
||||||
|
%x%y%(cur_state->getHorizScaling())%draw_scale%cur_state->getLineX()%cur_state->getLineY();
|
||||||
}
|
}
|
||||||
|
|
||||||
html_fout << "\">";
|
html_fout << "\">";
|
||||||
@ -462,17 +401,6 @@ void HTMLRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
cur_string->addUnicodes(state, x, y, dx, dy, u, uLen);
|
cur_string->addUnicodes(state, x, y, dx, dy, u, uLen);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(nBytes > 0)
|
|
||||||
{
|
|
||||||
std::cerr << "Cannot map to Unicode!" << std::endl;
|
|
||||||
std::cerr << cur_fn_id << std::endl;
|
|
||||||
std::cerr << "*";
|
|
||||||
for(int i = 0; i < nBytes; ++i)
|
|
||||||
{
|
|
||||||
std::cerr << (int)(((char*)&code)[i]);
|
|
||||||
}
|
|
||||||
std::cerr << std::endl;
|
|
||||||
}
|
|
||||||
cur_string->addChars(state, x, y, dx, dy, code, nBytes);
|
cur_string->addChars(state, x, y, dx, dy, code, nBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -480,11 +408,16 @@ void HTMLRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
// TODO
|
// TODO
|
||||||
void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||||
{
|
{
|
||||||
|
check_state_change(state);
|
||||||
|
|
||||||
auto font = state->getFont();
|
auto font = state->getFont();
|
||||||
if(font->getWMode())
|
if(font->getWMode())
|
||||||
std::cerr << "TODO: writing mode" << std::endl;
|
{
|
||||||
|
//TODO
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// stolen from poppler
|
// from poppler
|
||||||
double dx = 0;
|
double dx = 0;
|
||||||
double dy = 0;
|
double dy = 0;
|
||||||
double dx2, dy2;
|
double dx2, dy2;
|
||||||
@ -992,12 +925,11 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
{
|
{
|
||||||
if(pos_changed)
|
if(pos_changed)
|
||||||
{
|
{
|
||||||
if(!_equal(state->getLineY(), cur_line_y))
|
if(!_equal(state->getLineY(), cur_ty))
|
||||||
{
|
{
|
||||||
close_cur_line();
|
close_cur_line();
|
||||||
cur_line_y = state->getLineY();
|
cur_ty = state->getLineY();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(color_changed)
|
if(color_changed)
|
||||||
@ -1012,7 +944,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool need_rescale_font = true;
|
bool need_rescale_font = false;
|
||||||
if(font_changed)
|
if(font_changed)
|
||||||
{
|
{
|
||||||
long long new_fn_id = install_font(state->getFont());
|
long long new_fn_id = install_font(state->getFont());
|
||||||
@ -1022,6 +954,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
close_cur_line();
|
close_cur_line();
|
||||||
cur_fn_id = new_fn_id;
|
cur_fn_id = new_fn_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!_equal(cur_font_size, state->getFontSize()))
|
if(!_equal(cur_font_size, state->getFontSize()))
|
||||||
{
|
{
|
||||||
cur_font_size = state->getFontSize();
|
cur_font_size = state->getFontSize();
|
||||||
@ -1043,7 +976,9 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
new_ctm[3] = m1[1] * m2[2] + m1[3] * m2[3];
|
new_ctm[3] = m1[1] * m2[2] + m1[3] * m2[3];
|
||||||
new_ctm[4] = new_ctm[5] = 0;
|
new_ctm[4] = new_ctm[5] = 0;
|
||||||
|
|
||||||
if(!_tm_equal(new_ctm, draw_ctm, 4))
|
// TODO: this is not correct
|
||||||
|
// what to check?
|
||||||
|
if(!_tm_equal(new_ctm, draw_ctm, 4)) { }
|
||||||
{
|
{
|
||||||
need_rescale_font = true;
|
need_rescale_font = true;
|
||||||
}
|
}
|
||||||
|
@ -199,7 +199,7 @@ class HTMLRenderer : public OutputDev
|
|||||||
double cur_line_x_offset;
|
double cur_line_x_offset;
|
||||||
|
|
||||||
// current position
|
// current position
|
||||||
double cur_line_y;
|
double cur_tx, cur_ty; // in text coords
|
||||||
bool pos_changed;
|
bool pos_changed;
|
||||||
|
|
||||||
long long cur_fn_id;
|
long long cur_fn_id;
|
||||||
|
Loading…
Reference in New Issue
Block a user