mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
simple clipping
This commit is contained in:
parent
84a2490200
commit
0cd9405565
@ -49,7 +49,7 @@
|
|||||||
.@CSS_CSS_DRAW_CN@ { display:none; }
|
.@CSS_CSS_DRAW_CN@ { display:none; }
|
||||||
}
|
}
|
||||||
/* Part 2: Page Elements: Modify with caution
|
/* Part 2: Page Elements: Modify with caution
|
||||||
* The followings are base classes, which are meant to be override by PDF specific classes
|
* The following are base classes, some of which are meant to be overridden by PDF specific classes
|
||||||
* So do not increase the specificity (e.g. ".classname" -> "#page-container .classname")
|
* So do not increase the specificity (e.g. ".classname" -> "#page-container .classname")
|
||||||
*/
|
*/
|
||||||
.@CSS_PAGE_DECORATION_CN@ { /* page decoration */
|
.@CSS_PAGE_DECORATION_CN@ { /* page decoration */
|
||||||
@ -71,12 +71,15 @@
|
|||||||
.@CSS_PAGE_CONTENT_BOX_CN@ { /* content of a page */
|
.@CSS_PAGE_CONTENT_BOX_CN@ { /* content of a page */
|
||||||
position:absolute;
|
position:absolute;
|
||||||
border-width:0;
|
border-width:0;
|
||||||
|
padding:0;
|
||||||
|
margin:0;
|
||||||
top:0;
|
top:0;
|
||||||
left:0;
|
left:0;
|
||||||
width:100%;
|
width:100%;
|
||||||
height:100%;
|
height:100%;
|
||||||
overflow:hidden;
|
overflow:hidden;
|
||||||
display:block;
|
display:block;
|
||||||
|
/* set transform-origin for scaling */
|
||||||
transform-origin:0% 0%;
|
transform-origin:0% 0%;
|
||||||
-ms-transform-origin:0% 0%;
|
-ms-transform-origin:0% 0%;
|
||||||
-moz-transform-origin:0% 0%;
|
-moz-transform-origin:0% 0%;
|
||||||
@ -114,6 +117,14 @@
|
|||||||
.@CSS_PAGE_CONTENT_BOX_CN@ {overflow:visible;}
|
.@CSS_PAGE_CONTENT_BOX_CN@ {overflow:visible;}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* Base style for the clip container <div> that HTMLTextPage::dump_text wraps around text lines
 * sharing one clip rectangle; its position and size come from separate left/bottom/width/height classes. */
.@CSS_CLIP_CN@ { /* clip box */
|
||||||
|
position:absolute;
|
||||||
|
border-width:0;
|
||||||
|
padding:0;
|
||||||
|
margin:0;
|
||||||
|
overflow:hidden; /* content extending past the box is not shown */
|
||||||
|
display:block;
|
||||||
|
}
|
||||||
.@CSS_LINE_CN@ { /* text line */
|
.@CSS_LINE_CN@ { /* text line */
|
||||||
position:absolute;
|
position:absolute;
|
||||||
white-space:pre;
|
white-space:pre;
|
||||||
@ -144,7 +155,7 @@ span { /* text blocks within a line */
|
|||||||
.@CSS_PAGE_DATA_CN@ { /* info for Javascript */
|
.@CSS_PAGE_DATA_CN@ { /* info for Javascript */
|
||||||
display:none;
|
display:none;
|
||||||
}
|
}
|
||||||
.@CSS_LINE_CN@ { /* annotation links */
|
.@CSS_LINK_CN@ { /* annotation links */
|
||||||
}
|
}
|
||||||
/* transparent color - WebKit */
|
/* transparent color - WebKit */
|
||||||
.@CSS_CSS_DRAW_CN@ { /* css drawing */
|
.@CSS_CSS_DRAW_CN@ { /* css drawing */
|
||||||
|
@ -296,8 +296,7 @@ protected:
|
|||||||
{
|
{
|
||||||
NLS_NONE,
|
NLS_NONE,
|
||||||
NLS_NEWSTATE,
|
NLS_NEWSTATE,
|
||||||
NLS_NEWLINE,
|
NLS_NEWLINE
|
||||||
NLS_NEWCLIP
|
|
||||||
} new_line_state;
|
} new_line_state;
|
||||||
|
|
||||||
// for font reencoding
|
// for font reencoding
|
||||||
|
@ -173,8 +173,13 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
|||||||
{
|
{
|
||||||
this->pageNum = pageNum;
|
this->pageNum = pageNum;
|
||||||
|
|
||||||
long long wid = all_manager.width.install(state->getPageWidth());
|
double pageWidth = state->getPageWidth();
|
||||||
long long hid = all_manager.height.install(state->getPageHeight());
|
double pageHeight = state->getPageHeight();
|
||||||
|
|
||||||
|
html_text_page.set_page_size(pageWidth, pageHeight);
|
||||||
|
|
||||||
|
long long wid = all_manager.width.install(pageWidth);
|
||||||
|
long long hid = all_manager.height.install(pageHeight);
|
||||||
f_pages.fs
|
f_pages.fs
|
||||||
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
|
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
|
||||||
<< " " << CSS::WIDTH_CN << wid
|
<< " " << CSS::WIDTH_CN << wid
|
||||||
|
@ -152,6 +152,13 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
|
|
||||||
new_line_state = NLS_NONE;
|
new_line_state = NLS_NONE;
|
||||||
|
|
||||||
|
if(all_changed || clip_changed)
|
||||||
|
{
|
||||||
|
double x1, x2, y1, y2;
|
||||||
|
state->getClipBBox(&x1, &y1, &x2, &y2);
|
||||||
|
html_text_page.clip(x1, y1, x2, y2);
|
||||||
|
}
|
||||||
|
|
||||||
bool need_recheck_position = false;
|
bool need_recheck_position = false;
|
||||||
bool need_rescale_font = false;
|
bool need_rescale_font = false;
|
||||||
bool draw_text_scale_changed = false;
|
bool draw_text_scale_changed = false;
|
||||||
|
@ -29,6 +29,8 @@ HTMLTextLine::HTMLTextLine (const HTMLLineState & line_state, const Param & para
|
|||||||
:param(param)
|
:param(param)
|
||||||
,all_manager(all_manager)
|
,all_manager(all_manager)
|
||||||
,line_state(line_state)
|
,line_state(line_state)
|
||||||
|
,clip_x1(0)
|
||||||
|
,clip_y1(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void HTMLTextLine::append_unicodes(const Unicode * u, int l)
|
void HTMLTextLine::append_unicodes(const Unicode * u, int l)
|
||||||
@ -81,9 +83,9 @@ void HTMLTextLine::dump_text(ostream & out)
|
|||||||
// open <div> for the current text line
|
// open <div> for the current text line
|
||||||
out << "<div class=\"" << CSS::LINE_CN
|
out << "<div class=\"" << CSS::LINE_CN
|
||||||
<< " " << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(line_state.transform_matrix)
|
<< " " << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(line_state.transform_matrix)
|
||||||
<< " " << CSS::LEFT_CN << all_manager.left.install(line_state.x)
|
<< " " << CSS::LEFT_CN << all_manager.left.install(line_state.x - clip_x1)
|
||||||
<< " " << CSS::HEIGHT_CN << all_manager.height.install(ascent)
|
<< " " << CSS::HEIGHT_CN << all_manager.height.install(ascent)
|
||||||
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(line_state.y)
|
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(line_state.y - clip_y1)
|
||||||
;
|
;
|
||||||
// it will be closed by the first state
|
// it will be closed by the first state
|
||||||
}
|
}
|
||||||
@ -231,6 +233,12 @@ void HTMLTextLine::clear(void)
|
|||||||
text.clear();
|
text.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Store x1 and y1 of a clip rectangle as clip_x1 / clip_y1.
 * dump_text() subtracts these from line_state.x / line_state.y when it
 * installs the left and bottom values for the line.
 * x2 and y2 are accepted but currently unused.
 */
void HTMLTextLine::clip(double x1, double y1, double x2, double y2)
|
||||||
|
{
|
||||||
|
clip_x1 = x1;
|
||||||
|
clip_y1 = y1;
|
||||||
|
}
|
||||||
|
|
||||||
void HTMLTextLine::prepare(void)
|
void HTMLTextLine::prepare(void)
|
||||||
{
|
{
|
||||||
if(param.optimize_text)
|
if(param.optimize_text)
|
||||||
|
@ -81,6 +81,8 @@ public:
|
|||||||
bool text_empty(void) const { return text.empty(); }
|
bool text_empty(void) const { return text.empty(); }
|
||||||
void clear(void);
|
void clear(void);
|
||||||
|
|
||||||
|
void clip(double x1, double y1, double x2, double y2);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Optimize and calculate necessary values
|
* Optimize and calculate necessary values
|
||||||
*/
|
*/
|
||||||
@ -93,6 +95,7 @@ private:
|
|||||||
|
|
||||||
HTMLLineState line_state;
|
HTMLLineState line_state;
|
||||||
double ascent, descent;
|
double ascent, descent;
|
||||||
|
double clip_x1, clip_y1;
|
||||||
|
|
||||||
std::vector<State> states;
|
std::vector<State> states;
|
||||||
std::vector<Offset> offsets;
|
std::vector<Offset> offsets;
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#include "HTMLTextPage.h"
|
#include "HTMLTextPage.h"
|
||||||
|
#include "util/css_const.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
@ -17,6 +18,8 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
|
|||||||
: param(param)
|
: param(param)
|
||||||
, all_manager(all_manager)
|
, all_manager(all_manager)
|
||||||
, cur_line(nullptr)
|
, cur_line(nullptr)
|
||||||
|
, page_width(0)
|
||||||
|
, page_height(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
void HTMLTextPage::dump_text(ostream & out)
|
void HTMLTextPage::dump_text(ostream & out)
|
||||||
@ -25,8 +28,42 @@ void HTMLTextPage::dump_text(ostream & out)
|
|||||||
(*iter)->prepare();
|
(*iter)->prepare();
|
||||||
if(param.optimize_text)
|
if(param.optimize_text)
|
||||||
optimize();
|
optimize();
|
||||||
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
|
|
||||||
(*iter)->dump_text(out);
|
//push a dummy entry for convenience
|
||||||
|
clip_boxes.emplace_back(0, 0, page_width, page_height, text_lines.size());
|
||||||
|
|
||||||
|
ClipBox cur_cb(0, 0, page_width, page_height, 0);
|
||||||
|
bool has_clip = false;
|
||||||
|
|
||||||
|
auto text_line_iter = text_lines.begin();
|
||||||
|
for(auto clip_iter = clip_boxes.begin(); clip_iter != clip_boxes.end(); ++clip_iter)
|
||||||
|
{
|
||||||
|
if(has_clip)
|
||||||
|
{
|
||||||
|
out << "<div class=\"" << CSS::CLIP_CN
|
||||||
|
<< " " << CSS::LEFT_CN << all_manager.left.install(cur_cb.x1)
|
||||||
|
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(cur_cb.y1)
|
||||||
|
<< " " << CSS::WIDTH_CN << all_manager.width.install(cur_cb.x2 - cur_cb.x1)
|
||||||
|
<< " " << CSS::HEIGHT_CN << all_manager.height.install(cur_cb.y2 - cur_cb.y1)
|
||||||
|
<< "\">";
|
||||||
|
}
|
||||||
|
|
||||||
|
auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx;
|
||||||
|
while(text_line_iter != next_text_line_iter)
|
||||||
|
{
|
||||||
|
(*text_line_iter)->clip(cur_cb.x1, cur_cb.y1, cur_cb.x2, cur_cb.y2);
|
||||||
|
(*text_line_iter)->dump_text(out);
|
||||||
|
++text_line_iter;
|
||||||
|
}
|
||||||
|
if(has_clip)
|
||||||
|
{
|
||||||
|
out << "</div>";
|
||||||
|
}
|
||||||
|
|
||||||
|
cur_cb = *clip_iter;
|
||||||
|
has_clip = !(equal(0, cur_cb.x1) && equal(0, cur_cb.y1)
|
||||||
|
&& equal(page_width, cur_cb.x2) && equal(page_height, cur_cb.y2));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLTextPage::dump_css(ostream & out)
|
void HTMLTextPage::dump_css(ostream & out)
|
||||||
@ -37,6 +74,7 @@ void HTMLTextPage::dump_css(ostream & out)
|
|||||||
void HTMLTextPage::clear(void)
|
void HTMLTextPage::clear(void)
|
||||||
{
|
{
|
||||||
text_lines.clear();
|
text_lines.clear();
|
||||||
|
clip_boxes.clear();
|
||||||
cur_line = nullptr;
|
cur_line = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -50,6 +88,40 @@ void HTMLTextPage::open_new_line(const HTMLLineState & line_state)
|
|||||||
cur_line = text_lines.back().get();
|
cur_line = text_lines.back().get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Remember the page dimensions in page_width / page_height.
 * dump_text() treats the rectangle (0, 0, page_width, page_height) as
 * "no clipping" when deciding whether to emit a clip container.
 */
void HTMLTextPage::set_page_size(double width, double height)
|
||||||
|
{
|
||||||
|
page_width = width;
|
||||||
|
page_height = height;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Record a clip rectangle (x1, y1)-(x2, y2) for the text lines added after this call.
 *  - If the most recent ClipBox has no text line after it
 *    (its start_idx equals text_lines.size()), overwrite it in place.
 *  - If the rectangle equals the most recent ClipBox (per equal()), do nothing.
 *  - Otherwise append a new ClipBox whose start_idx is the current text_lines.size().
 */
void HTMLTextPage::clip(double x1, double y1, double x2, double y2)
|
||||||
|
{
|
||||||
|
if(!clip_boxes.empty())
|
||||||
|
{
|
||||||
|
auto & cb = clip_boxes.back();
|
||||||
|
if(cb.start_idx == text_lines.size())
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Previous ClipBox is not used
|
||||||
|
*/
|
||||||
|
cb.x1 = x1;
|
||||||
|
cb.y1 = y1;
|
||||||
|
cb.x2 = x2;
|
||||||
|
cb.y2 = y2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(equal(cb.x1, x1) && equal(cb.y1, y1)
|
||||||
|
&& equal(cb.x2, x2) && equal(cb.y2, y2))
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* same as previous ClipBox
|
||||||
|
*/
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// start_idx = index of the first text line that will be covered by this box
clip_boxes.emplace_back(x1, y1, x2, y2, text_lines.size());
|
||||||
|
}
|
||||||
|
|
||||||
void HTMLTextPage::optimize(void)
|
void HTMLTextPage::optimize(void)
|
||||||
{
|
{
|
||||||
//TODO
|
//TODO
|
||||||
|
@ -35,6 +35,10 @@ public:
|
|||||||
void clear(void);
|
void clear(void);
|
||||||
|
|
||||||
void open_new_line(const HTMLLineState & line_state);
|
void open_new_line(const HTMLLineState & line_state);
|
||||||
|
|
||||||
|
/* for clipping */
|
||||||
|
void set_page_size(double width, double height);
|
||||||
|
void clip(double x1, double y1, double x2, double y2);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void optimize(void);
|
void optimize(void);
|
||||||
@ -42,7 +46,18 @@ private:
|
|||||||
const Param & param;
|
const Param & param;
|
||||||
AllStateManager & all_manager;
|
AllStateManager & all_manager;
|
||||||
HTMLTextLine * cur_line;
|
HTMLTextLine * cur_line;
|
||||||
|
double page_width, page_height;
|
||||||
|
|
||||||
std::vector<std::unique_ptr<HTMLTextLine>> text_lines;
|
std::vector<std::unique_ptr<HTMLTextLine>> text_lines;
|
||||||
|
|
||||||
|
/*
 * A clip rectangle (x1, y1)-(x2, y2) plus start_idx, the index into
 * text_lines of the first line recorded after the rectangle was set
 * (clip() passes text_lines.size() at that moment).
 */
struct ClipBox {
|
||||||
|
ClipBox(double x1, double y1, double x2, double y2, size_t start_idx)
|
||||||
|
:x1(x1),y1(y1),x2(x2),y2(y2),start_idx(start_idx)
|
||||||
|
{ }
|
||||||
|
double x1, y1, x2, y2;
|
||||||
|
size_t start_idx;
|
||||||
|
};
|
||||||
|
std::vector<ClipBox> clip_boxes;
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace pdf2htmlEX
|
} //namespace pdf2htmlEX
|
||||||
|
@ -9,6 +9,7 @@ set(CSS_INVALID_ID "_")
|
|||||||
|
|
||||||
set(CSS_LINE_CN "t") # Text
|
set(CSS_LINE_CN "t") # Text
|
||||||
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
|
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
|
||||||
|
set(CSS_CLIP_CN "c") # Clip
|
||||||
|
|
||||||
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
|
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
|
||||||
set(CSS_PAGE_FRAME_CN "pf") # Page Frame
|
set(CSS_PAGE_FRAME_CN "pf") # Page Frame
|
||||||
|
@ -26,6 +26,7 @@ const char * const INVALID_ID = "@CSS_INVALID_ID@";
|
|||||||
|
|
||||||
const char * const LINE_CN = "@CSS_LINE_CN@";
|
const char * const LINE_CN = "@CSS_LINE_CN@";
|
||||||
const char * const TRANSFORM_MATRIX_CN = "@CSS_TRANSFORM_MATRIX_CN@";
|
const char * const TRANSFORM_MATRIX_CN = "@CSS_TRANSFORM_MATRIX_CN@";
|
||||||
|
const char * const CLIP_CN = "@CSS_CLIP_CN@";
|
||||||
|
|
||||||
// page_decoration is for shadow etc
|
// page_decoration is for shadow etc
|
||||||
// page_frame cannot have margin or border-width, pdf2htmlEX.js will use it to determine the coordinates
|
// page_frame cannot have margin or border-width, pdf2htmlEX.js will use it to determine the coordinates
|
||||||
|
Loading…
Reference in New Issue
Block a user