1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

simple clipping

This commit is contained in:
Lu Wang 2013-05-04 19:26:26 +08:00
parent 84a2490200
commit 0cd9405565
10 changed files with 132 additions and 10 deletions

View File

@ -49,7 +49,7 @@
.@CSS_CSS_DRAW_CN@ { display:none; }
}
/* Part 2: Page Elements: Modify with caution
* The followings are base classes, which are meant to be override by PDF specific classes
* The following are base classes, some of which are meant to be overridden by PDF-specific classes
* So do not increase the specificity (e.g. ".classname" -> "#page-container .classname")
*/
.@CSS_PAGE_DECORATION_CN@ { /* page decoration */
@ -71,12 +71,15 @@
.@CSS_PAGE_CONTENT_BOX_CN@ { /* content of a page */
position:absolute;
border-width:0;
padding:0;
margin:0;
top:0;
left:0;
width:100%;
height:100%;
overflow:hidden;
display:block;
/* set transform-origin for scaling */
transform-origin:0% 0%;
-ms-transform-origin:0% 0%;
-moz-transform-origin:0% 0%;
@ -114,6 +117,14 @@
.@CSS_PAGE_CONTENT_BOX_CN@ {overflow:visible;}
}
}
.@CSS_CLIP_CN@ { /* clip box */
  /* no margin, padding or border */
  margin:0;
  padding:0;
  border-width:0;
  /* absolutely positioned block; content outside the box is hidden */
  display:block;
  position:absolute;
  overflow:hidden;
}
.@CSS_LINE_CN@ { /* text line */
position:absolute;
white-space:pre;
@ -144,7 +155,7 @@ span { /* text blocks within a line */
.@CSS_PAGE_DATA_CN@ { /* info for Javascript */
display:none;
}
.@CSS_LINE_CN@ { /* annotation links */
.@CSS_LINK_CN@ { /* annotation links */
}
/* transparent color - WebKit */
.@CSS_CSS_DRAW_CN@ { /* css drawing */

View File

@ -296,8 +296,7 @@ protected:
{
NLS_NONE,
NLS_NEWSTATE,
NLS_NEWLINE,
NLS_NEWCLIP
NLS_NEWLINE
} new_line_state;
// for font reencoding

View File

@ -173,8 +173,13 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
{
this->pageNum = pageNum;
long long wid = all_manager.width.install(state->getPageWidth());
long long hid = all_manager.height.install(state->getPageHeight());
double pageWidth = state->getPageWidth();
double pageHeight = state->getPageHeight();
html_text_page.set_page_size(pageWidth, pageHeight);
long long wid = all_manager.width.install(pageWidth);
long long hid = all_manager.height.install(pageHeight);
f_pages.fs
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
<< " " << CSS::WIDTH_CN << wid

View File

@ -152,6 +152,13 @@ void HTMLRenderer::check_state_change(GfxState * state)
new_line_state = NLS_NONE;
if(all_changed || clip_changed)
{
double x1, x2, y1, y2;
state->getClipBBox(&x1, &y1, &x2, &y2);
html_text_page.clip(x1, y1, x2, y2);
}
bool need_recheck_position = false;
bool need_rescale_font = false;
bool draw_text_scale_changed = false;

View File

@ -29,6 +29,8 @@ HTMLTextLine::HTMLTextLine (const HTMLLineState & line_state, const Param & para
:param(param)
,all_manager(all_manager)
,line_state(line_state)
,clip_x1(0)
,clip_y1(0)
{ }
void HTMLTextLine::append_unicodes(const Unicode * u, int l)
@ -81,9 +83,9 @@ void HTMLTextLine::dump_text(ostream & out)
// open <div> for the current text line
out << "<div class=\"" << CSS::LINE_CN
<< " " << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(line_state.transform_matrix)
<< " " << CSS::LEFT_CN << all_manager.left.install(line_state.x)
<< " " << CSS::LEFT_CN << all_manager.left.install(line_state.x - clip_x1)
<< " " << CSS::HEIGHT_CN << all_manager.height.install(ascent)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(line_state.y)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(line_state.y - clip_y1)
;
// it will be closed by the first state
}
@ -231,6 +233,12 @@ void HTMLTextLine::clear(void)
text.clear();
}
void HTMLTextLine::clip(double x1, double y1, double x2, double y2)
{
clip_x1 = x1;
clip_y1 = y1;
}
void HTMLTextLine::prepare(void)
{
if(param.optimize_text)

View File

@ -81,6 +81,8 @@ public:
bool text_empty(void) const { return text.empty(); }
void clear(void);
void clip(double x1, double y1, double x2, double y2);
/*
* Optimize and calculate necessary values
*/
@ -93,6 +95,7 @@ private:
HTMLLineState line_state;
double ascent, descent;
double clip_x1, clip_y1;
std::vector<State> states;
std::vector<Offset> offsets;

View File

@ -7,6 +7,7 @@
*/
#include "HTMLTextPage.h"
#include "util/css_const.h"
namespace pdf2htmlEX {
@ -17,6 +18,8 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
: param(param)
, all_manager(all_manager)
, cur_line(nullptr)
, page_width(0)
, page_height(0)
{ }
void HTMLTextPage::dump_text(ostream & out)
@ -25,8 +28,42 @@ void HTMLTextPage::dump_text(ostream & out)
(*iter)->prepare();
if(param.optimize_text)
optimize();
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
(*iter)->dump_text(out);
//push a dummy entry for convenience
clip_boxes.emplace_back(0, 0, page_width, page_height, text_lines.size());
ClipBox cur_cb(0, 0, page_width, page_height, 0);
bool has_clip = false;
auto text_line_iter = text_lines.begin();
for(auto clip_iter = clip_boxes.begin(); clip_iter != clip_boxes.end(); ++clip_iter)
{
if(has_clip)
{
out << "<div class=\"" << CSS::CLIP_CN
<< " " << CSS::LEFT_CN << all_manager.left.install(cur_cb.x1)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(cur_cb.y1)
<< " " << CSS::WIDTH_CN << all_manager.width.install(cur_cb.x2 - cur_cb.x1)
<< " " << CSS::HEIGHT_CN << all_manager.height.install(cur_cb.y2 - cur_cb.y1)
<< "\">";
}
auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx;
while(text_line_iter != next_text_line_iter)
{
(*text_line_iter)->clip(cur_cb.x1, cur_cb.y1, cur_cb.x2, cur_cb.y2);
(*text_line_iter)->dump_text(out);
++text_line_iter;
}
if(has_clip)
{
out << "</div>";
}
cur_cb = *clip_iter;
has_clip = !(equal(0, cur_cb.x1) && equal(0, cur_cb.y1)
&& equal(page_width, cur_cb.x2) && equal(page_height, cur_cb.y2));
}
}
void HTMLTextPage::dump_css(ostream & out)
@ -37,6 +74,7 @@ void HTMLTextPage::dump_css(ostream & out)
/*
 * Forget the current line and drop every collected text line and clip box.
 */
void HTMLTextPage::clear(void)
{
    // cur_line is a raw pointer into text_lines, so it is reset together with the container
    cur_line = nullptr;
    clip_boxes.clear();
    text_lines.clear();
}
@ -50,6 +88,40 @@ void HTMLTextPage::open_new_line(const HTMLLineState & line_state)
cur_line = text_lines.back().get();
}
void HTMLTextPage::set_page_size(double width, double height)
{
page_width = width;
page_height = height;
}
void HTMLTextPage::clip(double x1, double y1, double x2, double y2)
{
if(!clip_boxes.empty())
{
auto & cb = clip_boxes.back();
if(cb.start_idx == text_lines.size())
{
/*
* Previous ClipBox is not used
*/
cb.x1 = x1;
cb.y1 = y1;
cb.x2 = x2;
cb.y2 = y2;
return;
}
if(equal(cb.x1, x1) && equal(cb.y1, y1)
&& equal(cb.x2, x2) && equal(cb.y2, y2))
{
/*
* same as previous ClipBox
*/
return;
}
}
clip_boxes.emplace_back(x1, y1, x2, y2, text_lines.size());
}
void HTMLTextPage::optimize(void)
{
//TODO

View File

@ -35,6 +35,10 @@ public:
void clear(void);
void open_new_line(const HTMLLineState & line_state);
/* for clipping */
void set_page_size(double width, double height);
void clip(double x1, double y1, double x2, double y2);
private:
void optimize(void);
@ -42,7 +46,18 @@ private:
const Param & param;
AllStateManager & all_manager;
HTMLTextLine * cur_line;
double page_width, page_height;
std::vector<std::unique_ptr<HTMLTextLine>> text_lines;
/*
 * A clip rectangle with corners (x1, y1) and (x2, y2), together with
 * the index into text_lines of the first line it applies to.
 */
struct ClipBox {
    double x1, y1, x2, y2;
    size_t start_idx;

    ClipBox(double x1, double y1, double x2, double y2, size_t start_idx)
        : x1{x1}
        , y1{y1}
        , x2{x2}
        , y2{y2}
        , start_idx{start_idx}
    { }
};
std::vector<ClipBox> clip_boxes;
};
} //namespace pdf2htmlEX

View File

@ -9,6 +9,7 @@ set(CSS_INVALID_ID "_")
set(CSS_LINE_CN "t") # Text
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
set(CSS_CLIP_CN "c") # Clip
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
set(CSS_PAGE_FRAME_CN "pf") # Page Frame

View File

@ -26,6 +26,7 @@ const char * const INVALID_ID = "@CSS_INVALID_ID@";
const char * const LINE_CN = "@CSS_LINE_CN@";
const char * const TRANSFORM_MATRIX_CN = "@CSS_TRANSFORM_MATRIX_CN@";
const char * const CLIP_CN = "@CSS_CLIP_CN@";
// page_decoration is for shadow etc
// page_frame cannot have margin or border-width, pdf2htmlEX.js will use it to determine the coordinates