diff --git a/share/base.css.in b/share/base.css.in
index 83a9246..cffa338 100644
--- a/share/base.css.in
+++ b/share/base.css.in
@@ -49,7 +49,7 @@
.@CSS_CSS_DRAW_CN@ { display:none; }
}
/* Part 2: Page Elements: Modify with caution
- * The followings are base classes, which are meant to be override by PDF specific classes
+ * The following are base classes, some of which are meant to be overridden by PDF-specific classes
* So do not increase the specificity (e.g. ".classname" -> "#page-container .classname")
*/
.@CSS_PAGE_DECORATION_CN@ { /* page decoration */
@@ -71,12 +71,15 @@
.@CSS_PAGE_CONTENT_BOX_CN@ { /* content of a page */
position:absolute;
border-width:0;
+ padding:0;
+ margin:0;
top:0;
left:0;
width:100%;
height:100%;
overflow:hidden;
display:block;
+ /* set transform-origin for scaling */
transform-origin:0% 0%;
-ms-transform-origin:0% 0%;
-moz-transform-origin:0% 0%;
@@ -114,6 +117,14 @@
.@CSS_PAGE_CONTENT_BOX_CN@ {overflow:visible;}
}
}
+.@CSS_CLIP_CN@ { /* clip box: absolutely positioned block; content extending past it is hidden */
+ position:absolute;
+ border-width:0;
+ padding:0;
+ margin:0;
+ overflow:hidden; /* this is what performs the clipping */
+ display:block;
+}
.@CSS_LINE_CN@ { /* text line */
position:absolute;
white-space:pre;
@@ -144,7 +155,7 @@ span { /* text blocks within a line */
.@CSS_PAGE_DATA_CN@ { /* info for Javascript */
display:none;
}
-.@CSS_LINE_CN@ { /* annotation links */
+.@CSS_LINK_CN@ { /* annotation links */
}
/* transparent color - WebKit */
.@CSS_CSS_DRAW_CN@ { /* css drawing */
diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h
index 80e3182..cdd17e0 100644
--- a/src/HTMLRenderer/HTMLRenderer.h
+++ b/src/HTMLRenderer/HTMLRenderer.h
@@ -296,8 +296,7 @@ protected:
{
NLS_NONE,
NLS_NEWSTATE,
- NLS_NEWLINE,
- NLS_NEWCLIP
+ NLS_NEWLINE
} new_line_state;
// for font reencoding
diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc
index 950501f..daafcea 100644
--- a/src/HTMLRenderer/general.cc
+++ b/src/HTMLRenderer/general.cc
@@ -173,8 +173,13 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
{
this->pageNum = pageNum;
- long long wid = all_manager.width.install(state->getPageWidth());
- long long hid = all_manager.height.install(state->getPageHeight());
+ double pageWidth = state->getPageWidth();
+ double pageHeight = state->getPageHeight();
+
+ html_text_page.set_page_size(pageWidth, pageHeight);
+
+ long long wid = all_manager.width.install(pageWidth);
+ long long hid = all_manager.height.install(pageHeight);
f_pages.fs
<< "
getClipBBox(&x1, &y1, &x2, &y2);
+ html_text_page.clip(x1, y1, x2, y2);
+ }
+
bool need_recheck_position = false;
bool need_rescale_font = false;
bool draw_text_scale_changed = false;
diff --git a/src/HTMLTextLine.cc b/src/HTMLTextLine.cc
index 598b95f..e2d81f7 100644
--- a/src/HTMLTextLine.cc
+++ b/src/HTMLTextLine.cc
@@ -29,6 +29,8 @@ HTMLTextLine::HTMLTextLine (const HTMLLineState & line_state, const Param & para
:param(param)
,all_manager(all_manager)
,line_state(line_state)
+ ,clip_x1(0)
+ ,clip_y1(0)
{ }
void HTMLTextLine::append_unicodes(const Unicode * u, int l)
@@ -81,9 +83,9 @@ void HTMLTextLine::dump_text(ostream & out)
// open
for the current text line
out << "
states;
std::vector
offsets;
diff --git a/src/HTMLTextPage.cc b/src/HTMLTextPage.cc
index 8780e5e..d1e092b 100644
--- a/src/HTMLTextPage.cc
+++ b/src/HTMLTextPage.cc
@@ -7,6 +7,7 @@
*/
#include "HTMLTextPage.h"
+#include "util/css_const.h"
namespace pdf2htmlEX {
@@ -17,6 +18,8 @@ HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
: param(param)
, all_manager(all_manager)
, cur_line(nullptr)
+ , page_width(0)
+ , page_height(0)
{ }
void HTMLTextPage::dump_text(ostream & out)
@@ -25,8 +28,42 @@ void HTMLTextPage::dump_text(ostream & out)
(*iter)->prepare();
if(param.optimize_text)
optimize();
- for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
- (*iter)->dump_text(out);
+
+ //push a sentinel entry (full page, starting past the last line) so the loop below also dumps the lines after the last real clip box
+ clip_boxes.emplace_back(0, 0, page_width, page_height, text_lines.size());
+
+ ClipBox cur_cb(0, 0, page_width, page_height, 0);
+ bool has_clip = false;
+
+ auto text_line_iter = text_lines.begin();
+ for(auto clip_iter = clip_boxes.begin(); clip_iter != clip_boxes.end(); ++clip_iter)
+ {
+ if(has_clip)
+ {
+ out << "";
+ }
+
+ auto next_text_line_iter = text_lines.begin() + clip_iter->start_idx;
+ while(text_line_iter != next_text_line_iter)
+ {
+ (*text_line_iter)->clip(cur_cb.x1, cur_cb.y1, cur_cb.x2, cur_cb.y2);
+ (*text_line_iter)->dump_text(out);
+ ++text_line_iter;
+ }
+ if(has_clip)
+ {
+ out << "
";
+ }
+
+ cur_cb = *clip_iter;
+ has_clip = !(equal(0, cur_cb.x1) && equal(0, cur_cb.y1)
+ && equal(page_width, cur_cb.x2) && equal(page_height, cur_cb.y2));
+ }
}
void HTMLTextPage::dump_css(ostream & out)
@@ -37,6 +74,7 @@ void HTMLTextPage::dump_css(ostream & out)
void HTMLTextPage::clear(void)
{
text_lines.clear();
+ clip_boxes.clear(); // clip boxes store indices into text_lines, so reset them together
cur_line = nullptr;
}
@@ -50,6 +88,40 @@ void HTMLTextPage::open_new_line(const HTMLLineState & line_state)
cur_line = text_lines.back().get();
}
+void HTMLTextPage::set_page_size(double width, double height)
+{
+ page_width = width; // kept for dump_text(), which compares each clip box against the full page rectangle
+ page_height = height;
+}
+
+void HTMLTextPage::clip(double x1, double y1, double x2, double y2)
+{
+ if(!clip_boxes.empty())
+ {
+ auto & cb = clip_boxes.back();
+ if(cb.start_idx == text_lines.size())
+ {
+ /*
+ * No text line was added since the previous ClipBox was recorded: overwrite it in place
+ */
+ cb.x1 = x1;
+ cb.y1 = y1;
+ cb.x2 = x2;
+ cb.y2 = y2;
+ return;
+ }
+ if(equal(cb.x1, x1) && equal(cb.y1, y1)
+ && equal(cb.x2, x2) && equal(cb.y2, y2))
+ {
+ /*
+ * Same rectangle as the previous ClipBox: nothing to record
+ */
+ return;
+ }
+ }
+ clip_boxes.emplace_back(x1, y1, x2, y2, text_lines.size()); // new box starts at the current end of text_lines
+}
+
void HTMLTextPage::optimize(void)
{
//TODO
diff --git a/src/HTMLTextPage.h b/src/HTMLTextPage.h
index 5e6683e..2125519 100644
--- a/src/HTMLTextPage.h
+++ b/src/HTMLTextPage.h
@@ -35,6 +35,10 @@ public:
void clear(void);
void open_new_line(const HTMLLineState & line_state);
+
+ /* for clipping */
+ void set_page_size(double width, double height);
+ void clip(double x1, double y1, double x2, double y2);
private:
void optimize(void);
@@ -42,7 +46,18 @@ private:
const Param & param;
AllStateManager & all_manager;
HTMLTextLine * cur_line;
+ double page_width, page_height;
+
std::vector> text_lines;
+
+ struct ClipBox { // a clip rectangle together with the text line index at which it starts to apply
+ ClipBox(double x1, double y1, double x2, double y2, size_t start_idx)
+ :x1(x1),y1(y1),x2(x2),y2(y2),start_idx(start_idx)
+ { }
+ double x1, y1, x2, y2; // rectangle corners as passed to HTMLTextPage::clip()
+ size_t start_idx; // value of text_lines.size() when the box was recorded
+ };
+ std::vector clip_boxes;
};
} //namespace pdf2htmlEX
diff --git a/src/css_class_names.cmakelists.txt b/src/css_class_names.cmakelists.txt
index 6b604e6..18217f5 100644
--- a/src/css_class_names.cmakelists.txt
+++ b/src/css_class_names.cmakelists.txt
@@ -9,6 +9,7 @@ set(CSS_INVALID_ID "_")
set(CSS_LINE_CN "t") # Text
set(CSS_TRANSFORM_MATRIX_CN "m") # Matrix
+set(CSS_CLIP_CN "c") # Clip
set(CSS_PAGE_DECORATION_CN "pd") # Page Decoration
set(CSS_PAGE_FRAME_CN "pf") # Page Frame
diff --git a/src/util/css_const.h.in b/src/util/css_const.h.in
index 1e9f1aa..edc4f0f 100644
--- a/src/util/css_const.h.in
+++ b/src/util/css_const.h.in
@@ -26,6 +26,7 @@ const char * const INVALID_ID = "@CSS_INVALID_ID@";
const char * const LINE_CN = "@CSS_LINE_CN@";
const char * const TRANSFORM_MATRIX_CN = "@CSS_TRANSFORM_MATRIX_CN@";
+const char * const CLIP_CN = "@CSS_CLIP_CN@";
// page_decoration is for shadow etc
// page_frame cannot have margin or border-width, pdf2htmlEX.js will use it to determine the coordinates