diff --git a/CMakeLists.txt b/CMakeLists.txt
index bb47b10..8d75405 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -163,6 +163,8 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
     src/Color.cc
     src/CoveredTextHandler.h
     src/CoveredTextHandler.cc
+    src/DrawingTracer.h
+    src/DrawingTracer.cc
     src/HTMLState.h
     src/HTMLTextLine.h
     src/HTMLTextLine.cc
diff --git a/src/DrawingTracer.cc b/src/DrawingTracer.cc
new file mode 100644
index 0000000..15b9f20
--- /dev/null
+++ b/src/DrawingTracer.cc
@@ -0,0 +1,288 @@
+/*
+ * DrawingTracer.cc
+ *
+ * Created on: 2014-6-15
+ * Author: duanyao
+ */
+
+#include <cstring>
+
+#include "GfxFont.h"
+
+#include "util/math.h"
+#include "DrawingTracer.h"
+
+namespace pdf2htmlEX
+{
+
+/*
+ * Store the conversion parameters. No cairo context exists until reset().
+ */
+DrawingTracer::DrawingTracer(const Param & param):param(param)
+{
+}
+
+/*
+ * Release the cairo context, if one is still alive.
+ */
+DrawingTracer::~DrawingTracer()
+{
+    finish();
+}
+
+/*
+ * Start tracing a new page: drop the previous cairo context and create a
+ * new one on a recording surface whose extents are the page box of state.
+ * Does nothing unless param.process_covered_text is set.
+ */
+void DrawingTracer::reset(GfxState *state)
+{
+    if (!param.process_covered_text)
+        return;
+    finish();
+    // Positional initialization in member order (x, y, width, height); the
+    // "width:" / "height:" labels used previously are a GNU extension.
+    cairo_rectangle_t page_box {0, 0, state->getPageWidth(), state->getPageHeight()};
+    cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
+    cairo = cairo_create(surface);
+    // cairo_create() takes its own reference on the surface, so release ours;
+    // otherwise one recording surface is leaked for every page.
+    cairo_surface_destroy(surface);
+}
+
+/*
+ * Destroy the cairo context, if any, and clear the pointer.
+ */
+void DrawingTracer::finish()
+{
+    if (cairo)
+    {
+        cairo_destroy(cairo);
+        cairo = nullptr;
+    }
+}
+
+/*
+ * Copy the current transformation matrix of state into the cairo context.
+ */
+void DrawingTracer::set_ctm(GfxState *state)
+{
+    if (!param.process_covered_text)
+        return;
+    double * ctm = state->getCTM();
+    cairo_matrix_t matrix;
+    matrix.xx = ctm[0];
+    matrix.yx = ctm[1];
+    matrix.xy = ctm[2];
+    matrix.yy = ctm[3];
+    matrix.x0 = ctm[4];
+    matrix.y0 = ctm[5];
+    cairo_set_matrix (cairo, &matrix);
+}
+
+/*
+ * Intersect the cairo clip region with the current path of state.
+ * even_odd: use the even-odd fill rule instead of the winding rule.
+ */
+void DrawingTracer::clip(GfxState * state, bool even_odd)
+{
+    if (!param.process_covered_text)
+        return;
+    do_path (state, state->getPath());
+    cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
+    cairo_clip (cairo);
+}
+
+/*
+ * Clipping to a stroked path is not traced yet; currently a no-op.
+ */
+void DrawingTracer::clip_to_stroke_path(GfxState * state)
+{
+    if (!param.process_covered_text)
+        return;
+    // TODO cairo_stroke_to_path() ?
+}
+
+/*
+ * Push the cairo graphics state; pairs with restore().
+ */
+void DrawingTracer::save()
+{
+    if (!param.process_covered_text)
+        return;
+    cairo_save(cairo);
+}
+
+/*
+ * Pop the cairo graphics state pushed by save().
+ */
+void DrawingTracer::restore()
+{
+    if (!param.process_covered_text)
+        return;
+    cairo_restore(cairo);
+}
+
+/*
+ * Replace the current cairo path with the subpaths of path.
+ * Has no process_covered_text check of its own; clip(), stroke() and fill()
+ * check before calling.
+ */
+void DrawingTracer::do_path(GfxState * state, GfxPath * path)
+{
+    //copy from CairoOutputDev::doPath
+    GfxSubpath *subpath;
+    int i, j;
+    double x, y;
+    cairo_new_path (cairo);
+    for (i = 0; i < path->getNumSubpaths(); ++i) {
+        subpath = path->getSubpath(i);
+        if (subpath->getNumPoints() > 0) {
+            x = subpath->getX(0);
+            y = subpath->getY(0);
+            cairo_move_to (cairo, x, y);
+            j = 1;
+            while (j < subpath->getNumPoints()) {
+                if (subpath->getCurve(j)) {
+                    // a curve segment uses three points: j, j+1 (controls) and j+2 (end)
+                    x = subpath->getX(j+2);
+                    y = subpath->getY(j+2);
+                    cairo_curve_to(cairo,
+                        subpath->getX(j), subpath->getY(j),
+                        subpath->getX(j+1), subpath->getY(j+1),
+                        x, y);
+                    j += 3;
+                } else {
+                    x = subpath->getX(j);
+                    y = subpath->getY(j);
+                    cairo_line_to (cairo, x, y);
+                    ++j;
+                }
+            }
+            if (subpath->isClosed()) {
+                cairo_close_path (cairo);
+            }
+        }
+    }
+}
+
+/*
+ * Report the extents of stroking the current path of state as a non-char
+ * bounding box.
+ */
+void DrawingTracer::stroke(GfxState * state)
+{
+    if (!param.process_covered_text)
+        return;
+    // TODO
+    // 1. if stroke extents is large, break the path into pieces and handle each of them;
+    // 2. if the line width is small, could just ignore the path?
+    do_path(state, state->getPath());
+    cairo_set_line_width(cairo, state->getLineWidth());
+    double sbox[4];
+    cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3);
+    draw_non_char_bbox(state, sbox);
+}
+
+/*
+ * Report the extents of filling the current path of state as a non-char
+ * bounding box. even_odd is currently unused (see the comment below).
+ */
+void DrawingTracer::fill(GfxState * state, bool even_odd)
+{
+    if (!param.process_covered_text)
+        return;
+    do_path(state, state->getPath());
+    //cairo_fill_extents don't take fill rule into account.
+    //cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
+    double fbox[4];
+    cairo_fill_extents(cairo, fbox, fbox + 1, fbox + 2, fbox + 3);
+    draw_non_char_bbox(state, fbox);
+}
+
+/*
+ * Transform bbox (x1, y1, x2, y2) by the CTM of state, in place, and pass it
+ * to on_non_char_drawn if that callback is set.
+ */
+void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox)
+{
+    // The clip extents are queried but not applied yet.
+    double cbox[4];
+    cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
+    // TODO intersect
+    tm_transform_bbox(state->getCTM(), bbox);
+    if (on_non_char_drawn)
+        on_non_char_drawn(bbox);
+}
+
+/*
+ * Transform bbox (x1, y1, x2, y2) by the CTM of state, in place, and pass it
+ * to on_char_drawn if that callback is set.
+ */
+void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox)
+{
+    // The clip extents are queried but not applied yet.
+    double cbox[4];
+    cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
+    // TODO intersect
+    tm_transform_bbox(state->getCTM(), bbox);
+    if (on_char_drawn)
+        on_char_drawn(bbox);
+}
+
+/*
+ * Report an image: the unit square transformed by the CTM of state.
+ */
+void DrawingTracer::draw_image(GfxState *state)
+{
+    if (!param.process_covered_text)
+        return;
+    double bbox[4] {0, 0, 1, 1};
+    draw_non_char_bbox(state, bbox);
+}
+
+/*
+ * Report the bounding box of a character.
+ * x, y: glyph-drawing position, in PDF text object space.
+ * ax, ay: glyph advance, in glyph space.
+ */
+void DrawingTracer::draw_char(GfxState *state, double x, double y, double ax, double ay)
+{
+    if (!param.process_covered_text)
+        return;
+
+    Matrix tm, itm;
+    //memcpy(tm_ctm.m, this->cur_text_tm, sizeof(tm_ctm.m));
+    memcpy(tm.m, state->getTextMat(), sizeof(tm.m));
+    double fs = state->getFontSize();
+
+    double cx = state->getCurX(), cy = state->getCurY(),
+        ry = state->getRise(), h = state->getHorizScaling();
+
+    //cx and cy has been transformed by text matrix, we need to reverse them.
+    tm.invertTo(&itm);
+    double char_cx, char_cy;
+    itm.transform(cx, cy, &char_cx, &char_cy);
+
+    //TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
+    double tchar[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry};
+
+    double tfinal[6];
+    tm_multiply(tfinal, tm.m, tchar);
+
+    auto font = state->getFont();
+    double bbox[4] {0, 0, ax, ay};
+    double desc = font->getDescent(), asc = font->getAscent();
+    if (font->getWMode() == 0)
+    {
+        bbox[1] += desc;
+        bbox[3] += asc;
+    }
+    else
+    {//TODO Vertical?
+    }
+    tm_transform_bbox(tfinal, bbox);
+    draw_char_bbox(state, bbox);
+}
+
+} /* namespace pdf2htmlEX */
diff --git a/src/DrawingTracer.h b/src/DrawingTracer.h
new file mode 100644
index 0000000..81fd4b0
--- /dev/null
+++ b/src/DrawingTracer.h
@@ -0,0 +1,79 @@
+/*
+ * DrawingTracer.h
+ *
+ * Created on: 2014-6-15
+ * Author: duanyao
+ */
+
+#ifndef DRAWINGTRACER_H__
+#define DRAWINGTRACER_H__
+
+#include <functional>
+
+#include <GfxState.h>
+#include <cairo.h>
+
+#include "Param.h"
+
+namespace pdf2htmlEX
+{
+
+/*
+ * Mirrors the drawing operations of a page into a cairo context and reports
+ * bounding boxes of drawn characters and other graphics through callbacks.
+ * Every public tracing method returns immediately unless
+ * param.process_covered_text is set.
+ */
+class DrawingTracer
+{
+public:
+    /*
+     * The callback to receive drawn event.
+     * bbox in device space.
+     */
+    std::function<void(double * bbox)> on_non_char_drawn;
+    std::function<void(double * bbox)> on_char_drawn;
+    // Not invoked anywhere in DrawingTracer.cc yet.
+    std::function<void(double * bbox)> on_char_clipped;
+
+    DrawingTracer(const Param & param);
+    virtual ~DrawingTracer();
+    // The tracer owns a raw cairo_t *; a copy would destroy it twice.
+    DrawingTracer(const DrawingTracer &) = delete;
+    DrawingTracer & operator=(const DrawingTracer &) = delete;
+    /*
+     * Start tracing a new page; the page size is read from state.
+     */
+    void reset(GfxState * state);
+
+    /*
+     * A character is drawing
+     * x, y: glyph-drawing position, in PDF text object space.
+     * ax, ay: glyph advance, in glyph space.
+     */
+    void draw_char(GfxState * state, double x, double y, double ax, double ay);
+    /*
+     * An image is drawing
+     */
+    void draw_image(GfxState * state);
+    void set_ctm(GfxState * state);
+    void clip(GfxState * state, bool even_odd = false);
+    void clip_to_stroke_path(GfxState * state);
+    void fill(GfxState * state, bool even_odd = false);
+    void stroke(GfxState * state);
+    void save();
+    void restore();
+
+private:
+    void finish();
+    // Following methods operate in user space (just before CTM is applied)
+    void do_path(GfxState * state, GfxPath * path);
+    void draw_non_char_bbox(GfxState * state, double * bbox);
+    void draw_char_bbox(GfxState * state, double * bbox);
+
+    const Param & param;
+    cairo_t * cairo = nullptr;
+};
+
+} /* namespace pdf2htmlEX */
+#endif /* DRAWINGTRACER_H__ */
diff --git a/src/HTMLRenderer/HTMLRenderer.h b/src/HTMLRenderer/HTMLRenderer.h index 75293f8..8aa0a0f 100644 --- a/src/HTMLRenderer/HTMLRenderer.h +++ b/src/HTMLRenderer/HTMLRenderer.h @@ -20,6 +20,8 @@ #include #include +#include + #include "pdf2htmlEX-config.h" #include 
"Param.h" @@ -32,10 +34,12 @@ #include "BackgroundRenderer/BackgroundRenderer.h" #include "CoveredTextHandler.h" +#include "DrawingTracer.h" #include "util/const.h" #include "util/misc.h" + namespace pdf2htmlEX { class HTMLRenderer : public OutputDev @@ -90,7 +94,9 @@ public: * We just mark as changed, and recheck if they have been changed when we are about to output a new string */ - virtual void restoreState(GfxState * state) { updateAll(state); } + virtual void restoreState(GfxState * state); + + virtual void saveState(GfxState *state); virtual void updateAll(GfxState * state); @@ -135,15 +141,16 @@ public: GfxImageColorMap *maskColorMap, GBool maskInterpolate); - virtual void stroke(GfxState *state) { css_do_path(state, false); } - virtual void fill(GfxState *state) { css_do_path(state, true); } + virtual void stroke(GfxState *state); ////{ css_do_path(state, false); } + virtual void fill(GfxState *state); ////{ css_do_path(state, true); } + virtual void eoFill(GfxState *state); virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); virtual void processLink(AnnotLink * al); /* capacity test */ - bool can_stroke(GfxState *state) { return css_do_path(state, false, true); } - bool can_fill(GfxState *state) { return css_do_path(state, true, true); } + bool can_stroke(GfxState *state) { return false; } ////{ return css_do_path(state, false, true); } + bool can_fill(GfxState *state) { return false; } ////{ return css_do_path(state, true, true); } const std::vector & get_chars_covered() { return covered_text_handler.get_chars_covered(); } @@ -207,6 +214,7 @@ protected: // make sure the current HTML style consistent with PDF void prepare_text_line(GfxState * state); +#if 0 //disable CSS drawing //////////////////////////////////////////////////// // CSS drawing //////////////////////////////////////////////////// @@ -226,20 +234,8 @@ protected: double * line_width_array, int line_width_count, const GfxRGB * line_color, const 
GfxRGB * fill_color, void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); +#endif //disable CSS drawing - //////////////////////////////////////////////////// - // Covered text handling - //////////////////////////////////////////////////// - /* - * Cue CoveredTextHandler that a character is drawn - * x, y: glyph-drawing position, in PDF text object space. - * ax, ay: glyph advance, in glyph space. - */ - void add_char_bbox(GfxState *state, double x, double y, double ax, double ay); - /* - * Cue CoveredTextHandler that an image is drawn - */ - void add_image_bbox(GfxState *state); //////////////////////////////////////////////////// // PDF stuffs @@ -365,6 +361,7 @@ protected: static const std::string MANIFEST_FILENAME; CoveredTextHandler covered_text_handler; + DrawingTracer tracer; }; } //namespace pdf2htmlEX diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 7a84b1a..b87c897 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -30,6 +30,38 @@ using std::sqrt; using std::vector; using std::ostream; +void HTMLRenderer::restoreState(GfxState * state) /* calls updateAll(state), then pops the tracer's saved state */ +{ + updateAll(state); tracer.restore(); +} + +void HTMLRenderer::saveState(GfxState *state) /* pushes the tracer's state; state itself is not used */ +{ + tracer.save(); +} + +void HTMLRenderer::stroke(GfxState * state) /* forwards to tracer.stroke(); nothing else is done here */ +{ + tracer.stroke(state); +} + +void HTMLRenderer::fill(GfxState * state) /* forwards to tracer.fill() without passing even_odd */ +{ + tracer.fill(state); +} + +void HTMLRenderer::eoFill(GfxState * state) /* forwards to tracer.fill() with even_odd = true */ +{ + tracer.fill(state, true); +} + +GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) /* traces the fill via tracer.fill() and returns true; shading, tMin and tMax are not used */ +{ + tracer.fill(state); //TODO correct? 
+ return true; +} + +#if 0 //disable css drawing static bool is_horizontal_line(GfxSubpath * path) { return ((path->getNumPoints() == 2) @@ -415,6 +447,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co (*f_curpage) << "\">"; } +#endif //disable css drawing } // namespace pdf2htmlEX diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index 3d43ede..d784f3b 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -46,6 +47,7 @@ HTMLRenderer::HTMLRenderer(const Param & param) ,html_text_page(param, all_manager) ,preprocessor(param) ,tmp_files(param) + ,tracer(param) { if(!(param.debug)) { @@ -76,6 +78,13 @@ HTMLRenderer::HTMLRenderer(const Param & param) all_manager.height .set_eps(EPS); all_manager.width .set_eps(EPS); all_manager.bottom .set_eps(EPS); + + tracer.on_char_drawn = + [this](double * box) { covered_text_handler.add_char_bbox(box); }; + tracer.on_char_clipped = + [this](double * box) { covered_text_handler.add_char_bbox(box); }; //TODO + tracer.on_non_char_drawn = + [this](double * box) { covered_text_handler.add_non_char_bbox(box); }; } HTMLRenderer::~HTMLRenderer() @@ -136,7 +145,6 @@ void HTMLRenderer::process(PDFDoc *doc) // We handle covered texts during doc->displayPage(this...), // and bg_renderer->render_page() depends on the result, so it must be called after // doc->displayPage(this...). 
- covered_text_handler.reset(); doc->displayPage(this, i, text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, 0, @@ -194,6 +202,9 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref) #endif { + covered_text_handler.reset(); + tracer.reset(state); + this->pageNum = pageNum; double pageWidth = state->getPageWidth(); diff --git a/src/HTMLRenderer/image.cc b/src/HTMLRenderer/image.cc index 3e4f8d0..91ca767 100644 --- a/src/HTMLRenderer/image.cc +++ b/src/HTMLRenderer/image.cc @@ -14,7 +14,7 @@ namespace pdf2htmlEX { void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg) { - add_image_bbox(state); + tracer.draw_image(state); return OutputDev::drawImage(state,ref,str,width,height,colorMap,interpolate,maskColors,inlineImg); @@ -73,21 +73,11 @@ void HTMLRenderer::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str GfxImageColorMap *maskColorMap, GBool maskInterpolate) { - add_image_bbox(state); + tracer.draw_image(state); - return OutputDev::drawSoftMaskedImage(state,ref,str, + return OutputDev::drawSoftMaskedImage(state,ref,str, // TODO really required? 
width,height,colorMap,interpolate, maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate); } -void HTMLRenderer::add_image_bbox(GfxState *state) -{ - if (!param.process_covered_text) - return; - auto ctm = state->getCTM(); - double bbox[4] {0, 0, 1, 1}; - tm_transform_bbox(ctm, bbox); - covered_text_handler.add_non_char_bbox(bbox); -} - } // namespace pdf2htmlEX diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 8df9700..498655a 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -46,6 +46,7 @@ void HTMLRenderer::updateFont(GfxState * state) void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32) { ctm_changed = true; + tracer.set_ctm(state); } void HTMLRenderer::updateTextMat(GfxState * state) { @@ -89,14 +90,17 @@ void HTMLRenderer::updateStrokeColor(GfxState * state) void HTMLRenderer::clip(GfxState * state) { clip_changed = true; + tracer.clip(state); } void HTMLRenderer::eoClip(GfxState * state) { clip_changed = true; + tracer.clip(state, true); } void HTMLRenderer::clipToStrokePath(GfxState * state) { clip_changed = true; + tracer.clip_to_stroke_path(state); } void HTMLRenderer::reset_state() { diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 0122356..13f2065 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -72,7 +72,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) cerr << "TODO: non-zero origins" << endl; } - add_char_bbox(state, dx, dy, dx1, dy1); + tracer.draw_char(state, dx, dy, dx1, dy1); //TODO dx dy seems not correct? 
bool is_space = false; if (n == 1 && *p == ' ') @@ -145,43 +145,4 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) draw_ty += dy; } -void HTMLRenderer::add_char_bbox(GfxState *state, double x, double y, double ax, double ay) -{ - if (!param.process_covered_text) - return; - - Matrix tm_ctm, tm, itm; - memcpy(tm_ctm.m, this->cur_text_tm, sizeof(tm_ctm.m)); - memcpy(tm.m, state->getTextMat(), sizeof(tm.m)); - double fs = state->getFontSize(); - - double cx = state->getCurX(), cy = state->getCurY(), - ry = state->getRise(), h = state->getHorizScaling(); - - //cx and cy has been transformed by text matrix, we need to reverse them. - tm.invertTo(&itm); - double char_cx, char_cy; - itm.transform(cx, cy, &char_cx, &char_cy); - - //TODO Vertical? Currently vertical/type3 chars are treated as non-chars. - double tchar[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry}; - - double tfinal[6]; - tm_multiply(tfinal, tm_ctm.m, tchar); - - auto font = state->getFont(); - double bbox[4] {0, 0, ax, ay}; - double desc = font->getDescent(), asc = font->getAscent(); - if (font->getWMode() == 0) - { - bbox[1] += desc; - bbox[3] += asc; - } - else - {//TODO Vertical? - } - tm_transform_bbox(tfinal, bbox); - covered_text_handler.add_char_bbox(bbox); -} - } // namespace pdf2htmlEX