1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 01:28:39 +00:00

Handle texts covered by paths; concentrate drawing-tracing code in the DrawingTracer class; disable CSS drawing.

This commit is contained in:
Duan Yao 2014-06-15 13:35:24 +08:00
parent ce28c00a49
commit bd3f165ae2
9 changed files with 354 additions and 72 deletions

View File

@ -163,6 +163,8 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
src/Color.cc
src/CoveredTextHandler.h
src/CoveredTextHandler.cc
src/DrawingTracer.h
src/DrawingTracer.cc
src/HTMLState.h
src/HTMLTextLine.h
src/HTMLTextLine.cc

218
src/DrawingTracer.cc Normal file
View File

@ -0,0 +1,218 @@
/*
* DrawingTracer.cc
*
* Created on: 2014-6-15
* Author: duanyao
*/
#include "GfxFont.h"
#include "util/math.h"
#include "DrawingTracer.h"
namespace pdf2htmlEX
{
// Keeps a reference to the conversion parameters. No cairo context exists
// until reset() is called.
DrawingTracer::DrawingTracer(const Param & param)
    : param(param)
{
}
// Releases the cairo context (if any) through finish().
DrawingTracer::~DrawingTracer()
{
finish();
}
/*
 * Begin tracing a new page.
 * Discards the previous cairo context, then creates a new one that draws
 * into a recording surface whose extents are the page box reported by state.
 * Does nothing when param.process_covered_text is off.
 */
void DrawingTracer::reset(GfxState *state)
{
    if (!param.process_covered_text)
        return;
    finish();

    // Positional initialization in member order (x, y, width, height).
    // The former "width:" / "height:" labels were an obsolete GCC-only syntax.
    cairo_rectangle_t page_box {0, 0, state->getPageWidth(), state->getPageHeight()};
    cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
    cairo = cairo_create(surface);
    // cairo_create() takes its own reference on the target surface, so our
    // reference must be dropped here; otherwise one surface leaks per page.
    cairo_surface_destroy(surface);
}
// Destroy the current cairo context, if there is one, and forget it.
// Safe to call repeatedly.
void DrawingTracer::finish()
{
    if (cairo == nullptr)
        return;
    cairo_destroy(cairo);
    cairo = nullptr;
}
// Replace the cairo context's transformation matrix with the CTM currently
// held by state. Does nothing when covered-text processing is off.
void DrawingTracer::set_ctm(GfxState *state)
{
    if (param.process_covered_text)
    {
        const double * m = state->getCTM();
        cairo_matrix_t cm;
        // Argument order of cairo_matrix_init: xx, yx, xy, yy, x0, y0
        cairo_matrix_init(&cm, m[0], m[1], m[2], m[3], m[4], m[5]);
        cairo_set_matrix(cairo, &cm);
    }
}
// Intersect the traced clip region with the current path of state, using the
// even-odd rule when even_odd is true and the winding rule otherwise.
void DrawingTracer::clip(GfxState * state, bool even_odd)
{
    if (param.process_covered_text)
    {
        do_path(state, state->getPath());
        const cairo_fill_rule_t rule =
            even_odd ? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING;
        cairo_set_fill_rule(cairo, rule);
        cairo_clip(cairo);
    }
}
// Placeholder: clipping to a stroked path is not traced yet, so this leaves
// the cairo context untouched.
void DrawingTracer::clip_to_stroke_path(GfxState * state)
{
if (!param.process_covered_text)
return;
// TODO cairo_stroke_to_path() ?
}
// Push the cairo context's state (matched by restore()).
void DrawingTracer::save()
{
    if (param.process_covered_text)
        cairo_save(cairo);
}
// Pop the cairo context's state previously pushed by save().
void DrawingTracer::restore()
{
    if (param.process_covered_text)
        cairo_restore(cairo);
}
/*
 * Rebuild path as the current path of the cairo context.
 * Adapted from CairoOutputDev::doPath.
 * Callers are responsible for checking param.process_covered_text first;
 * this function uses the cairo context unconditionally.
 */
void DrawingTracer::do_path(GfxState * state, GfxPath * path)
{
    cairo_new_path(cairo);
    for (int s = 0; s < path->getNumSubpaths(); ++s)
    {
        GfxSubpath * sub = path->getSubpath(s);
        if (sub->getNumPoints() <= 0)
            continue;

        cairo_move_to(cairo, sub->getX(0), sub->getY(0));
        for (int p = 1; p < sub->getNumPoints(); )
        {
            if (!sub->getCurve(p))
            {
                // Straight segment: consumes one point
                cairo_line_to(cairo, sub->getX(p), sub->getY(p));
                ++p;
                continue;
            }
            // Curve segment: two control points followed by the end point
            cairo_curve_to(cairo,
                sub->getX(p), sub->getY(p),
                sub->getX(p + 1), sub->getY(p + 1),
                sub->getX(p + 2), sub->getY(p + 2));
            p += 3;
        }
        if (sub->isClosed())
            cairo_close_path(cairo);
    }
}
/*
 * Trace a stroke of the current path of state: compute the stroke extents
 * with the state's line width and report them as a non-character drawing.
 */
void DrawingTracer::stroke(GfxState * state)
{
    if (!param.process_covered_text)
        return;

    // TODO
    // 1. if stroke extents is large, break the path into pieces and handle each of them;
    // 2. if the line width is small, could just ignore the path?
    do_path(state, state->getPath());
    cairo_set_line_width(cairo, state->getLineWidth());

    double extents[4];
    cairo_stroke_extents(cairo, &extents[0], &extents[1], &extents[2], &extents[3]);
    draw_non_char_bbox(state, extents);
}
/*
 * Trace a fill of the current path of state: compute the fill extents and
 * report them as a non-character drawing.
 * even_odd is currently ignored, see the note below.
 */
void DrawingTracer::fill(GfxState * state, bool even_odd)
{
    if (!param.process_covered_text)
        return;

    do_path(state, state->getPath());

    // cairo_fill_extents does not take the fill rule into account, so it is not set:
    // cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
    double extents[4];
    cairo_fill_extents(cairo, &extents[0], &extents[1], &extents[2], &extents[3]);
    draw_non_char_bbox(state, extents);
}
/*
 * Report a non-character drawing.
 * bbox: box of the drawing before the CTM is applied; it is transformed by
 *       the CTM in place and then handed to on_non_char_drawn, if set.
 */
void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox)
{
    // The clip extents are queried but not used yet.
    double cbox[4];
    cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
    // TODO intersect bbox with cbox
    tm_transform_bbox(state->getCTM(), bbox);
    if (on_non_char_drawn)
        on_non_char_drawn(bbox);
}
/*
 * Report a character drawing.
 * bbox: box of the character before the CTM is applied; it is transformed by
 *       the CTM in place and then handed to on_char_drawn, if set.
 */
void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox)
{
    // The clip extents are queried but not used yet.
    double cbox[4];
    cairo_clip_extents(cairo, cbox, cbox + 1, cbox + 2, cbox + 3);
    // TODO intersect bbox with cbox
    tm_transform_bbox(state->getCTM(), bbox);
    if (on_char_drawn)
        on_char_drawn(bbox);
}
// Trace an image: its box is taken to be the unit square before the CTM is
// applied, and it is reported as a non-character drawing.
void DrawingTracer::draw_image(GfxState *state)
{
    if (param.process_covered_text)
    {
        double unit_square[4] {0, 0, 1, 1};
        draw_non_char_bbox(state, unit_square);
    }
}
/*
 * Trace a character.
 * x, y: glyph-drawing position, in PDF text object space.
 * ax, ay: glyph advance, in glyph space.
 * Builds the character's box, maps it through the text matrix and reports it
 * via draw_char_bbox(), which applies the CTM.
 */
void DrawingTracer::draw_char(GfxState *state, double x, double y, double ax, double ay)
{
    if (!param.process_covered_text)
        return;

    Matrix text_mat, inv_text_mat;
    memcpy(text_mat.m, state->getTextMat(), sizeof(text_mat.m));

    const double font_size = state->getFontSize();
    const double cur_x = state->getCurX();
    const double cur_y = state->getCurY();
    const double rise = state->getRise();
    const double horiz_scaling = state->getHorizScaling();

    // The current position has already been transformed by the text matrix;
    // map it back through the inverse.
    text_mat.invertTo(&inv_text_mat);
    double origin_x, origin_y;
    inv_text_mat.transform(cur_x, cur_y, &origin_x, &origin_y);

    //TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
    double char_tm[6] {font_size * horiz_scaling, 0, 0, font_size,
                       origin_x + x, origin_y + y + rise};
    double combined_tm[6];
    tm_multiply(combined_tm, text_mat.m, char_tm);

    auto font = state->getFont();
    double bbox[4] {0, 0, ax, ay};
    const double descent = font->getDescent();
    const double ascent = font->getAscent();
    if (font->getWMode() == 0)
    {
        // Horizontal writing: extend the box vertically by descent/ascent
        bbox[1] += descent;
        bbox[3] += ascent;
    }
    //TODO Vertical? (other writing modes leave the box as is)

    tm_transform_bbox(combined_tm, bbox);
    draw_char_bbox(state, bbox);
}
} /* namespace pdf2htmlEX */

66
src/DrawingTracer.h Normal file
View File

@ -0,0 +1,66 @@
/*
* DrawingTracer.h
*
* Created on: 2014-6-15
* Author: duanyao
*/
#ifndef DRAWINGTRACER_H__
#define DRAWINGTRACER_H__
#include <functional>
#include <GfxState.h>
#include <cairo.h>
#include "Param.h"
namespace pdf2htmlEX
{
/*
 * Traces drawing operations on a page with a cairo context and reports the
 * bounding boxes of drawn characters and non-character graphics through
 * callbacks.
 * All tracing methods do nothing unless param.process_covered_text is set.
 */
class DrawingTracer
{
public:
    /*
     * The callbacks to receive drawn events.
     * bbox in device space.
     */
    std::function<void(double * bbox)> on_non_char_drawn;
    std::function<void(double * bbox)> on_char_drawn;
    std::function<void(double * bbox)> on_char_clipped;

    DrawingTracer(const Param & param);
    virtual ~DrawingTracer();

    // The tracer owns a raw cairo context that the destructor releases;
    // a copy would release the same context twice, so copying is disabled.
    DrawingTracer(const DrawingTracer &) = delete;
    DrawingTracer & operator=(const DrawingTracer &) = delete;

    /*
     * Start tracing a new page; any previous tracing state is discarded.
     */
    void reset(GfxState * state);

    /*
     * A character is being drawn.
     * x, y: glyph-drawing position, in PDF text object space.
     * ax, ay: glyph advance, in glyph space.
     */
    void draw_char(GfxState * state, double x, double y, double ax, double ay);
    /*
     * An image is being drawn.
     */
    void draw_image(GfxState * state);

    void set_ctm(GfxState * state);
    void clip(GfxState * state, bool even_odd = false);
    void clip_to_stroke_path(GfxState * state);
    void fill(GfxState * state, bool even_odd = false);
    void stroke(GfxState * state);
    void save();
    void restore();

private:
    void finish();
    // Following methods operate in user space (just before CTM is applied)
    void do_path(GfxState * state, GfxPath * path);
    void draw_non_char_bbox(GfxState * state, double * bbox);
    void draw_char_bbox(GfxState * state, double * bbox);

    const Param & param;
    // Owned; nullptr while no page is being traced.
    cairo_t * cairo = nullptr;
};
} /* namespace pdf2htmlEX */
#endif /* DRAWINGTRACER_H__ */

View File

@ -20,6 +20,8 @@
#include <GfxFont.h>
#include <Annot.h>
#include <cairo.h>
#include "pdf2htmlEX-config.h"
#include "Param.h"
@ -32,10 +34,12 @@
#include "BackgroundRenderer/BackgroundRenderer.h"
#include "CoveredTextHandler.h"
#include "DrawingTracer.h"
#include "util/const.h"
#include "util/misc.h"
namespace pdf2htmlEX {
class HTMLRenderer : public OutputDev
@ -90,7 +94,9 @@ public:
* We just mark as changed, and recheck if they have been changed when we are about to output a new string
*/
virtual void restoreState(GfxState * state) { updateAll(state); }
virtual void restoreState(GfxState * state);
virtual void saveState(GfxState *state);
virtual void updateAll(GfxState * state);
@ -135,15 +141,16 @@ public:
GfxImageColorMap *maskColorMap,
GBool maskInterpolate);
virtual void stroke(GfxState *state) { css_do_path(state, false); }
virtual void fill(GfxState *state) { css_do_path(state, true); }
virtual void stroke(GfxState *state); ////{ css_do_path(state, false); }
virtual void fill(GfxState *state); ////{ css_do_path(state, true); }
virtual void eoFill(GfxState *state);
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
virtual void processLink(AnnotLink * al);
/* capacity test */
bool can_stroke(GfxState *state) { return css_do_path(state, false, true); }
bool can_fill(GfxState *state) { return css_do_path(state, true, true); }
bool can_stroke(GfxState *state) { return false; } ////{ return css_do_path(state, false, true); }
bool can_fill(GfxState *state) { return false; } ////{ return css_do_path(state, true, true); }
const std::vector<bool> & get_chars_covered() { return covered_text_handler.get_chars_covered(); }
@ -207,6 +214,7 @@ protected:
// make sure the current HTML style consistent with PDF
void prepare_text_line(GfxState * state);
#if 0 //disable CSS drawing
////////////////////////////////////////////////////
// CSS drawing
////////////////////////////////////////////////////
@ -226,20 +234,8 @@ protected:
double * line_width_array, int line_width_count,
const GfxRGB * line_color, const GfxRGB * fill_color,
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
#endif //disable CSS drawing
////////////////////////////////////////////////////
// Covered text handling
////////////////////////////////////////////////////
/*
* Cue CoveredTextHandler that a character is drawn
* x, y: glyph-drawing position, in PDF text object space.
* ax, ay: glyph advance, in glyph space.
*/
void add_char_bbox(GfxState *state, double x, double y, double ax, double ay);
/*
* Cue CoveredTextHandler that an image is drawn
*/
void add_image_bbox(GfxState *state);
////////////////////////////////////////////////////
// PDF stuffs
@ -365,6 +361,7 @@ protected:
static const std::string MANIFEST_FILENAME;
CoveredTextHandler covered_text_handler;
DrawingTracer tracer;
};
} //namespace pdf2htmlEX

View File

@ -30,6 +30,38 @@ using std::sqrt;
using std::vector;
using std::ostream;
// Graphics state was restored: refresh the renderer's state tracking, then
// pop the tracer's saved state.
void HTMLRenderer::restoreState(GfxState * state)
{
    updateAll(state);
    tracer.restore();
}
// Graphics state was saved: push the tracer's state so a later
// restoreState() can pop it.
void HTMLRenderer::saveState(GfxState *state)
{
tracer.save();
}
// A path is stroked: only forwarded to the drawing tracer, nothing is
// rendered here.
void HTMLRenderer::stroke(GfxState * state)
{
tracer.stroke(state);
}
// A path is filled: only forwarded to the drawing tracer (the tracer's
// even_odd argument keeps its default, false).
void HTMLRenderer::fill(GfxState * state)
{
tracer.fill(state);
}
// A path is filled with the even-odd rule: forwarded to the drawing tracer
// with even_odd = true.
void HTMLRenderer::eoFill(GfxState * state)
{
tracer.fill(state, true);
}
// An axial shading is painted: traced as an ordinary fill of the current
// path; shading, tMin and tMax are not used.
// NOTE(review): always returns true, which presumably tells the caller the
// shading was handled by this device - confirm against the OutputDev contract.
GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax)
{
tracer.fill(state); //TODO correct?
return true;
}
#if 0 //disable css drawing
static bool is_horizontal_line(GfxSubpath * path)
{
return ((path->getNumPoints() == 2)
@ -415,6 +447,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
(*f_curpage) << "\"></div>";
}
#endif //disable css drawing
} // namespace pdf2htmlEX

View File

@ -11,6 +11,7 @@
#include <cmath>
#include <algorithm>
#include <vector>
#include <functional>
#include <GlobalParams.h>
@ -46,6 +47,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
,html_text_page(param, all_manager)
,preprocessor(param)
,tmp_files(param)
,tracer(param)
{
if(!(param.debug))
{
@ -76,6 +78,13 @@ HTMLRenderer::HTMLRenderer(const Param & param)
all_manager.height .set_eps(EPS);
all_manager.width .set_eps(EPS);
all_manager.bottom .set_eps(EPS);
tracer.on_char_drawn =
[this](double * box) { covered_text_handler.add_char_bbox(box); };
tracer.on_char_clipped =
[this](double * box) { covered_text_handler.add_char_bbox(box); }; //TODO
tracer.on_non_char_drawn =
[this](double * box) { covered_text_handler.add_non_char_bbox(box); };
}
HTMLRenderer::~HTMLRenderer()
@ -136,7 +145,6 @@ void HTMLRenderer::process(PDFDoc *doc)
// We handle covered texts during doc->displayPage(this...),
// and bg_renderer->render_page() depends on the result, so it must be called after
// doc->displayPage(this...).
covered_text_handler.reset();
doc->displayPage(this, i,
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
0,
@ -194,6 +202,9 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
#endif
{
covered_text_handler.reset();
tracer.reset(state);
this->pageNum = pageNum;
double pageWidth = state->getPageWidth();

View File

@ -14,7 +14,7 @@ namespace pdf2htmlEX {
void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
{
add_image_bbox(state);
tracer.draw_image(state);
return OutputDev::drawImage(state,ref,str,width,height,colorMap,interpolate,maskColors,inlineImg);
@ -73,21 +73,11 @@ void HTMLRenderer::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str
GfxImageColorMap *maskColorMap,
GBool maskInterpolate)
{
add_image_bbox(state);
tracer.draw_image(state);
return OutputDev::drawSoftMaskedImage(state,ref,str,
return OutputDev::drawSoftMaskedImage(state,ref,str, // TODO really required?
width,height,colorMap,interpolate,
maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate);
}
void HTMLRenderer::add_image_bbox(GfxState *state)
{
if (!param.process_covered_text)
return;
auto ctm = state->getCTM();
double bbox[4] {0, 0, 1, 1};
tm_transform_bbox(ctm, bbox);
covered_text_handler.add_non_char_bbox(bbox);
}
} // namespace pdf2htmlEX

View File

@ -46,6 +46,7 @@ void HTMLRenderer::updateFont(GfxState * state)
// The CTM changed: flag it for the renderer and pass the new CTM (read from
// state, the m11..m32 arguments are not used) to the drawing tracer.
void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32)
{
ctm_changed = true;
tracer.set_ctm(state);
}
void HTMLRenderer::updateTextMat(GfxState * state)
{
@ -89,14 +90,17 @@ void HTMLRenderer::updateStrokeColor(GfxState * state)
// The clip path changed: flag it and let the tracer clip to the current
// path (the tracer's even_odd argument keeps its default, false).
void HTMLRenderer::clip(GfxState * state)
{
clip_changed = true;
tracer.clip(state);
}
// The clip path changed (even-odd rule): flag it and let the tracer clip to
// the current path with even_odd = true.
void HTMLRenderer::eoClip(GfxState * state)
{
clip_changed = true;
tracer.clip(state, true);
}
// The clip path changed to a stroked path: flag it and notify the tracer.
void HTMLRenderer::clipToStrokePath(GfxState * state)
{
clip_changed = true;
tracer.clip_to_stroke_path(state);
}
void HTMLRenderer::reset_state()
{

View File

@ -72,7 +72,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
cerr << "TODO: non-zero origins" << endl;
}
add_char_bbox(state, dx, dy, dx1, dy1);
tracer.draw_char(state, dx, dy, dx1, dy1); //TODO dx dy seems not correct?
bool is_space = false;
if (n == 1 && *p == ' ')
@ -145,43 +145,4 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
draw_ty += dy;
}
void HTMLRenderer::add_char_bbox(GfxState *state, double x, double y, double ax, double ay)
{
if (!param.process_covered_text)
return;
Matrix tm_ctm, tm, itm;
memcpy(tm_ctm.m, this->cur_text_tm, sizeof(tm_ctm.m));
memcpy(tm.m, state->getTextMat(), sizeof(tm.m));
double fs = state->getFontSize();
double cx = state->getCurX(), cy = state->getCurY(),
ry = state->getRise(), h = state->getHorizScaling();
//cx and cy has been transformed by text matrix, we need to reverse them.
tm.invertTo(&itm);
double char_cx, char_cy;
itm.transform(cx, cy, &char_cx, &char_cy);
//TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
double tchar[6] {fs * h, 0, 0, fs, char_cx + x, char_cy + y + ry};
double tfinal[6];
tm_multiply(tfinal, tm_ctm.m, tchar);
auto font = state->getFont();
double bbox[4] {0, 0, ax, ay};
double desc = font->getDescent(), asc = font->getAscent();
if (font->getWMode() == 0)
{
bbox[1] += desc;
bbox[3] += asc;
}
else
{//TODO Vertical?
}
tm_transform_bbox(tfinal, bbox);
covered_text_handler.add_char_bbox(bbox);
}
} // namespace pdf2htmlEX