mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
Merge pull request #365 from duanyao/covered_text_handling
Covered text handling
This commit is contained in:
commit
80b8e1f5de
@ -161,6 +161,10 @@ set(PDF2HTMLEX_SRC ${PDF2HTMLEX_SRC}
|
|||||||
src/Base64Stream.cc
|
src/Base64Stream.cc
|
||||||
src/Color.h
|
src/Color.h
|
||||||
src/Color.cc
|
src/Color.cc
|
||||||
|
src/CoveredTextDetector.h
|
||||||
|
src/CoveredTextDetector.cc
|
||||||
|
src/DrawingTracer.h
|
||||||
|
src/DrawingTracer.cc
|
||||||
src/HTMLState.h
|
src/HTMLState.h
|
||||||
src/HTMLTextLine.h
|
src/HTMLTextLine.h
|
||||||
src/HTMLTextLine.cc
|
src/HTMLTextLine.cc
|
||||||
|
@ -242,6 +242,11 @@ If set to 0, pdf2htmlEX would try its best to balance the two methods above.
|
|||||||
.B \-\-optimize\-text <0|1> (Default: 0)
|
.B \-\-optimize\-text <0|1> (Default: 0)
|
||||||
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
|
If set to 1, pdf2htmlEX will try to reduce the number of HTML elements used for text. Turn it off if anything goes wrong.
|
||||||
|
|
||||||
|
.TP
|
||||||
|
.B \-\-correct\-text\-visibility <0|1> (Default: 0)
|
||||||
|
If set to 1, pdf2htmlEX will try to detect text covered by other graphics and arrange it properly,
i.e. covered text is made transparent in the text layer and is drawn on the background layer instead.
|
||||||
|
|
||||||
.SS Background Image
|
.SS Background Image
|
||||||
|
|
||||||
.TP
|
.TP
|
||||||
|
@ -63,6 +63,13 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
{
|
{
|
||||||
CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
||||||
}
|
}
|
||||||
|
// If a char is treated as image, it is not subject to cover test
|
||||||
|
// (see HTMLRenderer::drawString), so don't increase drawn_char_count.
|
||||||
|
else if (param.correct_text_visibility) {
|
||||||
|
if (html_renderer->is_char_covered(drawn_char_count))
|
||||||
|
CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
||||||
|
drawn_char_count++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void CairoBackgroundRenderer::beginTextObject(GfxState *state)
|
void CairoBackgroundRenderer::beginTextObject(GfxState *state)
|
||||||
@ -104,6 +111,7 @@ static GBool annot_cb(Annot *, void * pflag) {
|
|||||||
|
|
||||||
bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
|
bool CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
|
||||||
{
|
{
|
||||||
|
drawn_char_count = 0;
|
||||||
double page_width;
|
double page_width;
|
||||||
double page_height;
|
double page_height;
|
||||||
if(param.use_cropbox)
|
if(param.use_cropbox)
|
||||||
|
@ -67,6 +67,7 @@ private:
|
|||||||
std::unordered_map<int, int> bitmaps_ref_count;
|
std::unordered_map<int, int> bitmaps_ref_count;
|
||||||
// id of bitmaps' stream used by current page
|
// id of bitmaps' stream used by current page
|
||||||
std::vector<int> bitmaps_in_current_page;
|
std::vector<int> bitmaps_in_current_page;
|
||||||
|
int drawn_char_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -88,6 +88,13 @@ void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y,
|
|||||||
{
|
{
|
||||||
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
||||||
}
|
}
|
||||||
|
// If a char is treated as image, it is not subject to cover test
|
||||||
|
// (see HTMLRenderer::drawString), so don't increase drawn_char_count.
|
||||||
|
else if (param.correct_text_visibility) {
|
||||||
|
if (html_renderer->is_char_covered(drawn_char_count))
|
||||||
|
SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code,nBytes,u,uLen);
|
||||||
|
drawn_char_count++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void SplashBackgroundRenderer::beginTextObject(GfxState *state)
|
void SplashBackgroundRenderer::beginTextObject(GfxState *state)
|
||||||
@ -129,6 +136,7 @@ static GBool annot_cb(Annot *, void * pflag) {
|
|||||||
|
|
||||||
bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
|
bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
|
||||||
{
|
{
|
||||||
|
drawn_char_count = 0;
|
||||||
bool process_annotation = param.process_annotation;
|
bool process_annotation = param.process_annotation;
|
||||||
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi,
|
doc->displayPage(this, pageno, param.h_dpi, param.v_dpi,
|
||||||
0,
|
0,
|
||||||
|
@ -71,6 +71,7 @@ protected:
|
|||||||
HTMLRenderer * html_renderer;
|
HTMLRenderer * html_renderer;
|
||||||
const Param & param;
|
const Param & param;
|
||||||
std::string format;
|
std::string format;
|
||||||
|
int drawn_char_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
51
src/CoveredTextDetector.cc
Normal file
51
src/CoveredTextDetector.cc
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
/*
|
||||||
|
* CoveredTextDetector.cc
|
||||||
|
*
|
||||||
|
* Created on: 2014-6-14
|
||||||
|
* Author: duanyao
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "CoveredTextDetector.h"
|
||||||
|
|
||||||
|
#include "util/math.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
void CoveredTextDetector::reset()
|
||||||
|
{
|
||||||
|
char_bboxes.clear();
|
||||||
|
chars_covered.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CoveredTextDetector::add_char_bbox(double * bbox)
|
||||||
|
{
|
||||||
|
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
|
||||||
|
chars_covered.push_back(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CoveredTextDetector::add_char_bbox_clipped(double * bbox, bool patially)
|
||||||
|
{
|
||||||
|
char_bboxes.insert(char_bboxes.end(), bbox, bbox + 4);
|
||||||
|
chars_covered.push_back(true);
|
||||||
|
if (patially)
|
||||||
|
add_non_char_bbox(bbox, chars_covered.size() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CoveredTextDetector::add_non_char_bbox(double * bbox, int index)
|
||||||
|
{
|
||||||
|
if (index < 0)
|
||||||
|
index = chars_covered.size();
|
||||||
|
for (int i = 0; i < index; i++)
|
||||||
|
{
|
||||||
|
if (chars_covered[i])
|
||||||
|
continue;
|
||||||
|
double * cbbox = &char_bboxes[i * 4];
|
||||||
|
if (bbox_intersect(cbbox, bbox))
|
||||||
|
{
|
||||||
|
chars_covered[i] = true;
|
||||||
|
add_non_char_bbox(cbbox, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
61
src/CoveredTextDetector.h
Normal file
61
src/CoveredTextDetector.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* CoveredTextDetector.h
|
||||||
|
*
|
||||||
|
* Created on: 2014-6-14
|
||||||
|
* Author: duanyao
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef COVEREDTEXTDETECTOR_H__
|
||||||
|
#define COVEREDTEXTDETECTOR_H__
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
|
/**
 * Detect characters that are covered by non-char graphics on a page.
 */
class CoveredTextDetector
{
public:

    /**
     * Reset to initial state. Should be called when start drawing a page.
     */
    void reset();

    /**
     * Add a drawn character's bounding box.
     * @param bbox (x0, y0, x1, y1)
     */
    void add_char_bbox(double * bbox);

    /**
     * Add the bounding box of a drawn character that is clipped.
     * The char is marked as covered immediately.
     * @param bbox (x0, y0, x1, y1)
     * @param patially if true, the char is additionally treated as a non-char
     *                 graphic drawn at its own position, i.e. its bbox is
     *                 tested against the chars added before it.
     */
    void add_char_bbox_clipped(double * bbox, bool patially);

    /**
     * Add a drawn non-char graphics' bounding box.
     * If it intersects any previously drawn char's bbox, the char is marked as covered
     * and treated as a non-char.
     * @param bbox (x0, y0, x1, y1)
     * @param index this graphics' drawing order: assume it is drawn after (index-1)th
     *     char. -1 means after the last char.
     */
    void add_non_char_bbox(double * bbox, int index = -1);

    /**
     * An array of flags indicating whether a char is covered by any non-char graphics.
     * Index by the order that these chars are added.
     * This vector grows as add_char_bbox() / add_char_bbox_clipped() is called,
     * so its size is the count of currently drawn chars.
     *
     * Declared const so that it can also be queried through a const detector.
     */
    const std::vector<bool> & get_chars_covered() const { return chars_covered; }

private:
    // one flag per added char, true once the char is known to be covered
    std::vector<bool> chars_covered;
    // x00, y00, x01, y01; x10, y10, x11, y11;...
    std::vector<double> char_bboxes;
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* COVEREDTEXTDETECTOR_H__ */
|
360
src/DrawingTracer.cc
Normal file
360
src/DrawingTracer.cc
Normal file
@ -0,0 +1,360 @@
|
|||||||
|
/*
|
||||||
|
* DrawingTracer.cc
|
||||||
|
*
|
||||||
|
* Created on: 2014-6-15
|
||||||
|
* Author: duanyao
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "GfxFont.h"
|
||||||
|
|
||||||
|
#include "util/math.h"
|
||||||
|
#include "DrawingTracer.h"
|
||||||
|
|
||||||
|
//#define DT_DEBUG(x) (x)
|
||||||
|
#define DT_DEBUG(x)
|
||||||
|
|
||||||
|
#if !ENABLE_SVG
|
||||||
|
#warning "Cairo is disabled because ENABLE_SVG is off, --correct-text-visibility has limited functionality."
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace pdf2htmlEX
|
||||||
|
{
|
||||||
|
|
||||||
|
// Bind the tracer to the conversion parameters.
// With cairo support compiled in, the context starts out null; reset() creates it.
DrawingTracer::DrawingTracer(const Param & param)
    : param(param)
#if ENABLE_SVG
    , cairo(nullptr)
#endif
{
}
|
||||||
|
|
||||||
|
// Release whatever finish() is responsible for (the cairo context, if any).
DrawingTracer::~DrawingTracer()
{
    finish();
}
|
||||||
|
|
||||||
|
// Prepare the tracer for a new page: drop the previous cairo context (if any)
// and, when cairo is available, create a fresh one on a recording surface that
// has the size of the page described by `state`.
// Does nothing unless --correct-text-visibility is enabled.
void DrawingTracer::reset(GfxState *state)
{
    if (!param.correct_text_visibility)
        return;
    finish();

#if ENABLE_SVG
    // Members in declaration order: x, y, width, height.
    // (Plain aggregate initialization; the "width:" form is a GNU-only extension.)
    cairo_rectangle_t page_box {0, 0, state->getPageWidth(), state->getPageHeight()};
    cairo_surface_t * surface = cairo_recording_surface_create(CAIRO_CONTENT_COLOR_ALPHA, &page_box);
    cairo = cairo_create(surface);
    // cairo_create() holds its own reference to the surface, so ours must be
    // dropped here; otherwise one surface would leak for every page.
    cairo_surface_destroy(surface);
#endif
}
|
||||||
|
|
||||||
|
// Destroy the cairo context, if one exists, and forget it.
// Safe to call repeatedly.
void DrawingTracer::finish()
{
#if ENABLE_SVG
    if (cairo == nullptr)
        return;

    cairo_destroy(cairo);
    cairo = nullptr;
#endif
}
|
||||||
|
|
||||||
|
// Poppler won't inform us its initial CTM, and the initial CTM is affected by zoom level.
|
||||||
|
// OutputDev::clip() may be called before OutputDev::updateCTM(), so we can't rely on GfxState::getCTM(),
|
||||||
|
// and should trace ctm changes ourself (via cairo).
|
||||||
|
void DrawingTracer::update_ctm(GfxState *state, double m11, double m12, double m21, double m22, double m31, double m32)
|
||||||
|
{
|
||||||
|
if (!param.correct_text_visibility)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#if ENABLE_SVG
|
||||||
|
cairo_matrix_t matrix;
|
||||||
|
matrix.xx = m11;
|
||||||
|
matrix.yx = m12;
|
||||||
|
matrix.xy = m21;
|
||||||
|
matrix.yy = m22;
|
||||||
|
matrix.x0 = m31;
|
||||||
|
matrix.y0 = m32;
|
||||||
|
cairo_transform(cairo, &matrix);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Intersect the traced clip area with the current path of `state`.
// even_odd selects the even-odd fill rule, otherwise the winding rule is used.
void DrawingTracer::clip(GfxState * state, bool even_odd)
{
    if (!param.correct_text_visibility)
        return;
#if ENABLE_SVG
    do_path(state, state->getPath());

    const cairo_fill_rule_t rule = even_odd ? CAIRO_FILL_RULE_EVEN_ODD
                                            : CAIRO_FILL_RULE_WINDING;
    cairo_set_fill_rule(cairo, rule);
    cairo_clip(cairo);
#endif
}
|
||||||
|
|
||||||
|
// Clipping to a stroked path is not traced yet; this is currently a no-op.
void DrawingTracer::clip_to_stroke_path(GfxState * state)
{
    if (param.correct_text_visibility)
    {
        // TODO cairo_stroke_to_path() ?
    }
}
|
||||||
|
|
||||||
|
// Push the traced graphics state (kept in the cairo context).
void DrawingTracer::save()
{
#if ENABLE_SVG
    if (param.correct_text_visibility)
        cairo_save(cairo);
#endif
}
|
||||||
|
// Pop the traced graphics state (kept in the cairo context).
void DrawingTracer::restore()
{
#if ENABLE_SVG
    if (param.correct_text_visibility)
        cairo_restore(cairo);
#endif
}
|
||||||
|
|
||||||
|
// Replace the cairo context's current path with a copy of `path`
// (adapted from CairoOutputDev::doPath). `state` is not used.
// Without cairo support this does nothing.
void DrawingTracer::do_path(GfxState * state, GfxPath * path)
{
#if ENABLE_SVG
    cairo_new_path(cairo);

    for (int s = 0; s < path->getNumSubpaths(); ++s) {
        GfxSubpath * sub = path->getSubpath(s);
        if (sub->getNumPoints() <= 0)
            continue;

        cairo_move_to(cairo, sub->getX(0), sub->getY(0));

        int k = 1;
        while (k < sub->getNumPoints()) {
            if (sub->getCurve(k)) {
                // a curve segment occupies 3 points: 2 control points + end point
                const double end_x = sub->getX(k + 2);
                const double end_y = sub->getY(k + 2);
                cairo_curve_to(cairo,
                    sub->getX(k), sub->getY(k),
                    sub->getX(k + 1), sub->getY(k + 1),
                    end_x, end_y);
                k += 3;
            } else {
                cairo_line_to(cairo, sub->getX(k), sub->getY(k));
                k += 1;
            }
        }

        if (sub->isClosed())
            cairo_close_path(cairo);
    }
#endif
}
|
||||||
|
|
||||||
|
// Trace a stroke of the current path of `state`.
// The path is split into single segments; the stroke extents of every segment
// are reported through draw_non_char_bbox() so that chars under them can be
// detected as covered. Without cairo support this does nothing.
void DrawingTracer::stroke(GfxState * state)
{
#if ENABLE_SVG
    if (!param.correct_text_visibility)
        return;

    DT_DEBUG(printf("DrawingTracer::stroke\n"));

    cairo_set_line_width(cairo, state->getLineWidth());

    // GfxPath is broken into steps, each step makes up a cairo path and its bbox is used for covering test.
    // TODO
    // 1. path steps that are not vertical or horizontal lines may still falsely "cover" many chars,
    //    can we slice those steps further?
    // 2. if the line width is small, can we just ignore the path?
    // 3. line join feature can't be retained. We use line-cap-square to minimize the problem that
    //    some chars actually covered by a line join are missed. However chars covered by a acute angle
    //    with line-join-miter may be still recognized as not covered.
    cairo_set_line_cap(cairo, CAIRO_LINE_CAP_SQUARE);
    GfxPath * path = state->getPath();
    for (int i = 0; i < path->getNumSubpaths(); ++i) {
        GfxSubpath * subpath = path->getSubpath(i);
        int n = subpath->getNumPoints();
        // A subpath holding only its starting point has no segment to stroke.
        // Entering the loop with n == 1 would read point index 1, which lies
        // past the stored points, so such subpaths are skipped as well.
        if (n < 2)
            continue;
        double x = subpath->getX(0);
        double y = subpath->getY(0);
        //p: loop cursor; j: next point index
        int p = 1, j = 1;
        while (p <= n) {
            cairo_new_path(cairo);
            cairo_move_to(cairo, x, y);
            if (subpath->getCurve(j)) {
                x = subpath->getX(j+2);
                y = subpath->getY(j+2);
                cairo_curve_to(cairo,
                    subpath->getX(j), subpath->getY(j),
                    subpath->getX(j+1), subpath->getY(j+1),
                    x, y);
                p += 3;
            } else {
                x = subpath->getX(j);
                y = subpath->getY(j);
                cairo_line_to(cairo, x, y);
                ++p;
            }

            DT_DEBUG(printf("DrawingTracer::stroke:new box:\n"));
            double sbox[4];
            cairo_stroke_extents(cairo, sbox, sbox + 1, sbox + 2, sbox + 3);
            // an empty extents box carries no covering information
            if (sbox[0] != sbox[2] && sbox[1] != sbox[3])
                draw_non_char_bbox(state, sbox);
            else
                DT_DEBUG(printf("DrawingTracer::stroke:zero box!\n"));

            if (p == n)
            {
                if (subpath->isClosed())
                    j = 0; // if sub path is closed, go back to starting point
                else
                    break;
            }
            else
                j = p;
        }
    }
#endif
}
|
||||||
|
|
||||||
|
// Trace a fill of the current path of `state`: the fill extents of the whole
// path are reported as one non-char bbox. even_odd is currently not used.
void DrawingTracer::fill(GfxState * state, bool even_odd)
{
    if (!param.correct_text_visibility)
        return;

#if ENABLE_SVG
    do_path(state, state->getPath());

    //cairo_fill_extents don't take fill rule into account.
    //cairo_set_fill_rule (cairo, even_odd? CAIRO_FILL_RULE_EVEN_ODD : CAIRO_FILL_RULE_WINDING);
    double extents[4];
    cairo_fill_extents(cairo, &extents[0], &extents[1], &extents[2], &extents[3]);
    draw_non_char_bbox(state, extents);
#endif
}
|
||||||
|
|
||||||
|
// Report a non-char graphic's bbox (given in user space) to on_non_char_drawn.
// With cairo support, the bbox is first intersected with the extents of the
// current clip area and dropped when nothing is left. The bbox is modified in
// place: clipped (cairo only), then transformed by the CTM.
void DrawingTracer::draw_non_char_bbox(GfxState * state, double * bbox)
{
#if ENABLE_SVG
    double clip_box[4];
    cairo_clip_extents(cairo, &clip_box[0], &clip_box[1], &clip_box[2], &clip_box[3]);
    if (!bbox_intersect(clip_box, bbox, bbox))
        return;
#endif

    transform_bbox_by_ctm(bbox, state);
    DT_DEBUG(printf("DrawingTracer::draw_non_char_bbox:[%f,%f,%f,%f]\n", bbox[0],bbox[1],bbox[2],bbox[3]));
    if (on_non_char_drawn)
        on_non_char_drawn(bbox);
}
|
||||||
|
|
||||||
|
// Report a char's bbox (given in user space) to the matching callback.
// With cairo support the 4 corners of the bbox are tested against the clip area:
//   - no corner inside    -> on_char_clipped(bbox, false)
//   - some corners inside -> bbox is cut down to the clip extents,
//                            then on_char_clipped(bbox, true)
//   - all corners inside  -> on_char_drawn(bbox)
// Without cairo support every char is reported through on_char_drawn.
// In all cases the bbox is transformed by the CTM before being reported.
void DrawingTracer::draw_char_bbox(GfxState * state, double * bbox)
{
#if ENABLE_SVG
    // Note: even if 4 corners of the char are all in or all out of the clip area,
    // it could still be partially clipped.
    // TODO better solution?
    const double corners[4][2] = {
        { bbox[0], bbox[1] },
        { bbox[2], bbox[3] },
        { bbox[2], bbox[1] },
        { bbox[0], bbox[3] },
    };
    int inside_count = 0;
    for (const auto & corner : corners)
    {
        if (cairo_in_clip(cairo, corner[0], corner[1]))
            ++inside_count;
    }

    const bool fully_outside = (inside_count == 0);
    const bool fully_inside = (inside_count == 4);

    if (!fully_outside && !fully_inside)
    {
        // keep only the part of the char that lies within the clip extents
        double clip_box[4];
        cairo_clip_extents(cairo, &clip_box[0], &clip_box[1], &clip_box[2], &clip_box[3]);
        bbox_intersect(clip_box, bbox, bbox);
    }

    transform_bbox_by_ctm(bbox);

    if (fully_inside)
    {
        if (on_char_drawn)
            on_char_drawn(bbox);
    }
    else if (on_char_clipped)
    {
        on_char_clipped(bbox, !fully_outside);
    }
#else
    transform_bbox_by_ctm(bbox, state);
    if (on_char_drawn)
        on_char_drawn(bbox);
#endif
    DT_DEBUG(printf("DrawingTracer::draw_char_bbox:[%f,%f,%f,%f]\n",bbox[0],bbox[1],bbox[2],bbox[3]));
}
|
||||||
|
|
||||||
|
// Trace an image: the unit square (0,0)-(1,1) is reported as a non-char bbox.
void DrawingTracer::draw_image(GfxState *state)
{
    if (param.correct_text_visibility)
    {
        double unit_square[4] {0, 0, 1, 1};
        draw_non_char_bbox(state, unit_square);
    }
}
|
||||||
|
|
||||||
|
// Trace a char: build its bbox from the advance (ax, ay) and, for horizontal
// writing mode, the font's descent/ascent, map it through the char matrix
// (font size, horizontal scaling, position, rise) and the text matrix, and hand
// the result to draw_char_bbox().
void DrawingTracer::draw_char(GfxState *state, double x, double y, double ax, double ay)
{
    if (!param.correct_text_visibility)
        return;

    Matrix text_mat, inv_text_mat;
    memcpy(text_mat.m, state->getTextMat(), sizeof(text_mat.m));

    const double cur_x = state->getCurX();
    const double cur_y = state->getCurY();
    const double font_size = state->getFontSize();
    const double rise = state->getRise();
    const double horiz_scaling = state->getHorizScaling();

    //cur_x and cur_y have been transformed by text matrix, we need to reverse them.
    text_mat.invertTo(&inv_text_mat);
    double origin_x, origin_y;
    inv_text_mat.transform(cur_x, cur_y, &origin_x, &origin_y);

    //TODO Vertical? Currently vertical/type3 chars are treated as non-chars.
    double char_m[6] {font_size * horiz_scaling, 0, 0, font_size, origin_x + x, origin_y + y + rise};

    double final_m[6];
    tm_multiply(final_m, text_mat.m, char_m);

    auto font = state->getFont();
    double bbox[4] {0, 0, ax, ay};
    const double descent = font->getDescent();
    const double ascent = font->getAscent();
    const bool horizontal = (font->getWMode() == 0);
    if (horizontal)
    {
        bbox[1] += descent;
        bbox[3] += ascent;
    }
    //TODO Vertical? (bbox is left as the plain advance box for now)

    tm_transform_bbox(final_m, bbox);
    draw_char_bbox(state, bbox);
}
|
||||||
|
|
||||||
|
|
||||||
|
void DrawingTracer::transform_bbox_by_ctm(double * bbox, GfxState * state)
|
||||||
|
{
|
||||||
|
#if ENABLE_SVG
|
||||||
|
cairo_matrix_t mat;
|
||||||
|
cairo_get_matrix(cairo, &mat);
|
||||||
|
double mat_a[6] {mat.xx, mat.yx, mat.xy, mat.yy, mat.x0, mat.y0};
|
||||||
|
tm_transform_bbox(mat_a, bbox);
|
||||||
|
#else
|
||||||
|
tm_transform_bbox(state->getCTM(), bbox);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
} /* namespace pdf2htmlEX */
|
79
src/DrawingTracer.h
Normal file
79
src/DrawingTracer.h
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/*
|
||||||
|
* DrawingTracer.h
|
||||||
|
*
|
||||||
|
* Created on: 2014-6-15
|
||||||
|
* Author: duanyao
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef DRAWINGTRACER_H__
|
||||||
|
#define DRAWINGTRACER_H__
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
#include <GfxState.h>
|
||||||
|
|
||||||
|
#include "pdf2htmlEX-config.h"
|
||||||
|
|
||||||
|
#if ENABLE_SVG
|
||||||
|
#include <cairo.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "Param.h"
|
||||||
|
|
||||||
|
namespace pdf2htmlEX
|
||||||
|
{
|
||||||
|
|
||||||
|
class DrawingTracer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/*
|
||||||
|
* The callback to receive drawn event.
|
||||||
|
* bbox in device space.
|
||||||
|
*/
|
||||||
|
// a non-char graphics is drawn
|
||||||
|
std::function<void(double * bbox)> on_non_char_drawn;
|
||||||
|
// a char is drawn in the clip area
|
||||||
|
std::function<void(double * bbox)> on_char_drawn;
|
||||||
|
// a char is drawn out of/partially in the clip area
|
||||||
|
std::function<void(double * bbox, bool patially)> on_char_clipped;
|
||||||
|
|
||||||
|
DrawingTracer(const Param & param);
|
||||||
|
virtual ~DrawingTracer();
|
||||||
|
void reset(GfxState * state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A character is drawing
|
||||||
|
* x, y: glyph-drawing position, in PDF text object space.
|
||||||
|
* ax, ay: glyph advance, in glyph space.
|
||||||
|
*/
|
||||||
|
void draw_char(GfxState * state, double x, double y, double ax, double ay);
|
||||||
|
/*
|
||||||
|
* An image is drawing
|
||||||
|
*/
|
||||||
|
void draw_image(GfxState * state);
|
||||||
|
void update_ctm(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32);
|
||||||
|
void clip(GfxState * state, bool even_odd = false);
|
||||||
|
void clip_to_stroke_path(GfxState * state);
|
||||||
|
void fill(GfxState * state, bool even_odd = false);
|
||||||
|
void stroke(GfxState * state);
|
||||||
|
void save();
|
||||||
|
void restore();
|
||||||
|
|
||||||
|
private:
|
||||||
|
void finish();
|
||||||
|
// Following methods operate in user space (just before CTM is applied)
|
||||||
|
void do_path(GfxState * state, GfxPath * path);
|
||||||
|
void draw_non_char_bbox(GfxState * state, double * bbox);
|
||||||
|
void draw_char_bbox(GfxState * state, double * bbox);
|
||||||
|
// If cairo is available, parameter state is ignored
|
||||||
|
void transform_bbox_by_ctm(double * bbox, GfxState * state = nullptr);
|
||||||
|
|
||||||
|
const Param & param;
|
||||||
|
|
||||||
|
#if ENABLE_SVG
|
||||||
|
cairo_t * cairo;
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
} /* namespace pdf2htmlEX */
|
||||||
|
#endif /* DRAWINGTRACER_H__ */
|
@ -31,10 +31,13 @@
|
|||||||
#include "HTMLTextPage.h"
|
#include "HTMLTextPage.h"
|
||||||
|
|
||||||
#include "BackgroundRenderer/BackgroundRenderer.h"
|
#include "BackgroundRenderer/BackgroundRenderer.h"
|
||||||
|
#include "CoveredTextDetector.h"
|
||||||
|
#include "DrawingTracer.h"
|
||||||
|
|
||||||
#include "util/const.h"
|
#include "util/const.h"
|
||||||
#include "util/misc.h"
|
#include "util/misc.h"
|
||||||
|
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
class HTMLRenderer : public OutputDev
|
class HTMLRenderer : public OutputDev
|
||||||
@ -89,7 +92,9 @@ public:
|
|||||||
* We just mark as changed, and recheck if they have been changed when we are about to output a new string
|
* We just mark as changed, and recheck if they have been changed when we are about to output a new string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
virtual void restoreState(GfxState * state) { updateAll(state); }
|
virtual void restoreState(GfxState * state);
|
||||||
|
|
||||||
|
virtual void saveState(GfxState *state);
|
||||||
|
|
||||||
virtual void updateAll(GfxState * state);
|
virtual void updateAll(GfxState * state);
|
||||||
|
|
||||||
@ -125,15 +130,34 @@ public:
|
|||||||
|
|
||||||
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
|
virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg);
|
||||||
|
|
||||||
virtual void stroke(GfxState *state) { css_do_path(state, false); }
|
virtual void drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
|
||||||
virtual void fill(GfxState *state) { css_do_path(state, true); }
|
int width, int height,
|
||||||
|
GfxImageColorMap *colorMap,
|
||||||
|
GBool interpolate,
|
||||||
|
Stream *maskStr,
|
||||||
|
int maskWidth, int maskHeight,
|
||||||
|
GfxImageColorMap *maskColorMap,
|
||||||
|
GBool maskInterpolate);
|
||||||
|
|
||||||
|
virtual void stroke(GfxState *state); ////{ css_do_path(state, false); }
|
||||||
|
virtual void fill(GfxState *state); ////{ css_do_path(state, true); }
|
||||||
|
virtual void eoFill(GfxState *state);
|
||||||
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
|
virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax);
|
||||||
|
|
||||||
virtual void processLink(AnnotLink * al);
|
virtual void processLink(AnnotLink * al);
|
||||||
|
|
||||||
/* capacity test */
|
/* capacity test */
|
||||||
bool can_stroke(GfxState *state) { return css_do_path(state, false, true); }
|
bool can_stroke(GfxState *state) { return false; } ////{ return css_do_path(state, false, true); }
|
||||||
bool can_fill(GfxState *state) { return css_do_path(state, true, true); }
|
bool can_fill(GfxState *state) { return false; } ////{ return css_do_path(state, true, true); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Covered text handling.
|
||||||
|
*/
|
||||||
|
// Is a char (actually a glyph) covered by non-char's. Index in drawing order in current page.
|
||||||
|
// Does not fail on out-of-bound conditions, but return false.
|
||||||
|
bool is_char_covered(int index);
|
||||||
|
// Currently drawn char (glyph) count in current page.
|
||||||
|
int get_char_count() { return (int)covered_text_detecor.get_chars_covered().size(); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
@ -195,6 +219,7 @@ protected:
|
|||||||
// make sure the current HTML style consistent with PDF
|
// make sure the current HTML style consistent with PDF
|
||||||
void prepare_text_line(GfxState * state);
|
void prepare_text_line(GfxState * state);
|
||||||
|
|
||||||
|
#if 0 //disable CSS drawing
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
// CSS drawing
|
// CSS drawing
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
@ -214,6 +239,7 @@ protected:
|
|||||||
double * line_width_array, int line_width_count,
|
double * line_width_array, int line_width_count,
|
||||||
const GfxRGB * line_color, const GfxRGB * fill_color,
|
const GfxRGB * line_color, const GfxRGB * fill_color,
|
||||||
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
|
void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr );
|
||||||
|
#endif //disable CSS drawing
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
@ -328,7 +354,6 @@ protected:
|
|||||||
#endif
|
#endif
|
||||||
BackgroundRenderer * bg_renderer;
|
BackgroundRenderer * bg_renderer;
|
||||||
BackgroundRenderer * fallback_bg_renderer;
|
BackgroundRenderer * fallback_bg_renderer;
|
||||||
bool fallback_bg_required;
|
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
std::ofstream fs;
|
std::ofstream fs;
|
||||||
@ -338,6 +363,9 @@ protected:
|
|||||||
std::string cur_page_filename;
|
std::string cur_page_filename;
|
||||||
|
|
||||||
static const std::string MANIFEST_FILENAME;
|
static const std::string MANIFEST_FILENAME;
|
||||||
|
|
||||||
|
CoveredTextDetector covered_text_detecor;
|
||||||
|
DrawingTracer tracer;
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace pdf2htmlEX
|
} //namespace pdf2htmlEX
|
||||||
|
@ -30,6 +30,39 @@ using std::sqrt;
|
|||||||
using std::vector;
|
using std::vector;
|
||||||
using std::ostream;
|
using std::ostream;
|
||||||
|
|
||||||
|
// A graphics state was restored: refresh all tracked state first, then let the
// tracer pop its own saved state.
void HTMLRenderer::restoreState(GfxState * state)
{
    this->updateAll(state);
    this->tracer.restore();
}
|
||||||
|
|
||||||
|
// A graphics state was saved: let the tracer push its own state.
// `state` itself is not needed here.
void HTMLRenderer::saveState(GfxState *state)
{
    this->tracer.save();
}
|
||||||
|
|
||||||
|
// Strokes are not drawn here; they are only forwarded to the tracer.
void HTMLRenderer::stroke(GfxState * state)
{
    this->tracer.stroke(state);
}
|
||||||
|
|
||||||
|
// Fills are not drawn here; they are only forwarded to the tracer
// (with the tracer's default fill-rule flag).
void HTMLRenderer::fill(GfxState * state)
{
    this->tracer.fill(state);
}
|
||||||
|
|
||||||
|
// Even-odd fills are not drawn here; they are only forwarded to the tracer.
void HTMLRenderer::eoFill(GfxState * state)
{
    const bool even_odd = true;
    this->tracer.fill(state, even_odd);
}
|
||||||
|
|
||||||
|
// An axial shading is traced like an ordinary fill of the current path;
// shading, tMin and tMax are not inspected.
// NOTE(review): always returns true, presumably telling the caller that the
// shading has been handled -- confirm against the OutputDev contract.
GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax)
{
    this->tracer.fill(state); //TODO correct?
    return true;
}
|
||||||
|
|
||||||
|
#if 0 //disable css drawing
|
||||||
static bool is_horizontal_line(GfxSubpath * path)
|
static bool is_horizontal_line(GfxSubpath * path)
|
||||||
{
|
{
|
||||||
return ((path->getNumPoints() == 2)
|
return ((path->getNumPoints() == 2)
|
||||||
@ -415,6 +448,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
|||||||
|
|
||||||
(*f_curpage) << "\"></div>";
|
(*f_curpage) << "\"></div>";
|
||||||
}
|
}
|
||||||
|
#endif //disable css drawing
|
||||||
|
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
#include <GlobalParams.h>
|
#include <GlobalParams.h>
|
||||||
|
|
||||||
@ -46,6 +47,7 @@ HTMLRenderer::HTMLRenderer(const Param & param)
|
|||||||
,html_text_page(param, all_manager)
|
,html_text_page(param, all_manager)
|
||||||
,preprocessor(param)
|
,preprocessor(param)
|
||||||
,tmp_files(param)
|
,tmp_files(param)
|
||||||
|
,tracer(param)
|
||||||
{
|
{
|
||||||
if(!(param.debug))
|
if(!(param.debug))
|
||||||
{
|
{
|
||||||
@ -76,6 +78,13 @@ HTMLRenderer::HTMLRenderer(const Param & param)
|
|||||||
all_manager.height .set_eps(EPS);
|
all_manager.height .set_eps(EPS);
|
||||||
all_manager.width .set_eps(EPS);
|
all_manager.width .set_eps(EPS);
|
||||||
all_manager.bottom .set_eps(EPS);
|
all_manager.bottom .set_eps(EPS);
|
||||||
|
|
||||||
|
tracer.on_char_drawn =
|
||||||
|
[this](double * box) { covered_text_detecor.add_char_bbox(box); };
|
||||||
|
tracer.on_char_clipped =
|
||||||
|
[this](double * box, bool partial) { covered_text_detecor.add_char_bbox_clipped(box, partial); };
|
||||||
|
tracer.on_non_char_drawn =
|
||||||
|
[this](double * box) { covered_text_detecor.add_non_char_bbox(box); };
|
||||||
}
|
}
|
||||||
|
|
||||||
HTMLRenderer::~HTMLRenderer()
|
HTMLRenderer::~HTMLRenderer()
|
||||||
@ -133,13 +142,6 @@ void HTMLRenderer::process(PDFDoc *doc)
|
|||||||
cur_page_filename = filled_template_filename;
|
cur_page_filename = filled_template_filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(param.process_nontext)
|
|
||||||
{
|
|
||||||
fallback_bg_required = !bg_renderer->render_page(doc, i);
|
|
||||||
if (fallback_bg_required && fallback_bg_renderer != nullptr)
|
|
||||||
fallback_bg_renderer->render_page(doc, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
doc->displayPage(this, i,
|
doc->displayPage(this, i,
|
||||||
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
|
text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI,
|
||||||
0,
|
0,
|
||||||
@ -190,15 +192,20 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state)
|
|||||||
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
|
covered_text_detecor.reset();
|
||||||
|
tracer.reset(state);
|
||||||
|
|
||||||
this->pageNum = pageNum;
|
this->pageNum = pageNum;
|
||||||
|
|
||||||
double pageWidth = state->getPageWidth();
|
html_text_page.set_page_size(state->getPageWidth(), state->getPageHeight());
|
||||||
double pageHeight = state->getPageHeight();
|
|
||||||
|
|
||||||
html_text_page.set_page_size(pageWidth, pageHeight);
|
reset_state();
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLRenderer::endPage() {
|
||||||
|
long long wid = all_manager.width.install(html_text_page.get_width());
|
||||||
|
long long hid = all_manager.height.install(html_text_page.get_height());
|
||||||
|
|
||||||
long long wid = all_manager.width.install(pageWidth);
|
|
||||||
long long hid = all_manager.height.install(pageHeight);
|
|
||||||
(*f_curpage)
|
(*f_curpage)
|
||||||
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
<< "<div id=\"" << CSS::PAGE_FRAME_CN << pageNum
|
||||||
<< "\" class=\"" << CSS::PAGE_FRAME_CN
|
<< "\" class=\"" << CSS::PAGE_FRAME_CN
|
||||||
@ -231,16 +238,15 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
|||||||
|
|
||||||
if(param.process_nontext)
|
if(param.process_nontext)
|
||||||
{
|
{
|
||||||
if (!fallback_bg_required)
|
if (bg_renderer->render_page(cur_doc, pageNum))
|
||||||
bg_renderer->embed_image(pageNum);
|
bg_renderer->embed_image(pageNum);
|
||||||
else if (fallback_bg_renderer != nullptr)
|
else if (fallback_bg_renderer != nullptr)
|
||||||
fallback_bg_renderer->embed_image(pageNum);
|
{
|
||||||
|
if (fallback_bg_renderer->render_page(cur_doc, pageNum))
|
||||||
|
fallback_bg_renderer->embed_image(pageNum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
reset_state();
|
|
||||||
}
|
|
||||||
|
|
||||||
void HTMLRenderer::endPage() {
|
|
||||||
// dump all text
|
// dump all text
|
||||||
html_text_page.dump_text(*f_curpage);
|
html_text_page.dump_text(*f_curpage);
|
||||||
html_text_page.dump_css(f_css.fs);
|
html_text_page.dump_css(f_css.fs);
|
||||||
|
@ -14,6 +14,8 @@ namespace pdf2htmlEX {
|
|||||||
|
|
||||||
void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
|
void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg)
|
||||||
{
|
{
|
||||||
|
tracer.draw_image(state);
|
||||||
|
|
||||||
return OutputDev::drawImage(state,ref,str,width,height,colorMap,interpolate,maskColors,inlineImg);
|
return OutputDev::drawImage(state,ref,str,width,height,colorMap,interpolate,maskColors,inlineImg);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@ -62,4 +64,20 @@ void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int w
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Soft-masked image hook.
 * First lets the tracer record the image through tracer.draw_image(state),
 * then delegates to the base-class implementation with every argument
 * passed through unchanged.
 */
void HTMLRenderer::drawSoftMaskedImage(GfxState *state, Object *ref, Stream *str,
                   int width, int height,
                   GfxImageColorMap *colorMap,
                   GBool interpolate,
                   Stream *maskStr,
                   int maskWidth, int maskHeight,
                   GfxImageColorMap *maskColorMap,
                   GBool maskInterpolate)
{
    tracer.draw_image(state);

    // TODO really required?
    OutputDev::drawSoftMaskedImage(state, ref, str,
            width, height, colorMap, interpolate,
            maskStr, maskWidth, maskHeight, maskColorMap, maskInterpolate);
}
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -46,6 +46,7 @@ void HTMLRenderer::updateFont(GfxState * state)
|
|||||||
/*
 * CTM-change hook.
 * Raises the ctm_changed flag, then forwards the state and the six matrix
 * components to the drawing tracer.
 */
void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32)
{
    ctm_changed = true;

    tracer.update_ctm(state, m11, m12, m21, m22, m31, m32);
}
|
||||||
void HTMLRenderer::updateTextMat(GfxState * state)
|
void HTMLRenderer::updateTextMat(GfxState * state)
|
||||||
{
|
{
|
||||||
@ -89,14 +90,17 @@ void HTMLRenderer::updateStrokeColor(GfxState * state)
|
|||||||
/*
 * Clip hook: raises the clip_changed flag and forwards the state to the
 * drawing tracer, using the tracer's default for the second clip() argument.
 */
void HTMLRenderer::clip(GfxState * state)
{
    clip_changed = true;

    tracer.clip(state);
}
|
||||||
/*
 * Even-odd clip hook: raises the clip_changed flag and forwards the state to
 * the drawing tracer with `true` as the second argument of clip().
 * NOTE(review): the flag presumably selects the even-odd rule -- confirm
 * against DrawingTracer::clip.
 */
void HTMLRenderer::eoClip(GfxState * state)
{
    clip_changed = true;

    const bool second_arg = true;
    tracer.clip(state, second_arg);
}
|
||||||
/*
 * Clip-to-stroke-path hook: raises the clip_changed flag and forwards the
 * state to tracer.clip_to_stroke_path().
 */
void HTMLRenderer::clipToStrokePath(GfxState * state)
{
    clip_changed = true;

    tracer.clip_to_stroke_path(state);
}
|
||||||
void HTMLRenderer::reset_state()
|
void HTMLRenderer::reset_state()
|
||||||
{
|
{
|
||||||
@ -119,6 +123,8 @@ void HTMLRenderer::reset_state()
|
|||||||
cur_line_state.y = 0;
|
cur_line_state.y = 0;
|
||||||
memcpy(cur_line_state.transform_matrix, ID_MATRIX, sizeof(cur_line_state.transform_matrix));
|
memcpy(cur_line_state.transform_matrix, ID_MATRIX, sizeof(cur_line_state.transform_matrix));
|
||||||
|
|
||||||
|
cur_line_state.is_char_covered = [this](int index) { return is_char_covered(index);};
|
||||||
|
|
||||||
cur_clip_state.xmin = 0;
|
cur_clip_state.xmin = 0;
|
||||||
cur_clip_state.xmax = 0;
|
cur_clip_state.xmax = 0;
|
||||||
cur_clip_state.ymin = 0;
|
cur_clip_state.ymin = 0;
|
||||||
@ -502,6 +508,10 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
|
|||||||
double rise_x, rise_y;
|
double rise_x, rise_y;
|
||||||
state->textTransformDelta(0, state->getRise(), &rise_x, &rise_y);
|
state->textTransformDelta(0, state->getRise(), &rise_x, &rise_y);
|
||||||
state->transform(state->getCurX() + rise_x, state->getCurY() + rise_y, &cur_line_state.x, &cur_line_state.y);
|
state->transform(state->getCurX() + rise_x, state->getCurY() + rise_y, &cur_line_state.x, &cur_line_state.y);
|
||||||
|
|
||||||
|
if (param.correct_text_visibility)
|
||||||
|
cur_line_state.first_char_index = get_char_count();
|
||||||
|
|
||||||
html_text_page.open_new_line(cur_line_state);
|
html_text_page.open_new_line(cur_line_state);
|
||||||
|
|
||||||
cur_text_state.vertical_align = 0;
|
cur_text_state.vertical_align = 0;
|
||||||
|
@ -14,6 +14,9 @@
|
|||||||
#include "util/namespace.h"
|
#include "util/namespace.h"
|
||||||
#include "util/unicode.h"
|
#include "util/unicode.h"
|
||||||
|
|
||||||
|
//#define HR_DEBUG(x) (x)
|
||||||
|
#define HR_DEBUG(x)
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
using std::all_of;
|
using std::all_of;
|
||||||
@ -51,26 +54,35 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
char *p = s->getCString();
|
char *p = s->getCString();
|
||||||
int len = s->getLength();
|
int len = s->getLength();
|
||||||
|
|
||||||
|
//accumulated displacement of chars in this string, in text object space
|
||||||
double dx = 0;
|
double dx = 0;
|
||||||
double dy = 0;
|
double dy = 0;
|
||||||
double dx1,dy1;
|
//displacement of current char, in text object space, including letter space but not word space.
|
||||||
|
double ddx, ddy;
|
||||||
|
//advance of current char, in glyph space
|
||||||
|
double ax, ay;
|
||||||
|
//origin of current char, in glyph space
|
||||||
double ox, oy;
|
double ox, oy;
|
||||||
|
|
||||||
int nChars = 0;
|
|
||||||
int nSpaces = 0;
|
|
||||||
int uLen;
|
int uLen;
|
||||||
|
|
||||||
CharCode code;
|
CharCode code;
|
||||||
Unicode *u = nullptr;
|
Unicode *u = nullptr;
|
||||||
|
|
||||||
|
HR_DEBUG(printf("HTMLRenderer::drawString:len=%d\n", len));
|
||||||
|
|
||||||
while (len > 0)
|
while (len > 0)
|
||||||
{
|
{
|
||||||
auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy);
|
auto n = font->getNextChar(p, len, &code, &u, &uLen, &ax, &ay, &ox, &oy);
|
||||||
|
HR_DEBUG(printf("HTMLRenderer::drawString:unicode=%lc(%d)\n", (wchar_t)u[0], u[0]));
|
||||||
|
|
||||||
if(!(equal(ox, 0) && equal(oy, 0)))
|
if(!(equal(ox, 0) && equal(oy, 0)))
|
||||||
{
|
{
|
||||||
cerr << "TODO: non-zero origins" << endl;
|
cerr << "TODO: non-zero origins" << endl;
|
||||||
}
|
}
|
||||||
|
ddx = ax * cur_font_size + cur_letter_space;
|
||||||
|
ddy = ay * cur_font_size;
|
||||||
|
tracer.draw_char(state, dx, dy, ax, ay);
|
||||||
|
|
||||||
bool is_space = false;
|
bool is_space = false;
|
||||||
if (n == 1 && *p == ' ')
|
if (n == 1 && *p == ' ')
|
||||||
@ -85,19 +97,19 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
* There are always ugly PDF files with no useful info at all.
|
* There are always ugly PDF files with no useful info at all.
|
||||||
*/
|
*/
|
||||||
is_space = true;
|
is_space = true;
|
||||||
++nSpaces;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(is_space && (param.space_as_offset))
|
if(is_space && (param.space_as_offset))
|
||||||
{
|
{
|
||||||
|
html_text_page.get_cur_line()->append_padding_char();
|
||||||
// ignore horiz_scaling, as it has been merged into CTM
|
// ignore horiz_scaling, as it has been merged into CTM
|
||||||
html_text_page.get_cur_line()->append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
html_text_page.get_cur_line()->append_offset((ax * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if((param.decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
|
if((param.decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
|
||||||
{
|
{
|
||||||
html_text_page.get_cur_line()->append_unicodes(u, uLen, (dx1 * cur_font_size + cur_letter_space));
|
html_text_page.get_cur_line()->append_unicodes(u, uLen, ddx);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -110,7 +122,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
{
|
{
|
||||||
uu = unicode_from_font(code, font);
|
uu = unicode_from_font(code, font);
|
||||||
}
|
}
|
||||||
html_text_page.get_cur_line()->append_unicodes(&uu, 1, (dx1 * cur_font_size + cur_letter_space));
|
html_text_page.get_cur_line()->append_unicodes(&uu, 1, ddx);
|
||||||
/*
|
/*
|
||||||
* In PDF, word_space is appended if (n == 1 and *p = ' ')
|
* In PDF, word_space is appended if (n == 1 and *p = ' ')
|
||||||
* but in HTML, word_space is appended if (uu == ' ')
|
* but in HTML, word_space is appended if (uu == ' ')
|
||||||
@ -123,19 +135,15 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dx += dx1;
|
dx += ddx * cur_horiz_scaling;
|
||||||
dy += dy1;
|
dy += ddy;
|
||||||
|
if (is_space)
|
||||||
|
dx += cur_word_space * cur_horiz_scaling;
|
||||||
|
|
||||||
++nChars;
|
|
||||||
p += n;
|
p += n;
|
||||||
len -= n;
|
len -= n;
|
||||||
}
|
}
|
||||||
|
|
||||||
// horiz_scaling is merged into ctm now,
|
|
||||||
// so the coordinate system is ugly
|
|
||||||
dx = (dx * cur_font_size + nChars * cur_letter_space + nSpaces * cur_word_space) * cur_horiz_scaling;
|
|
||||||
dy *= cur_font_size;
|
|
||||||
|
|
||||||
cur_tx += dx;
|
cur_tx += dx;
|
||||||
cur_ty += dy;
|
cur_ty += dy;
|
||||||
|
|
||||||
@ -143,4 +151,16 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
|||||||
draw_ty += dy;
|
draw_ty += dy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool HTMLRenderer::is_char_covered(int index)
|
||||||
|
{
|
||||||
|
auto covered = covered_text_detecor.get_chars_covered();
|
||||||
|
if (index < 0 || index >= (int)covered.size())
|
||||||
|
{
|
||||||
|
std::cerr << "Warning: HTMLRenderer::is_char_covered: index out of bound: "
|
||||||
|
<< index << ", size: " << covered.size() <<endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return covered[index];
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#ifndef HTMLSTATE_H__
|
#ifndef HTMLSTATE_H__
|
||||||
#define HTMLSTATE_H__
|
#define HTMLSTATE_H__
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
#include "Color.h"
|
#include "Color.h"
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
@ -62,6 +64,12 @@ struct HTMLLineState
|
|||||||
{
|
{
|
||||||
double x,y;
|
double x,y;
|
||||||
double transform_matrix[4];
|
double transform_matrix[4];
|
||||||
|
// The page-scope char index (in drawing order) of the first char in this line.
|
||||||
|
int first_char_index;
|
||||||
|
// A function to determine whether a char is covered at a given index.
|
||||||
|
std::function<bool(int)> is_char_covered;
|
||||||
|
|
||||||
|
HTMLLineState(): first_char_index(-1) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct HTMLClipState
|
struct HTMLClipState
|
||||||
|
@ -36,7 +36,14 @@ HTMLTextLine::HTMLTextLine (const HTMLLineState & line_state, const Param & para
|
|||||||
|
|
||||||
void HTMLTextLine::append_unicodes(const Unicode * u, int l, double width)
|
void HTMLTextLine::append_unicodes(const Unicode * u, int l, double width)
|
||||||
{
|
{
|
||||||
text.insert(text.end(), u, u+l);
|
if (l == 1)
|
||||||
|
text.push_back(min(u[0], (unsigned)INT_MAX));
|
||||||
|
else if (l > 1)
|
||||||
|
{
|
||||||
|
text.push_back(- decomposed_text.size() - 1);
|
||||||
|
decomposed_text.emplace_back();
|
||||||
|
decomposed_text.back().assign(u, u + l);
|
||||||
|
}
|
||||||
this->width += width;
|
this->width += width;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -69,6 +76,60 @@ void HTMLTextLine::append_state(const HTMLTextState & text_state)
|
|||||||
last_state.font_size *= last_state.font_info->font_size_scale;
|
last_state.font_size *= last_state.font_info->font_size_scale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HTMLTextLine::dump_char(std::ostream & out, int pos)
|
||||||
|
{
|
||||||
|
int c = text[pos];
|
||||||
|
if (c > 0)
|
||||||
|
{
|
||||||
|
Unicode u = c;
|
||||||
|
writeUnicodes(out, &u, 1);
|
||||||
|
}
|
||||||
|
else if (c < 0)
|
||||||
|
{
|
||||||
|
auto dt = decomposed_text[- c - 1];
|
||||||
|
writeUnicodes(out, &dt.front(), dt.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Write `len` glyphs of 'text', starting at index `begin`, to `out`.
 *
 * When line_state.first_char_index is negative the glyphs are written as-is.
 * Otherwise line_state.is_char_covered() is asked about every glyph (at index
 * first_char_index + position in 'text'), and each run of consecutive covered
 * glyphs is wrapped in a <span> carrying the fill-color and stroke-color
 * classes installed for a transparent color.
 */
void HTMLTextLine::dump_chars(ostream & out, int begin, int len)
{
    static const Color transparent(0, 0, 0, true);

    const bool have_visibility_info = !(line_state.first_char_index < 0);
    if (!have_visibility_info)
    {
        for (int k = 0; k < len; ++k)
            dump_char(out, begin + k);
        return;
    }

    // True while a transparent <span> has been opened and not yet closed.
    bool span_open = false;
    for (int k = 0; k < len; ++k)
    {
        const int pos = begin + k;
        const bool covered = line_state.is_char_covered(line_state.first_char_index + pos);

        if (covered && !span_open)
        {
            // A run of covered glyphs starts here.
            out << "<span class=\"" << all_manager.fill_color.get_css_class_name()
                << all_manager.fill_color.install(transparent) << " " << all_manager.stroke_color.get_css_class_name()
                << all_manager.stroke_color.install(transparent) << "\">";
            span_open = true;
        }
        else if (!covered && span_open)
        {
            // The run of covered glyphs ended just before this glyph.
            out << "</span>";
            span_open = false;
        }

        dump_char(out, pos);
    }

    // The range may end inside a covered run.
    if (span_open)
        out << "</span>";
}
|
||||||
|
|
||||||
void HTMLTextLine::dump_text(ostream & out)
|
void HTMLTextLine::dump_text(ostream & out)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
@ -216,7 +277,7 @@ void HTMLTextLine::dump_text(ostream & out)
|
|||||||
size_t next_text_idx = text_idx2;
|
size_t next_text_idx = text_idx2;
|
||||||
if((cur_offset_iter != offsets.end()) && (cur_offset_iter->start_idx) < next_text_idx)
|
if((cur_offset_iter != offsets.end()) && (cur_offset_iter->start_idx) < next_text_idx)
|
||||||
next_text_idx = cur_offset_iter->start_idx;
|
next_text_idx = cur_offset_iter->start_idx;
|
||||||
writeUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx);
|
dump_chars(out, cur_text_idx, next_text_idx - cur_text_idx);
|
||||||
cur_text_idx = next_text_idx;
|
cur_text_idx = next_text_idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -73,7 +73,16 @@ public:
|
|||||||
double width;
|
double width;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append a drawn char (glyph)'s unicode. l > 1 means this glyph corresponds to
|
||||||
|
* multiple code points.
|
||||||
|
*/
|
||||||
void append_unicodes(const Unicode * u, int l, double width);
|
void append_unicodes(const Unicode * u, int l, double width);
|
||||||
|
/**
|
||||||
|
* Append a special padding char with 0 width, in order to keep char index consistent.
|
||||||
|
* The padding char is ignored during output.
|
||||||
|
*/
|
||||||
|
void append_padding_char() { text.push_back(0); }
|
||||||
void append_offset(double width);
|
void append_offset(double width);
|
||||||
void append_state(const HTMLTextState & text_state);
|
void append_state(const HTMLTextState & text_state);
|
||||||
void dump_text(std::ostream & out);
|
void dump_text(std::ostream & out);
|
||||||
@ -92,6 +101,13 @@ private:
|
|||||||
void optimize_normal(std::vector<HTMLTextLine*> &);
|
void optimize_normal(std::vector<HTMLTextLine*> &);
|
||||||
void optimize_aggressive(std::vector<HTMLTextLine*> &);
|
void optimize_aggressive(std::vector<HTMLTextLine*> &);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump chars' unicode to output stream.
|
||||||
|
* begin/pos is the index in 'text'.
|
||||||
|
*/
|
||||||
|
void dump_chars(std::ostream & out, int begin, int len);
|
||||||
|
void dump_char(std::ostream & out, int pos);
|
||||||
|
|
||||||
const Param & param;
|
const Param & param;
|
||||||
AllStateManager & all_manager;
|
AllStateManager & all_manager;
|
||||||
|
|
||||||
@ -102,7 +118,16 @@ private:
|
|||||||
|
|
||||||
std::vector<State> states;
|
std::vector<State> states;
|
||||||
std::vector<Offset> offsets;
|
std::vector<Offset> offsets;
|
||||||
std::vector<Unicode> text;
|
|
||||||
|
/**
|
||||||
|
* Drawn chars (glyph) in this line are stored in 'text'. For each element c in 'text':
|
||||||
|
* - If c > 0, it is the unicode code point corresponds to the glyph;
|
||||||
|
* - If c == 0, it is a padding char, and ignored during output (TODO some bad PDFs utilize 0?);
|
||||||
|
* - If c < 0, this glyph corresponds to more than one unicode code point,
|
||||||
|
* which are stored in 'decomposed_text', and (-c-1) is the index in 'decomposed_text'.
|
||||||
|
*/
|
||||||
|
std::vector<int> text;
|
||||||
|
std::vector<std::vector<Unicode> > decomposed_text;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace pdf2htmlEX
|
} // namespace pdf2htmlEX
|
||||||
|
@ -39,6 +39,9 @@ public:
|
|||||||
void set_page_size(double width, double height);
|
void set_page_size(double width, double height);
|
||||||
void clip(const HTMLClipState & clip_state);
|
void clip(const HTMLClipState & clip_state);
|
||||||
|
|
||||||
|
double get_width() { return page_width; }
|
||||||
|
double get_height() { return page_height; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void optimize(void);
|
void optimize(void);
|
||||||
|
|
||||||
|
@ -38,6 +38,7 @@ struct Param
|
|||||||
int process_nontext;
|
int process_nontext;
|
||||||
int process_outline;
|
int process_outline;
|
||||||
int process_annotation;
|
int process_annotation;
|
||||||
|
int correct_text_visibility;
|
||||||
int printing;
|
int printing;
|
||||||
int fallback;
|
int fallback;
|
||||||
int tmp_file_size_limit;
|
int tmp_file_size_limit;
|
||||||
|
@ -187,6 +187,7 @@ void parse_options (int argc, char **argv)
|
|||||||
.add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets")
|
.add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets")
|
||||||
.add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
|
.add("tounicode", ¶m.tounicode, 0, "how to handle ToUnicode CMaps (0=auto, 1=force, -1=ignore)")
|
||||||
.add("optimize-text", ¶m.optimize_text, 0, "try to reduce the number of HTML elements used for text")
|
.add("optimize-text", ¶m.optimize_text, 0, "try to reduce the number of HTML elements used for text")
|
||||||
|
.add("correct-text-visibility", ¶m.correct_text_visibility, 0, "try to detect texts covered by other graphics and properly arrange them")
|
||||||
|
|
||||||
// background image
|
// background image
|
||||||
.add("bg-format", ¶m.bg_format, "png", "specify background image format")
|
.add("bg-format", ¶m.bg_format, "png", "specify background image format")
|
||||||
|
@ -1,8 +1,12 @@
|
|||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
|
|
||||||
|
using std::min;
|
||||||
|
using std::max;
|
||||||
|
|
||||||
namespace pdf2htmlEX {
|
namespace pdf2htmlEX {
|
||||||
|
|
||||||
void tm_transform(const double * tm, double & x, double & y, bool is_delta)
|
void tm_transform(const double * tm, double & x, double & y, bool is_delta)
|
||||||
@ -56,5 +60,31 @@ void tm_transform_bbox(const double * tm, double * bbox)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Intersect two axis-aligned boxes given as (x0, y0, x1, y1).
 *
 * The two corners of a box may be listed in either order. Returns false when
 * the overlap is empty or has zero extent on either axis; `result` is then
 * left untouched. Otherwise returns true and, if `result` is not null, stores
 * the overlap there as (x_low, y_low, x_high, y_high). `result` may alias
 * either input, because all inputs are read before anything is written.
 */
bool bbox_intersect(const double * bbox1, const double * bbox2, double * result)
{
    // Per axis: the overlap starts at the larger of the two lower bounds and
    // ends at the smaller of the two upper bounds.
    const double x_lo = std::max(std::min(bbox1[0], bbox1[2]), std::min(bbox2[0], bbox2[2]));
    const double x_hi = std::min(std::max(bbox1[0], bbox1[2]), std::max(bbox2[0], bbox2[2]));
    const double y_lo = std::max(std::min(bbox1[1], bbox1[3]), std::min(bbox2[1], bbox2[3]));
    const double y_hi = std::min(std::max(bbox1[1], bbox1[3]), std::max(bbox2[1], bbox2[3]));

    if (x_lo >= x_hi || y_lo >= y_hi)
        return false;

    if (result != nullptr)
    {
        const double overlap[4] = { x_lo, y_lo, x_hi, y_hi };
        for (int i = 0; i < 4; ++i)
            result[i] = overlap[i];
    }
    return true;
}
|
||||||
|
|
||||||
} //namespace pdf2htmlEX
|
} //namespace pdf2htmlEX
|
||||||
|
|
||||||
|
@ -24,6 +24,13 @@ static inline bool tm_equal(const double * tm1, const double * tm2, int size = 6
|
|||||||
return false;
|
return false;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Overwrite the 6-element matrix `tm` with (1, 0, 0, 1, 0, 0).
 */
static inline void tm_init(double * tm)
{
    for (int i = 0; i < 6; ++i)
        tm[i] = (i == 0 || i == 3) ? 1 : 0;
}
|
||||||
|
|
||||||
static inline void tm_multiply(double * result, const double * m1, const double * m2)
|
static inline void tm_multiply(double * result, const double * m1, const double * m2)
|
||||||
{
|
{
|
||||||
result[0] = m1[0] * m2[0] + m1[2] * m2[1];
|
result[0] = m1[0] * m2[0] + m1[2] * m2[1];
|
||||||
@ -39,6 +46,14 @@ static inline double hypot(double x, double y) { return std::sqrt(x*x+y*y); }
|
|||||||
void tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
|
void tm_transform(const double * tm, double & x, double & y, bool is_delta = false);
|
||||||
void tm_multiply(double * tm_left, const double * tm_right);
|
void tm_multiply(double * tm_left, const double * tm_right);
|
||||||
void tm_transform_bbox(const double * tm, double * bbox);
|
void tm_transform_bbox(const double * tm, double * bbox);
|
||||||
|
/**
|
||||||
|
* Calculate the intersection of 2 boxes.
|
||||||
|
* If they are intersecting, store the result to result (if not null) and return true.
|
||||||
|
* Otherwise return false, and result is not touched.
|
||||||
|
* Param result can be same as one of bbox1 and bbox2.
|
||||||
|
* Data in boxes are expected in the order of (x0, y0, x1, y1).
|
||||||
|
*/
|
||||||
|
bool bbox_intersect(const double * bbox1, const double * bbox2, double * result = nullptr);
|
||||||
|
|
||||||
} //namespace pdf2htmlEX
|
} //namespace pdf2htmlEX
|
||||||
#endif //MATH_H__
|
#endif //MATH_H__
|
||||||
|
Loading…
Reference in New Issue
Block a user