1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-10-06 12:01:39 +00:00
pdf2htmlEX/src/HTMLRenderer/state.cc

484 lines
15 KiB
C++
Raw Normal View History

2012-08-14 06:35:55 +00:00
/*
 * state.cc
 *
 * Track PDF states
 *
 * Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
 */
2012-10-02 18:19:40 +00:00
#include <cmath>
#include <algorithm>
2012-08-14 06:35:55 +00:00
#include "HTMLRenderer.h"
2013-04-06 08:45:01 +00:00
2012-11-29 09:28:05 +00:00
#include "util/namespace.h"
2012-11-29 10:16:05 +00:00
#include "util/math.h"
2012-09-12 15:26:14 +00:00
namespace pdf2htmlEX {
using std::max;
2012-08-31 15:14:05 +00:00
using std::abs;
/*
 * Flag every tracked state as changed and resynchronise the tracked text
 * position (cur_tx, cur_ty) from the given graphics state.
 */
void HTMLRenderer::updateAll(GfxState * state)
{
    this->all_changed = true;
    this->updateTextPos(state);
}
/*
 * Record that the text rise changed; the new value is read later from the
 * GfxState in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateRise(GfxState * state)
{
    this->rise_changed = true;
}
/*
 * Record that the text position changed and take the new tracked position
 * (cur_tx, cur_ty) from the line origin reported by the graphics state.
 */
void HTMLRenderer::updateTextPos(GfxState * state)
{
    this->text_pos_changed = true;

    this->cur_tx = state->getLineX();
    this->cur_ty = state->getLineY();
}
/*
 * Apply a horizontal text shift to the tracked x position.
 *
 * `shift` is scaled by 0.001, the current font size and the horizontal
 * scaling before being subtracted from cur_tx.
 */
void HTMLRenderer::updateTextShift(GfxState * state, double shift)
{
    this->text_pos_changed = true;

    const double displacement = shift * 0.001 * state->getFontSize() * state->getHorizScaling();
    this->cur_tx -= displacement;
}
/*
 * Record that the font changed; font and font size are re-read from the
 * GfxState in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateFont(GfxState * state)
{
    this->font_changed = true;
}
/*
 * Record that the CTM changed. None of the arguments are used here; the
 * matrix is read from the GfxState in check_state_change().
 */
void HTMLRenderer::updateCTM(GfxState * state, double m11, double m12, double m21, double m22, double m31, double m32)
{
    this->ctm_changed = true;
}
/*
 * Record that the text matrix changed; it is read from the GfxState in
 * check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateTextMat(GfxState * state)
{
    this->text_mat_changed = true;
}
/*
 * Record that the horizontal scaling changed; the value is read from the
 * GfxState in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateHorizScaling(GfxState * state)
{
    this->hori_scale_changed = true;
}
/*
 * Record that the character spacing changed; check_state_change() turns it
 * into the HTML letter spacing. `state` is not used here.
 */
void HTMLRenderer::updateCharSpace(GfxState * state)
{
    this->letter_space_changed = true;
}
/*
 * Record that the word spacing changed; the value is read from the GfxState
 * in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateWordSpace(GfxState * state)
{
    this->word_space_changed = true;
}
2013-01-24 12:39:24 +00:00
/*
 * Record that the text rendering mode changed.
 *
 * The rendering mode is currently tracked for colors only, so both color
 * flags are raised; a dedicated render_changed flag might be needed later.
 */
void HTMLRenderer::updateRender(GfxState * state)
{
    this->fill_color_changed = true;
    this->stroke_color_changed = true;
}
/*
 * A change of the fill color space is treated as a change of the fill
 * color. `state` is not used here.
 */
void HTMLRenderer::updateFillColorSpace(GfxState * state)
{
    this->fill_color_changed = true;
}
/*
 * A change of the stroke color space is treated as a change of the stroke
 * color. `state` is not used here.
 */
void HTMLRenderer::updateStrokeColorSpace(GfxState * state)
{
    this->stroke_color_changed = true;
}
/*
 * Record that the fill color changed; the color is read from the GfxState
 * in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateFillColor(GfxState * state)
{
    this->fill_color_changed = true;
}
2013-01-24 12:39:24 +00:00
/*
 * Record that the stroke color changed; the color is read from the GfxState
 * in check_state_change(). `state` is not used here.
 */
void HTMLRenderer::updateStrokeColor(GfxState * state)
{
    this->stroke_color_changed = true;
}
2013-01-30 15:56:27 +00:00
void HTMLRenderer::reset_state()
{
draw_text_scale = 1.0;
2013-02-05 06:41:00 +00:00
cur_font_size = 0.0;
2013-01-30 15:56:27 +00:00
memcpy(cur_text_tm, ID_MATRIX, sizeof(cur_text_tm));
2013-04-04 13:19:28 +00:00
// reset html_state
2013-04-04 13:25:23 +00:00
cur_html_state.font_info = install_font(nullptr);
2013-04-04 13:19:28 +00:00
cur_html_state.font_size = 0;
cur_html_state.fill_color.transparent = true;
cur_html_state.stroke_color.transparent = true;
cur_html_state.letter_space = 0;
cur_html_state.word_space = 0;
2013-04-05 13:53:34 +00:00
cur_html_state.vertical_align = 0;
2013-04-04 13:19:28 +00:00
cur_html_state.x = 0;
cur_html_state.y = 0;
memcpy(cur_html_state.transform_matrix, ID_MATRIX, sizeof(cur_html_state.transform_matrix));
2013-02-05 07:12:17 +00:00
2013-02-05 12:37:05 +00:00
cur_tx = cur_ty = 0;
2013-01-30 15:56:27 +00:00
draw_tx = draw_ty = 0;
reset_state_change();
all_changed = true;
}
/*
 * Clear every "changed" flag that the update*() functions may have raised.
 */
void HTMLRenderer::reset_state_change()
{
    all_changed = false;

    // position and font
    rise_changed = text_pos_changed = font_changed = false;

    // matrices and scaling
    ctm_changed = text_mat_changed = hori_scale_changed = false;

    // spacing
    letter_space_changed = word_space_changed = false;

    // colors
    fill_color_changed = stroke_color_changed = false;
}
2012-08-14 06:35:55 +00:00
/*
 * Bring cur_html_state in line with the PDF graphics state.
 *
 * Every state flagged by the update*() functions (or everything, when
 * all_changed is set) is re-read from `state` and compared with the tracked
 * values. new_line_state starts as NLS_NONE and is raised to NLS_SPAN or
 * NLS_DIV whenever a difference is found; it is consumed afterwards by
 * prepare_text_line(). All change flags are cleared at the end.
 *
 * DEPENDENCY WARNING
 * The checks below depend on each other's results; do not reorder them.
 */
void HTMLRenderer::check_state_change(GfxState * state)
{
    // new_line_state is only ever raised, never lowered, within this function
    auto require = [this](NewLineState wanted)
    {
        new_line_state = max<NewLineState>(new_line_state, wanted);
    };

    new_line_state = NLS_NONE;

    bool position_dirty = false;     // text position has to be re-checked
    bool font_scale_dirty = false;   // font size / matrix have to be rescaled
    bool draw_scale_dirty = false;   // draw_text_scale got a new value

    // keep a snapshot of the values the position check needs later on
    HTMLState prev_html_state = cur_html_state;
    double prev_text_tm[6];
    memcpy(prev_text_tm, cur_text_tm, sizeof(prev_text_tm));
    double prev_draw_text_scale = draw_text_scale;

    // ---- text position ----
    // cur_tx / cur_ty are maintained by the update*() functions themselves,
    // so only the need for a re-check is noted here
    if(all_changed || text_pos_changed)
        position_dirty = true;

    // ---- font name & size ----
    if(all_changed || font_changed)
    {
        const FontInfo * installed = install_font(state->getFont());

        if(!(installed->id == cur_html_state.font_info->id))
        {
            // The width of Type 3 font text, if shown, is likely to be wrong,
            // so such text goes into separate (absolutely positioned) blocks
            // where it cannot affect other text.
            // TODO: consider the font matrix and estimate the metrics
            const bool type3_involved = installed->is_type3 || cur_html_state.font_info->is_type3;
            require(type3_involved ? NLS_DIV : NLS_SPAN);

            cur_html_state.font_info = installed;
        }

        double pdf_font_size = state->getFontSize();
        if(!equal(cur_font_size, pdf_font_size))
        {
            font_scale_dirty = true;
            cur_font_size = pdf_font_size;
        }
    }

    // ---- ctm & text matrix & horizontal scaling & rise ----
    if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed || rise_changed)
    {
        // matrix carrying the horizontal scaling and the rise
        double scale_rise_tm[6] = { state->getHorizScaling(), 0, 0, 1, 0, state->getRise() };

        double ctm_x_textmat[6];
        tm_multiply(ctm_x_textmat, state->getCTM(), state->getTextMat());

        double combined_tm[6];
        tm_multiply(combined_tm, ctm_x_textmat, scale_rise_tm);

        if(!tm_equal(combined_tm, cur_text_tm))
        {
            position_dirty = true;
            font_scale_dirty = true;
            memcpy(cur_text_tm, combined_tm, sizeof(cur_text_tm));
        }
    }

    // ---- draw_text_tm, draw_text_scale ----
    // depends: font size & ctm & text_ctm & hori scale
    if(font_scale_dirty)
    {
        /*
         * Rescale the font.
         * With font-size 1 and matrix [10,0,0,10,0,0] we would rather use
         * font-size 10 and matrix [1,0,0,1,0,0], which is easier and more
         * natural for web browsers.
         */
        double draw_tm[6];
        memcpy(draw_tm, cur_text_tm, sizeof(draw_tm));

        // how the matrix (together with text_scale_factor2) stretches the vector (0,1)
        double scale = 1.0/text_scale_factor2 * hypot(draw_tm[2], draw_tm[3]);
        double draw_font_size = cur_font_size;

        if(is_positive(scale))
        {
            // move the scale from the matrix into the font size
            draw_font_size *= scale;
            for(int k = 0; k < 4; ++k)
                draw_tm[k] /= scale;
        }
        else
        {
            scale = 1.0;
        }

        if(!is_positive(draw_font_size))
        {
            // CSS cannot handle flipped pages: negate font size and matrix together
            draw_font_size = -draw_font_size;
            for(int k = 0; k < 4; ++k)
                draw_tm[k] = -draw_tm[k];
        }

        if(!equal(scale, draw_text_scale))
        {
            draw_scale_dirty = true;
            draw_text_scale = scale;
        }

        if(!equal(draw_font_size, cur_html_state.font_size))
        {
            require(NLS_SPAN);
            cur_html_state.font_size = draw_font_size;
        }

        if(!tm_equal(draw_tm, cur_html_state.transform_matrix, 4))
        {
            require(NLS_DIV);
            memcpy(cur_html_state.transform_matrix, draw_tm, sizeof(cur_html_state.transform_matrix));
        }
    }

    // ---- can the new text stay on the current line with a position shift? ----
    // skipped when a new DIV is required anyway
    // depends: text position & transformation
    if(position_dirty && (new_line_state < NLS_DIV))
    {
        /*
         * TM[4] and/or TM[5] have been changed.
         * Look for an offset (dx,dy) that cancels the effect:
         *
         * CurTM * (cur_tx, cur_ty, 1)^T = OldTM * (draw_tx + dx, draw_ty + dy, 1)^T
         *
         * The first 4 elements of CurTM and OldTM have to be proportional,
         * otherwise the following text cannot be parallel.
         *
         * NOTE:
         * dx,dy are applied in the old state, so they are multiplied by the
         * old draw text scale.
         */
        bool merged = false;
        double dx = 0;
        double dy = 0;

        if(tm_equal(prev_html_state.transform_matrix, cur_html_state.transform_matrix, 4))
        {
            double det = prev_text_tm[0] * prev_text_tm[3] - prev_text_tm[1] * prev_text_tm[2];
            if(!equal(det, 0))
            {
                double rhs_x = cur_text_tm[0] * cur_tx + cur_text_tm[2] * cur_ty + cur_text_tm[4] - prev_text_tm[0] * draw_tx - prev_text_tm[2] * draw_ty - prev_text_tm[4];
                double rhs_y = cur_text_tm[1] * cur_tx + cur_text_tm[3] * cur_ty + cur_text_tm[5] - prev_text_tm[1] * draw_tx - prev_text_tm[3] * draw_ty - prev_text_tm[5];

                /*
                 * The system to solve is now
                 *
                 * rhs_x = OldTM[0] * dx + OldTM[2] * dy
                 * rhs_y = OldTM[1] * dx + OldTM[3] * dy
                 *
                 * which is done with the inverse of the 2x2 part of OldTM.
                 */
                double inv0 = prev_text_tm[3] / det;
                double inv1 = -prev_text_tm[1] / det;
                double inv2 = -prev_text_tm[2] / det;
                double inv3 = prev_text_tm[0] / det;

                dx = inv0 * rhs_x + inv2 * rhs_y;
                dy = inv1 * rhs_x + inv3 * rhs_y;

                if(equal(dy, 0))
                {
                    // same horizontal line: positive or negative x-offsets can be inserted
                    merged = true;
                }
                else if((dx * prev_draw_text_scale) >= -(prev_html_state.font_info->ascent - prev_html_state.font_info->descent) * prev_html_state.font_size - EPS)
                {
                    // different baseline: merge only when the text is not
                    // shifted too far to the left (checked above) and the
                    // vertical extents of old and new text still overlap
                    double prev_bottom = prev_html_state.font_info->descent * prev_html_state.font_size;
                    double prev_top = prev_html_state.font_info->ascent * prev_html_state.font_size;
                    double bottom = dy * prev_draw_text_scale + cur_html_state.font_info->descent * cur_html_state.font_size;
                    double top = dy * prev_draw_text_scale + cur_html_state.font_info->ascent * cur_html_state.font_size;

                    if((bottom <= prev_top + EPS) && (top >= prev_bottom - EPS))
                        merged = true;
                }
            }
            // else: singular matrix, no solution
        }
        // else: transform differs, force a new line

        if(!merged)
        {
            require(NLS_DIV);
        }
        else
        {
            text_lines.back()->append_offset(dx * prev_draw_text_scale);

            if(equal(dy, 0))
            {
                cur_html_state.vertical_align = 0;
            }
            else
            {
                cur_html_state.vertical_align = (dy * prev_draw_text_scale);
                require(NLS_SPAN);
            }

            draw_tx = cur_tx;
            draw_ty = cur_ty;
        }
    }

    // ---- letter space ----
    // depends: draw_text_scale
    if(all_changed || letter_space_changed || draw_scale_dirty)
    {
        double scaled_letter_space = state->getCharSpace() * draw_text_scale;
        if(!equal(scaled_letter_space, cur_html_state.letter_space))
        {
            cur_html_state.letter_space = scaled_letter_space;
            require(NLS_SPAN);
        }
    }

    // ---- word space ----
    // depends: draw_text_scale
    if(all_changed || word_space_changed || draw_scale_dirty)
    {
        double scaled_word_space = state->getWordSpace() * draw_text_scale;
        if(!equal(scaled_word_space, cur_html_state.word_space))
        {
            cur_html_state.word_space = scaled_word_space;
            require(NLS_SPAN);
        }
    }

    // ---- fill color ----
    if((!(param.fallback)) && (all_changed || fill_color_changed))
    {
        // PDF Spec. Table 106 - Text rendering modes: which modes fill the glyphs
        static const bool MODE_FILLS[8] = { true, false, true, false, true, false, true, false };

        int mode = state->getRender();
        assert((mode >= 0) && (mode < 8));

        Color fill;
        fill.transparent = !MODE_FILLS[mode];
        if(!fill.transparent)
            state->getFillRGB(&fill.rgb);

        if(!(fill == cur_html_state.fill_color))
        {
            cur_html_state.fill_color = fill;
            require(NLS_SPAN);
        }
    }

    // ---- stroke color ----
    if((!(param.fallback)) && (all_changed || stroke_color_changed))
    {
        // PDF Spec. Table 106 - Text rendering modes: which modes stroke the glyphs
        static const bool MODE_STROKES[8] = { false, true, true, false, false, true, true, false };

        int mode = state->getRender();
        assert((mode >= 0) && (mode < 8));

        Color stroke;
        stroke.transparent = !MODE_STROKES[mode];
        if(!stroke.transparent)
            state->getStrokeRGB(&stroke.rgb);

        if(!(stroke == cur_html_state.stroke_color))
        {
            cur_html_state.stroke_color = stroke;
            require(NLS_SPAN);
        }
    }

    reset_state_change();
}
2012-10-01 17:59:04 +00:00
/*
 * Get the last entry of text_lines ready to receive text.
 *
 * When no line is open, or new_line_state asks for NLS_DIV, the current line
 * is closed and the drawing position is resynchronised with the tracked
 * position. Otherwise any remaining horizontal gap is appended to the
 * current line as an offset. Unless new_line_state is NLS_NONE,
 * cur_html_state is appended to the line as well.
 */
void HTMLRenderer::prepare_text_line(GfxState * state)
{
    // nothing open yet: a fresh block is always required
    if(!line_opened)
        new_line_state = NLS_DIV;

    if(new_line_state != NLS_DIV)
    {
        // stay on the current line: align the horizontal position so the
        // text merges with what has been drawn so far
        double gap = (cur_tx - draw_tx) * draw_text_scale;
        if(!equal(gap, 0))
        {
            text_lines.back()->append_offset(gap);
            draw_tx += gap / draw_text_scale;
        }
    }
    else
    {
        close_text_line();

        // store the transformed current position in the state, so that it
        // gets recorded together with the state below
        state->transform(state->getCurX(), state->getCurY(), &cur_html_state.x, &cur_html_state.y);
        cur_html_state.vertical_align = 0;

        // resync the drawing position with the tracked one
        draw_ty = cur_ty;
        draw_tx = cur_tx;
    }

    if(new_line_state != NLS_NONE)
        text_lines.back()->append_state(cur_html_state);

    line_opened = true;
}
2012-09-04 15:33:15 +00:00
2012-10-01 17:59:04 +00:00
void HTMLRenderer::close_text_line()
2012-08-14 06:35:55 +00:00
{
2012-09-04 15:33:15 +00:00
if(line_opened)
{
line_opened = false;
2013-04-06 15:51:33 +00:00
text_lines.emplace_back(new HTMLTextLine(param, all_manager));
2012-09-04 15:33:15 +00:00
}
}
2012-09-12 15:26:14 +00:00
} //namespace pdf2htmlEX