1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-09-19 21:50:08 +00:00
pdf2htmlEX/src/HTMLRenderer/text.cc

130 lines
3.1 KiB
C++
Raw Normal View History

2012-08-14 08:23:15 +00:00
/*
* text.cc
2012-08-14 08:23:15 +00:00
*
* Handling text & fonts, and related stuff
2012-08-14 08:23:15 +00:00
*
2012-10-05 15:38:17 +00:00
* Copyright (C) 2012 Lu Wang <coolwanglu@gmail.com>
2012-08-14 08:23:15 +00:00
*/
2013-02-05 14:07:51 +00:00
#include <algorithm>
2012-08-14 08:23:15 +00:00
#include "HTMLRenderer.h"
2012-12-11 12:48:01 +00:00
#include "TextLineBuffer.h"
2012-11-29 09:28:05 +00:00
#include "util/namespace.h"
2012-11-29 09:45:26 +00:00
#include "util/unicode.h"
2012-08-14 08:23:15 +00:00
2012-09-12 15:26:14 +00:00
namespace pdf2htmlEX {
2012-09-06 07:09:47 +00:00
using std::all_of;
2012-11-29 10:28:07 +00:00
using std::cerr;
using std::endl;
2012-08-20 21:48:21 +00:00
2012-08-14 08:23:15 +00:00
/*
 * Append the characters of one string to the current text line buffer,
 * then advance the text and drawing positions by the string's total displacement.
 *
 * state: graphics state providing the font, character/word spacing and horizontal scaling
 * s:     the string to draw; it is walked character by character via the font's getNextChar()
 *
 * Nothing is emitted for empty strings, a missing font, writing mode fonts or Type 3 fonts.
 */
void HTMLRenderer::drawString(GfxState * state, GooString * s)
{
    if(s->getLength() == 0)
        return;

    auto font = state->getFont();
    double char_spacing = state->getCharSpace();
    double word_spacing = state->getWordSpace();

    // Writing mode fonts and Type 3 fonts are rendered as images instead:
    // - writing mode fonts would need one div per character in HTML, which is too costly
    // - Type 3 fonts are hard to show in HTML because of their font matrix
    if(font == nullptr)
        return;
    if(font->getWMode())
        return;
    if(font->getType() == fontType3)
        return;

    // a state change may force the current line to be closed first
    check_state_change(state);
    prepare_text_line(state);

    // Walk through the string and collect the unicodes
    char *cursor = s->getCString();
    int remaining = s->getLength();

    // displacement accumulated over the whole string, not yet scaled by the font size
    double total_dx = 0;
    double total_dy = 0;

    // per-character outputs of getNextChar()
    double glyph_dx;
    double glyph_dy;
    double origin_x;
    double origin_y;
    int uni_len;
    CharCode char_code;
    Unicode *unicodes = nullptr;

    int char_count = 0;
    int space_count = 0;

    while(remaining > 0)
    {
        auto consumed = font->getNextChar(cursor, remaining, &char_code, &unicodes, &uni_len, &glyph_dx, &glyph_dy, &origin_x, &origin_y);

        if(!equal(origin_x, 0) || !equal(origin_y, 0))
        {
            cerr << "TODO: non-zero origins" << endl;
        }

        // only a single-byte code equal to ' ' counts as a space (word spacing applies to it)
        const bool is_space = (consumed == 1) && (*cursor == ' ');
        if(is_space)
            ++space_count;

        if(is_space && (param->space_as_offset))
        {
            // horiz_scaling is ignored here, as it's merged in CTM
            text_line_buf->append_offset((glyph_dx * cur_font_size + char_spacing + word_spacing) * draw_text_scale);
        }
        else if((param->decompose_ligature) && (uni_len > 1) && all_of(unicodes, unicodes + uni_len, isLegalUnicode))
        {
            // emit every unicode the character maps to
            text_line_buf->append_unicodes(unicodes, uni_len);
        }
        else
        {
            // emit exactly one unicode for this character
            Unicode mapped = (cur_font_info->use_tounicode)
                ? check_unicode(unicodes, uni_len, char_code, font)
                : unicode_from_font(char_code, font);
            text_line_buf->append_unicodes(&mapped, 1);
        }

        total_dx += glyph_dx;
        total_dy += glyph_dy;

        ++char_count;
        cursor += consumed;
        remaining -= consumed;
    }

    double hs = state->getHorizScaling();

    // horiz_scaling is merged into ctm now,
    // so the coordinate system is ugly
    const double advance_x = (total_dx * cur_font_size + char_count * char_spacing + space_count * word_spacing) * hs;
    const double advance_y = total_dy * cur_font_size;

    cur_tx += advance_x;
    cur_ty += advance_y;

    draw_tx += advance_x;
    draw_ty += advance_y;
}
2012-09-12 15:26:14 +00:00
} // namespace pdf2htmlEX