/* * text.cc * * Handling text & font, and relative stuffs * * Copyright (C) 2012 Lu Wang */ #include #include "HTMLRenderer.h" #include "TextLineBuffer.h" #include "util/namespace.h" #include "util/unicode.h" namespace pdf2htmlEX { using std::all_of; using std::cerr; using std::endl; void HTMLRenderer::drawString(GfxState * state, GooString * s) { if(s->getLength() == 0) return; auto font = state->getFont(); double cur_letter_space = state->getCharSpace(); double cur_word_space = state->getWordSpace(); // Writing mode fonts and Type 3 fonts are rendered as images // I don't find a way to display writing mode fonts in HTML except for one div for each character, which is too costly // For type 3 fonts, due to the font matrix, still it's hard to show it on HTML if( (font == nullptr) || (font->getWMode()) || (font->getType() == fontType3) ) { return; } //hidden if((state->getRender() & 3) == 3) { return; } // see if the line has to be closed due to state change check_state_change(state); prepare_text_line(state); // Now ready to output // get the unicodes char *p = s->getCString(); int len = s->getLength(); double dx = 0; double dy = 0; double dx1,dy1; double ox, oy; int nChars = 0; int nSpaces = 0; int uLen; CharCode code; Unicode *u = nullptr; while (len > 0) { auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); if(!(equal(ox, 0) && equal(oy, 0))) { cerr << "TODO: non-zero origins" << endl; } bool is_space = false; if (n == 1 && *p == ' ') { ++nSpaces; is_space = true; } if(is_space && (param->space_as_offset)) { // ignore horiz_scaling, as it's merged in CTM text_line_buf->append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale); } else { if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode)) { text_line_buf->append_unicodes(u, uLen); } else { if(cur_font_info->use_tounicode) { Unicode uu = check_unicode(u, uLen, code, font); text_line_buf->append_unicodes(&uu, 1); } else { Unicode uu = unicode_from_font(code, font); text_line_buf->append_unicodes(&uu, 1); } } } dx += dx1; dy += dy1; ++nChars; p += n; len -= n; } double hs = state->getHorizScaling(); // horiz_scaling is merged into ctm now, // so the coordinate system is ugly dx = (dx * cur_font_size + nChars * cur_letter_space + nSpaces * cur_word_space) * hs; dy *= cur_font_size; cur_tx += dx; cur_ty += dy; draw_tx += dx; draw_ty += dy; } } // namespace pdf2htmlEX