/* * text.ccc * * Handling text and relative stuffs * * by WangLu * 2012.08.14 */ #include #include #include "HTMLRenderer.h" std::string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) { // mupdf consulted Object ref_obj, font_obj, font_obj2, fontdesc_obj; Object obj, obj1, obj2; Dict * dict = nullptr; std::string suffix, subtype; char buf[1024]; int len; ofstream outf; auto * id = font->getID(); ref_obj.initRef(id->num, id->gen); ref_obj.fetch(xref, &font_obj); ref_obj.free(); if(!font_obj.isDict()) { std::cerr << "Font object is not a dictionary" << std::endl; goto err; } dict = font_obj.getDict(); if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) { if(font_obj2.arrayGetLength() == 0) { std::cerr << "Warning: empty DescendantFonts array" << std::endl; } else { if(font_obj2.arrayGetLength() > 1) std::cerr << "TODO: multiple entries in DescendantFonts array" << std::endl; if(font_obj2.arrayGet(0, &obj2)->isDict()) { dict = obj2.getDict(); } } } if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) { std::cerr << "Cannot find FontDescriptor " << std::endl; goto err; } dict = fontdesc_obj.getDict(); if(dict->lookup("FontFile3", &obj)->isStream()) { if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) { subtype = obj1.getName(); if(subtype == "Type1C") { suffix = ".cff"; } else if (subtype == "CIDFontType0C") { suffix = ".cid"; } else { std::cerr << "Unknown subtype: " << subtype << std::endl; goto err; } } else { std::cerr << "Invalid subtype in font descriptor" << std::endl; goto err; } } else if (dict->lookup("FontFile2", &obj)->isStream()) { suffix = ".ttf"; } else if (dict->lookup("FontFile", &obj)->isStream()) { suffix = ".ttf"; } else { std::cerr << "Cannot find FontFile for dump" << std::endl; goto err; } if(suffix == "") { std::cerr << "Font type unrecognized" << std::endl; goto err; } obj.streamReset(); outf.open((boost::format("%1%/f%|2$x|%3%")%TMP_DIR%fn_id%suffix).str().c_str(), ofstream::binary); while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) { outf.write(buf, len); } outf.close(); obj.streamClose(); err: obj2.free(); obj1.free(); obj.free(); fontdesc_obj.free(); font_obj2.free(); font_obj.free(); return suffix; } void HTMLRenderer::drawString(GfxState * state, GooString * s) { if(s->getLength() == 0) return; auto font = state->getFont(); if((font == nullptr) || (font->getWMode())) { return; } //hidden if((state->getRender() & 3) == 3) { return; } // see if the line has to be closed due to state change check_state_change(state); // if the line is still open, try to merge with it if(line_opened) { double target = (cur_tx - draw_tx) * draw_scale; if(target > -param->h_eps) { if(target > param->h_eps) { double w; auto wid = install_whitespace(target, w); html_fout << boost::format(" ") % wid; draw_tx += w / draw_scale; } } else { // or can we shift left using simple tags? close_cur_line(); } } if(!line_opened) { // have to open a new line // classes html_fout << "
transform(state->getCurX(), state->getCurY(), &x, &y); // TODO: recheck descent/ascent html_fout << "\" style=\"" << "bottom:" << (y + state->getFont()->getDescent() * draw_font_size) << "px;" << "top:" << (pageHeight - y - state->getFont()->getAscent() * draw_font_size) << "px;" << "left:" << x << "px;" ; } // TODO: tracking // letter & word spacing if(_is_positive(state->getCharSpace())) html_fout << "letter-spacing:" << state->getCharSpace() << "px;"; if(_is_positive(state->getWordSpace())) html_fout << "word-spacing:" << state->getWordSpace() << "px;"; //debug //real pos & hori_scale if(0) { #if 0 html_fout << "\""; double x,y; state->transform(state->getCurX(), state->getCurY(), &x, &y); html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-drawscale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%") %x%y%(state->getHorizScaling())%draw_scale%state->getLineX()%state->getLineY(); #endif } html_fout << "\">"; line_opened = true; draw_tx = cur_tx; } // Now ready to output // get the unicodes char *p = s->getCString(); int len = s->getLength(); double dx = 0; double dy = 0; double dx1,dy1; double ox, oy; int nChars = 0; int nSpaces = 0; int uLen; CharCode code; Unicode *u = nullptr; while (len > 0) { auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); if(!(_equal(ox, 0) && _equal(oy, 0))) { std::cerr << "TODO: non-zero origins" << std::endl; } if(uLen == 0) { // TODO #if 0 CharCode c = 0; for(int i = 0; i < n; ++i) { c = (c<<8) | (code&0xff); code >>= 8; } for(int i = 0; i < n; ++i) { Unicode u = (c&0xff); c >>= 8; outputUnicodes(html_fout, &u, 1); } #endif } else { outputUnicodes(html_fout, u, uLen); } dx += dx1; dy += dy1; if (n == 1 && *p == ' ') { ++nSpaces; } ++nChars; p += n; len -= n; } dx = (dx * state->getFontSize() + nChars * state->getCharSpace() + nSpaces * state->getWordSpace()) * state->getHorizScaling(); dy *= state->getFontSize(); cur_tx += dx; cur_ty += dy; draw_tx += dx; draw_ty += dy; }