diff --git a/CMakeLists.txt b/CMakeLists.txt index 42b9ea4..8a0d41f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,9 +24,12 @@ add_executable(pdf2htmlEX src/pdf2htmlEX.cc src/HTMLRenderer.h src/HTMLRenderer.cc + src/HTMLRenderer/general.cc src/HTMLRenderer/state.cc src/HTMLRenderer/install.cc src/HTMLRenderer/export.cc + src/HTMLRenderer/text.cc + src/HTMLRenderer/image.cc src/BackgroundRenderer.h src/BackgroundRenderer.cc src/Consts.h diff --git a/README.md b/README.md index 546e6ee..197f1e7 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Not supported yet Dependency ---------------------------- * libpoppler with xpdf header >= 0.20.2 -* boost c++ library (format, program options, gil) +* boost c++ library (format, program options, gil, filesystem) * fontforge **Please use [the lastest version](https://github.com/fontforge/fontforge)** HOW TO COMPILE diff --git a/src/HTMLRenderer.cc b/src/HTMLRenderer.cc deleted file mode 100644 index c1e7b84..0000000 --- a/src/HTMLRenderer.cc +++ /dev/null @@ -1,383 +0,0 @@ -/* - * HTMLRenderer.cc - * - * Copyright (C) 2011 by Hongliang TIAN(tatetian@gmail.com) - * Copyright (C) 2012 by Lu Wang coolwanglugmail.com - */ - -/* - * TODO - * font base64 embedding - */ - -#include -#include -#include -#include - -#include -#include -#include -// for gil bug -const int *int_p_NULL = nullptr; -#include -#include - -#include -#include -#include -#include -#include - -#include "HTMLRenderer.h" -#include "BackgroundRenderer.h" -#include "Consts.h" -#include "util.h" -#include "config.h" - -/* - * CSS classes - * - * p - Page - * l - Line - * w - White space - * i - Image - * - * - * Reusable CSS classes - * - * f - Font (also for font names) - * s - font Size - * w - White space - * t - Transform matrix - * c - Color - * - */ - -HTMLRenderer::HTMLRenderer(const Param * param) - :line_opened(false) - ,html_fout(param->output_filename.c_str(), ofstream::binary) - ,allcss_fout("all.css") - ,fontscript_fout(TMP_DIR+"/convert.pe") - ,image_count(0) - ,param(param) -{ - // install default font & size - install_font(nullptr); - install_font_size(0); - - install_transform_matrix(id_matrix); - - GfxRGB black; - black.r = black.g = black.b = 0; - install_color(&black); -} - -HTMLRenderer::~HTMLRenderer() -{ -} - -void HTMLRenderer::process(PDFDoc *doc) -{ - std::cerr << "Processing Text: "; - write_html_head(); - xref = doc->getXRef(); - for(int i = param->first_page; i <= param->last_page ; ++i) - { - doc->displayPage(this, i, param->h_dpi, param->v_dpi, - 0, true, false, false, - nullptr, nullptr, nullptr, nullptr); - - std::cerr << "."; - std::cerr.flush(); - } - write_html_tail(); - std::cerr << std::endl; - - if(param->process_nontext) - { - // Render non-text objects as image - std::cerr << "Processing Others: "; - // copied from poppler - SplashColor color; - color[0] = color[1] = color[2] = 255; - - auto bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); - bg_renderer->startDoc(doc); - - for(int i = param->first_page; i <= param->last_page ; ++i) - { - doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2, - 0, true, false, false, - nullptr, nullptr, nullptr, nullptr); - bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(boost::format("p%|1$x|.png")%i).str().c_str(), param->h_dpi2, param->v_dpi2); - - std::cerr << "."; - std::cerr.flush(); - } - delete bg_renderer; - std::cerr << std::endl; - } -} - -void HTMLRenderer::write_html_head() -{ - html_fout << boost::filesystem::ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf(); -} - -void HTMLRenderer::write_html_tail() -{ - html_fout << boost::filesystem::ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf(); -} - -void HTMLRenderer::startPage(int pageNum, GfxState *state) -{ - this->pageNum = pageNum; - this->pageWidth = state->getPageWidth(); - this->pageHeight = state->getPageHeight(); - - assert(!line_opened); - - html_fout << boost::format("
" << endl; - - cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0; - cur_tx = cur_ty = 0; - cur_font_size = 0; - - memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); - memcpy(draw_ctm, id_matrix, sizeof(draw_ctm)); - draw_font_size = 0; - draw_scale = 1.0; - draw_tx = draw_ty = 0; - - cur_color.r = cur_color.g = cur_color.b = 0; - - reset_state_track(); -} - -void HTMLRenderer::endPage() { - close_cur_line(); - // close page - html_fout << "
" << endl; -} - - -void HTMLRenderer::drawString(GfxState * state, GooString * s) -{ - if(s->getLength() == 0) - return; - - auto font = state->getFont(); - if((font == nullptr) || (font->getWMode())) - { - return; - } - - //hidden - if((state->getRender() & 3) == 3) - { - return; - } - - // see if the line has to be closed due to state change - check_state_change(state); - - // if the line is still open, try to merge with it - if(line_opened) - { - double target = (cur_tx - draw_tx) * draw_scale; - if(target > -param->h_eps) - { - if(target > param->h_eps) - { - double w; - auto wid = install_whitespace(target, w); - html_fout << boost::format(" ") % wid; - draw_tx += w / draw_scale; - } - } - else - { - // or can we shift left using simple tags? - close_cur_line(); - } - } - - if(!line_opened) - { - // have to open a new line - - // classes - html_fout << "
transform(state->getCurX(), state->getCurY(), &x, &y); - // TODO: recheck descent/ascent - html_fout << "\" style=\"" - << "bottom:" << (y + state->getFont()->getDescent() * draw_font_size) << "px;" - << "top:" << (pageHeight - y - state->getFont()->getAscent() * draw_font_size) << "px;" - << "left:" << x << "px;" - ; - } - - // TODO: tracking - // letter & word spacing - if(_is_positive(state->getCharSpace())) - html_fout << "letter-spacing:" << state->getCharSpace() << "px;"; - if(_is_positive(state->getWordSpace())) - html_fout << "word-spacing:" << state->getWordSpace() << "px;"; - - //debug - //real pos & hori_scale - if(0) - { -#if 0 - html_fout << "\""; - double x,y; - state->transform(state->getCurX(), state->getCurY(), &x, &y); - html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-drawscale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%") - %x%y%(state->getHorizScaling())%draw_scale%state->getLineX()%state->getLineY(); -#endif - } - - html_fout << "\">"; - - line_opened = true; - - draw_tx = cur_tx; - } - - - // Now ready to output - // get the unicodes - char *p = s->getCString(); - int len = s->getLength(); - - double dx = 0; - double dy = 0; - double dx1,dy1; - double ox, oy; - - int nChars = 0; - int nSpaces = 0; - int uLen; - - CharCode code; - Unicode *u = nullptr; - - while (len > 0) { - auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); - - if(!(_equal(ox, 0) && _equal(oy, 0))) - { - std::cerr << "TODO: non-zero origins" << std::endl; - } - - if(uLen == 0) - { - // TODO -#if 0 - CharCode c = 0; - for(int i = 0; i < n; ++i) - { - c = (c<<8) | (code&0xff); - code >>= 8; - } - for(int i = 0; i < n; ++i) - { - Unicode u = (c&0xff); - c >>= 8; - outputUnicodes(html_fout, &u, 1); - } -#endif - } - else - { - outputUnicodes(html_fout, u, uLen); - } - - dx += dx1; - dy += dy1; - - if (n == 1 && *p == ' ') - { - ++nSpaces; - } - - ++nChars; - p += n; - len -= n; - } - - dx = (dx * state->getFontSize() - + nChars * state->getCharSpace() - + nSpaces * state->getWordSpace()) * state->getHorizScaling(); - - dy *= state->getFontSize(); - - cur_tx += dx; - cur_ty += dy; - - draw_tx += dx; - draw_ty += dy; -} - -void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg) -{ - if(maskColors) - return; - - boost::gil::rgb8_image_t img(width, height); - auto imgview = view(img); - auto loc = imgview.xy_at(0,0); - - ImageStream * img_stream = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits()); - img_stream->reset(); - - for(int i = 0; i < height; ++i) - { - auto p = img_stream->getLine(); - for(int j = 0; j < width; ++j) - { - GfxRGB rgb; - colorMap->getRGB(p, &rgb); - - *loc = boost::gil::rgb8_pixel_t(colToByte(rgb.r), colToByte(rgb.g), colToByte(rgb.b)); - - p += colorMap->getNumPixelComps(); - - ++ loc.x(); - } - - loc = imgview.xy_at(0, i+1); - } - - boost::gil::png_write_view((boost::format("i%|1$x|.png")%image_count).str(), imgview); - - img_stream->close(); - delete img_stream; - - close_cur_line(); - - double * ctm = state->getCTM(); - ctm[4] = ctm[5] = 0.0; - html_fout << boost::format("") % image_count % install_transform_matrix(ctm) % state->getCurX() % state->getCurY() % width % height << endl; - - - ++ image_count; -} - - - - - diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index f326a46..840e5cf 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -12,6 +12,26 @@ #include #include + +/* + * CSS classes + * + * p - Page + * l - Line + * w - White space + * i - Image + * + * + * Reusable CSS classes + * + * f - Font (also for font names) + * s - font Size + * w - White space + * t - Transform matrix + * c - Color + * + */ + void HTMLRenderer::export_remote_font(long long fn_id, const string & suffix, const string & format, GfxFont * font) { allcss_fout << boost::format("@font-face{font-family:f%|1$x|;src:url(f%|1$x|%2%)format(\"%3%\");}.f%|1$x|{font-family:f%|1$x|;") % fn_id % suffix % format; diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc new file mode 100644 index 0000000..60cd67c --- /dev/null +++ b/src/HTMLRenderer/general.cc @@ -0,0 +1,139 @@ +/* + * general.cc + * + * Hanlding general stuffs + * + * TODO: better name for this file? + * + * by WangLu + * 2012.08.14 + */ + +#include + +#include +#include + +#include + +#include "HTMLRenderer.h" +#include "BackgroundRenderer.h" +#include "config.h" + +HTMLRenderer::HTMLRenderer(const Param * param) + :line_opened(false) + ,html_fout(param->output_filename.c_str(), ofstream::binary) + ,allcss_fout("all.css") + ,fontscript_fout(TMP_DIR+"/convert.pe") + ,image_count(0) + ,param(param) +{ + // install default font & size + install_font(nullptr); + install_font_size(0); + + install_transform_matrix(id_matrix); + + GfxRGB black; + black.r = black.g = black.b = 0; + install_color(&black); +} + +HTMLRenderer::~HTMLRenderer() +{ } + +void HTMLRenderer::process(PDFDoc *doc) +{ + std::cerr << "Processing Text: "; + write_html_head(); + xref = doc->getXRef(); + for(int i = param->first_page; i <= param->last_page ; ++i) + { + doc->displayPage(this, i, param->h_dpi, param->v_dpi, + 0, true, false, false, + nullptr, nullptr, nullptr, nullptr); + + std::cerr << "."; + std::cerr.flush(); + } + write_html_tail(); + std::cerr << std::endl; + + if(param->process_nontext) + { + // Render non-text objects as image + std::cerr << "Processing Others: "; + // copied from poppler + SplashColor color; + color[0] = color[1] = color[2] = 255; + + auto bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); + bg_renderer->startDoc(doc); + + for(int i = param->first_page; i <= param->last_page ; ++i) + { + doc->displayPage(bg_renderer, i, param->h_dpi2, param->v_dpi2, + 0, true, false, false, + nullptr, nullptr, nullptr, nullptr); + bg_renderer->getBitmap()->writeImgFile(splashFormatPng, (char*)(boost::format("p%|1$x|.png")%i).str().c_str(), param->h_dpi2, param->v_dpi2); + + std::cerr << "."; + std::cerr.flush(); + } + delete bg_renderer; + std::cerr << std::endl; + } +} + +void HTMLRenderer::write_html_head() +{ + html_fout << boost::filesystem::ifstream(PDF2HTMLEX_LIB_PATH / "head.html", ifstream::binary).rdbuf(); +} + +void HTMLRenderer::write_html_tail() +{ + html_fout << boost::filesystem::ifstream(PDF2HTMLEX_LIB_PATH / "tail.html", ifstream::binary).rdbuf(); +} + +void HTMLRenderer::startPage(int pageNum, GfxState *state) +{ + this->pageNum = pageNum; + this->pageWidth = state->getPageWidth(); + this->pageHeight = state->getPageHeight(); + + assert(!line_opened); + + html_fout << boost::format("
" << endl; + + cur_fn_id = cur_fs_id = cur_tm_id = cur_color_id = 0; + cur_tx = cur_ty = 0; + cur_font_size = 0; + + memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); + memcpy(draw_ctm, id_matrix, sizeof(draw_ctm)); + draw_font_size = 0; + draw_scale = 1.0; + draw_tx = draw_ty = 0; + + cur_color.r = cur_color.g = cur_color.b = 0; + + reset_state_track(); +} + +void HTMLRenderer::endPage() { + close_cur_line(); + // close page + html_fout << "
" << endl; +} + + + + + + + + diff --git a/src/HTMLRenderer/image.cc b/src/HTMLRenderer/image.cc new file mode 100644 index 0000000..aac3df0 --- /dev/null +++ b/src/HTMLRenderer/image.cc @@ -0,0 +1,62 @@ +/* + * image.cc + * + * Handling images + * + * by WangLu + * 2012.08.14 + */ + +#include +// for gil bug +const int *int_p_NULL = nullptr; +#include +#include + +#include "HTMLRenderer.h" + + +void HTMLRenderer::drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg) +{ + if(maskColors) + return; + + boost::gil::rgb8_image_t img(width, height); + auto imgview = view(img); + auto loc = imgview.xy_at(0,0); + + ImageStream * img_stream = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits()); + img_stream->reset(); + + for(int i = 0; i < height; ++i) + { + auto p = img_stream->getLine(); + for(int j = 0; j < width; ++j) + { + GfxRGB rgb; + colorMap->getRGB(p, &rgb); + + *loc = boost::gil::rgb8_pixel_t(colToByte(rgb.r), colToByte(rgb.g), colToByte(rgb.b)); + + p += colorMap->getNumPixelComps(); + + ++ loc.x(); + } + + loc = imgview.xy_at(0, i+1); + } + + boost::gil::png_write_view((boost::format("i%|1$x|.png")%image_count).str(), imgview); + + img_stream->close(); + delete img_stream; + + close_cur_line(); + + double * ctm = state->getCTM(); + ctm[4] = ctm[5] = 0.0; + html_fout << boost::format("") % image_count % install_transform_matrix(ctm) % state->getCurX() % state->getCurY() % width % height << endl; + + + ++ image_count; +} diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 1ece909..1c31236 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -91,124 +91,8 @@ long long HTMLRenderer::install_font(GfxFont * font) } -std::string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) -{ - // mupdf consulted - - Object ref_obj, font_obj, font_obj2, fontdesc_obj; - Object obj, obj1, obj2; - Dict * dict = nullptr; - - std::string suffix, subtype; - - char buf[1024]; - int len; - - ofstream outf; - - auto * id = font->getID(); - ref_obj.initRef(id->num, id->gen); - ref_obj.fetch(xref, &font_obj); - ref_obj.free(); - - if(!font_obj.isDict()) - { - std::cerr << "Font object is not a dictionary" << std::endl; - goto err; - } - - dict = font_obj.getDict(); - if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) - { - if(font_obj2.arrayGetLength() == 0) - { - std::cerr << "Warning: empty DescendantFonts array" << std::endl; - } - else - { - if(font_obj2.arrayGetLength() > 1) - std::cerr << "TODO: multiple entries in DescendantFonts array" << std::endl; - - if(font_obj2.arrayGet(0, &obj2)->isDict()) - { - dict = obj2.getDict(); - } - } - } - - if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) - { - std::cerr << "Cannot find FontDescriptor " << std::endl; - goto err; - } - - dict = fontdesc_obj.getDict(); - - if(dict->lookup("FontFile3", &obj)->isStream()) - { - if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) - { - subtype = obj1.getName(); - if(subtype == "Type1C") - { - suffix = ".cff"; - } - else if (subtype == "CIDFontType0C") - { - suffix = ".cid"; - } - else - { - std::cerr << "Unknown subtype: " << subtype << std::endl; - goto err; - } - } - else - { - std::cerr << "Invalid subtype in font descriptor" << std::endl; - goto err; - } - } - else if (dict->lookup("FontFile2", &obj)->isStream()) - { - suffix = ".ttf"; - } - else if (dict->lookup("FontFile", &obj)->isStream()) - { - suffix = ".ttf"; - } - else - { - std::cerr << "Cannot find FontFile for dump" << std::endl; - goto err; - } - - if(suffix == "") - { - std::cerr << "Font type unrecognized" << std::endl; - goto err; - } - - obj.streamReset(); - outf.open((boost::format("%1%/f%|2$x|%3%")%TMP_DIR%fn_id%suffix).str().c_str(), ofstream::binary); - while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) - { - outf.write(buf, len); - } - outf.close(); - obj.streamClose(); - -err: - obj2.free(); - obj1.free(); - obj.free(); - - fontdesc_obj.free(); - font_obj2.free(); - font_obj.free(); - return suffix; -} - +// TODO +// add a new function and move to text.cc void HTMLRenderer::install_embedded_font(GfxFont * font, const std::string & suffix, long long fn_id) { // TODO Should use standard way to handle CID fonts diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc new file mode 100644 index 0000000..38a36a3 --- /dev/null +++ b/src/HTMLRenderer/text.cc @@ -0,0 +1,298 @@ +/* + * text.ccc + * + * Handling text and relative stuffs + * + * by WangLu + * 2012.08.14 + */ + +#include + +#include + +#include "HTMLRenderer.h" + +std::string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) +{ + // mupdf consulted + + Object ref_obj, font_obj, font_obj2, fontdesc_obj; + Object obj, obj1, obj2; + Dict * dict = nullptr; + + std::string suffix, subtype; + + char buf[1024]; + int len; + + ofstream outf; + + auto * id = font->getID(); + ref_obj.initRef(id->num, id->gen); + ref_obj.fetch(xref, &font_obj); + ref_obj.free(); + + if(!font_obj.isDict()) + { + std::cerr << "Font object is not a dictionary" << std::endl; + goto err; + } + + dict = font_obj.getDict(); + if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) + { + if(font_obj2.arrayGetLength() == 0) + { + std::cerr << "Warning: empty DescendantFonts array" << std::endl; + } + else + { + if(font_obj2.arrayGetLength() > 1) + std::cerr << "TODO: multiple entries in DescendantFonts array" << std::endl; + + if(font_obj2.arrayGet(0, &obj2)->isDict()) + { + dict = obj2.getDict(); + } + } + } + + if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) + { + std::cerr << "Cannot find FontDescriptor " << std::endl; + goto err; + } + + dict = fontdesc_obj.getDict(); + + if(dict->lookup("FontFile3", &obj)->isStream()) + { + if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) + { + subtype = obj1.getName(); + if(subtype == "Type1C") + { + suffix = ".cff"; + } + else if (subtype == "CIDFontType0C") + { + suffix = ".cid"; + } + else + { + std::cerr << "Unknown subtype: " << subtype << std::endl; + goto err; + } + } + else + { + std::cerr << "Invalid subtype in font descriptor" << std::endl; + goto err; + } + } + else if (dict->lookup("FontFile2", &obj)->isStream()) + { + suffix = ".ttf"; + } + else if (dict->lookup("FontFile", &obj)->isStream()) + { + suffix = ".ttf"; + } + else + { + std::cerr << "Cannot find FontFile for dump" << std::endl; + goto err; + } + + if(suffix == "") + { + std::cerr << "Font type unrecognized" << std::endl; + goto err; + } + + obj.streamReset(); + outf.open((boost::format("%1%/f%|2$x|%3%")%TMP_DIR%fn_id%suffix).str().c_str(), ofstream::binary); + while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) + { + outf.write(buf, len); + } + outf.close(); + obj.streamClose(); + +err: + obj2.free(); + obj1.free(); + obj.free(); + + fontdesc_obj.free(); + font_obj2.free(); + font_obj.free(); + return suffix; +} + +void HTMLRenderer::drawString(GfxState * state, GooString * s) +{ + if(s->getLength() == 0) + return; + + auto font = state->getFont(); + if((font == nullptr) || (font->getWMode())) + { + return; + } + + //hidden + if((state->getRender() & 3) == 3) + { + return; + } + + // see if the line has to be closed due to state change + check_state_change(state); + + // if the line is still open, try to merge with it + if(line_opened) + { + double target = (cur_tx - draw_tx) * draw_scale; + if(target > -param->h_eps) + { + if(target > param->h_eps) + { + double w; + auto wid = install_whitespace(target, w); + html_fout << boost::format(" ") % wid; + draw_tx += w / draw_scale; + } + } + else + { + // or can we shift left using simple tags? + close_cur_line(); + } + } + + if(!line_opened) + { + // have to open a new line + + // classes + html_fout << "
transform(state->getCurX(), state->getCurY(), &x, &y); + // TODO: recheck descent/ascent + html_fout << "\" style=\"" + << "bottom:" << (y + state->getFont()->getDescent() * draw_font_size) << "px;" + << "top:" << (pageHeight - y - state->getFont()->getAscent() * draw_font_size) << "px;" + << "left:" << x << "px;" + ; + } + + // TODO: tracking + // letter & word spacing + if(_is_positive(state->getCharSpace())) + html_fout << "letter-spacing:" << state->getCharSpace() << "px;"; + if(_is_positive(state->getWordSpace())) + html_fout << "word-spacing:" << state->getWordSpace() << "px;"; + + //debug + //real pos & hori_scale + if(0) + { +#if 0 + html_fout << "\""; + double x,y; + state->transform(state->getCurX(), state->getCurY(), &x, &y); + html_fout << boost::format("data-lx=\"%5%\" data-ly=\"%6%\" data-drawscale=\"%4%\" data-x=\"%1%\" data-y=\"%2%\" data-hs=\"%3%") + %x%y%(state->getHorizScaling())%draw_scale%state->getLineX()%state->getLineY(); +#endif + } + + html_fout << "\">"; + + line_opened = true; + + draw_tx = cur_tx; + } + + + // Now ready to output + // get the unicodes + char *p = s->getCString(); + int len = s->getLength(); + + double dx = 0; + double dy = 0; + double dx1,dy1; + double ox, oy; + + int nChars = 0; + int nSpaces = 0; + int uLen; + + CharCode code; + Unicode *u = nullptr; + + while (len > 0) { + auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); + + if(!(_equal(ox, 0) && _equal(oy, 0))) + { + std::cerr << "TODO: non-zero origins" << std::endl; + } + + if(uLen == 0) + { + // TODO +#if 0 + CharCode c = 0; + for(int i = 0; i < n; ++i) + { + c = (c<<8) | (code&0xff); + code >>= 8; + } + for(int i = 0; i < n; ++i) + { + Unicode u = (c&0xff); + c >>= 8; + outputUnicodes(html_fout, &u, 1); + } +#endif + } + else + { + outputUnicodes(html_fout, u, uLen); + } + + dx += dx1; + dy += dy1; + + if (n == 1 && *p == ' ') + { + ++nSpaces; + } + + ++nChars; + p += n; + len -= n; + } + + dx = (dx * state->getFontSize() + + nChars * state->getCharSpace() + + nSpaces * state->getWordSpace()) * state->getHorizScaling(); + + dy *= state->getFontSize(); + + cur_tx += dx; + cur_ty += dy; + + draw_tx += dx; + draw_ty += dy; +}