From 2041c2d146d2bcfba372d521da568823bfb6a107 Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Wed, 3 Oct 2012 02:19:40 +0800 Subject: [PATCH] working on linear gradient --- CMakeLists.txt | 14 +- src/CairoBackgroundRenderer.cc | 4 + src/HTMLRenderer/LineBuffer.cc | 4 +- src/HTMLRenderer/draw.cc | 202 ++++++++- src/HTMLRenderer/general.cc | 2 +- src/HTMLRenderer/install.cc | 6 +- src/HTMLRenderer/link.cc | 8 +- src/HTMLRenderer/state.cc | 20 +- src/HTMLRenderer/text.cc | 2 +- src/HTMLRenderer/text.cc.orig | 567 -------------------------- src/Preprocessor.cc | 4 +- src/include/CairoBackgroundRenderer.h | 25 +- src/include/HTMLRenderer.h | 16 +- src/include/util.h | 19 +- src/pdf2htmlEX.cc | 4 +- 15 files changed, 261 insertions(+), 636 deletions(-) delete mode 100644 src/HTMLRenderer/text.cc.orig diff --git a/CMakeLists.txt b/CMakeLists.txt index 324591d..18dd525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,14 +20,20 @@ include_directories(${POPPLER_INCLUDE_DIRS}) link_directories(${POPPLER_LIBRARY_DIRS}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES}) -find_path(CAIRO_OUTPUTDEV_INCLUDE_PATH CairoOutputDev.h PATHS - ${POPPLER_INCLUDE_DIRS} NO_DEFAULT_PATH) -if(CAIRO_OUTPUTDEV_INCLUDE_PATH) - message("Found CairoOutputDev.h: ${POPPLER_INCLUDE_DIRS}/CairoOutputDev.h") +# disable CAIRO for now +if(0) +pkg_check_modules(POPPLER_CAIRO poppler-cairo>=0.20.0) +if(POPPLER_CAIRO_FOUND) set(HAVE_CAIRO 1) + include_directories(${POPPLER_CAIRO_INCLUDE_DIRS}) + link_directories(${POPPLER_CAIRO_LIBRARY_DIRS}) + set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_CAIRO_LIBRARIES}) else() set(HAVE_CAIRO 0) endif() +else() +set(HAVE_CAIRO 0) +endif() # fontforge starts using pkg-config 'correctly' since 2.0.0 diff --git a/src/CairoBackgroundRenderer.cc b/src/CairoBackgroundRenderer.cc index 9005ba9..3e51741 100644 --- a/src/CairoBackgroundRenderer.cc +++ b/src/CairoBackgroundRenderer.cc @@ -20,6 +20,10 @@ void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); } +void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const std::string & filename) +{ +} + } // namespace pdf2htmlEX #endif // HAVE_CAIRO diff --git a/src/HTMLRenderer/LineBuffer.cc b/src/HTMLRenderer/LineBuffer.cc index 32e557a..acbb944 100644 --- a/src/HTMLRenderer/LineBuffer.cc +++ b/src/HTMLRenderer/LineBuffer.cc @@ -75,7 +75,7 @@ void HTMLRenderer::LineBuffer::flush(void) for(auto iter = states.begin(); iter != states.end(); ++iter) { const auto & s = *iter; - max_ascent = max(max_ascent, s.ascent * s.draw_font_size); + max_ascent = max(max_ascent, s.ascent * s.draw_font_size); } ostream & out = renderer->html_fout; @@ -155,7 +155,7 @@ void HTMLRenderer::LineBuffer::flush(void) ++ cur_offset_iter; } - size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); + size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx); cur_text_idx = next_text_idx; diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc index 9e3d611..1ee161f 100644 --- a/src/HTMLRenderer/draw.cc +++ b/src/HTMLRenderer/draw.cc @@ -7,6 +7,12 @@ * 2012.10.01 */ +#include +#include +#include +#include +#include + #include "HTMLRenderer.h" #include "util.h" #include "namespace.h" @@ -14,6 +20,14 @@ namespace pdf2htmlEX { using std::swap; +using std::min; +using std::max; +using std::acos; +using std::asin; +using std::ostringstream; +using std::sqrt; +using std::vector; +using std::ostream; static bool is_horizontal_line(GfxSubpath * path) { @@ -44,6 +58,157 @@ static bool is_rectangle(GfxSubpath * path) && _equal(path->getY(3), path->getY(0))); } +static void get_shading_bbox(GfxState * state, GfxShading * shading, + double & x1, double & y1, double & x2, double & y2) +{ + // from SplashOutputDev.cc in poppler + if(shading->getHasBBox()) + { + shading->getBBox(&x1, &y1, &x2, &y2); + } + else + { + state->getClipBBox(&x1, &y1, &x2, &y2); + Matrix ctm, ictm; + state->getCTM(&ctm); + ctm.invertTo(&ictm); + + double x[4], y[4]; + ictm.transform(x1, y1, &x[0], &y[0]); + ictm.transform(x2, y1, &x[1], &y[1]); + ictm.transform(x1, y2, &x[2], &y[2]); + ictm.transform(x2, y2, &x[3], &y[3]); + + x1 = x2 = x[0]; + y1 = y2 = y[0]; + + for(int i = 1; i < 4; ++i) + { + x1 = min(x1, x[i]); + y1 = min(y1, y[i]); + x2 = max(x2, x[i]); + y2 = max(y2, y[i]); + } + } +} + +static double get_degree(double dx, double dy) +{ + static const double PI = acos(-1.0); + double r = hypot(dx, dy); + + double ang = acos(dx / r); + if(!_is_positive(dy)) + ang = 2 * PI - ang; + + return ang * 180.0 / PI; +} + +class LinearGradient +{ +public: + LinearGradient(GfxAxialShading * shading, + double x1, double y1, double x2, double y2); + + void dumpto (ostream & out); + + static void style_function (void * p, ostream & out) + { + static_cast(p)->dumpto(out); + } + + // TODO, add alpha + class ColorStop + { + public: + GfxRGB rgb; + double pos; // [0,1] + }; + + vector stops; + double degree; +}; + +LinearGradient::LinearGradient (GfxAxialShading * shading, + double x1, double y1, double x2, double y2) +{ + // coordinate for t = 0 and t = 1 + double t0x, t0y, t1x, t1y; + shading->getCoords(&t0x, &t0y, &t1x, &t1y); + + degree = get_degree(t1x - t0x, t1y - t0y); + + // get the range of t in the box + // from GfxState.cc in poppler + double box_tmin, box_tmax; + { + double idx = t1x - t0x; + double idy = t1y - t0y; + double inv_len = 1.0 / (idx * idx + idy * idy); + idx *= inv_len; + idy *= inv_len; + + // t of (x1,y1) + box_tmin = box_tmax = (x1 - t0x) * idx + (y1 - t0y) * idy; + double tdx = (x2 - x1) * idx; + if(tdx < 0) + box_tmin += tdx; + else + box_tmax += tdx; + + double tdy = (y2 - y1) * idy; + if(tdy < 0) + box_tmin += tdy; + else + box_tmax += tdy; + } + + // get the domain of t in the box + double domain_tmin = max(box_tmin, shading->getDomain0()); + double domain_tmax = min(box_tmax, shading->getDomain1()); + + // TODO: better sampling + // TODO: check background color + { + stops.clear(); + double tstep = (domain_tmax - domain_tmin) / 13.0; + for(double t = domain_tmin; t <= domain_tmax; t += tstep) + { + GfxColor color; + shading->getColor(t, &color); + + ColorStop stop; + shading->getColorSpace()->getRGB(&color, &stop.rgb); + stop.pos = (t - box_tmin) / (box_tmax - box_tmin); + + stops.push_back(stop); + } + } +} + +void LinearGradient::dumpto (ostream & out) +{ + out << "background-color:red;"; +} + +GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) +{ + if(!(param->css_draw)) return gFalse; + + double x1, y1, x2, y2; + get_shading_bbox(state, shading, x1, y1, x2, y2); + + LinearGradient lg(shading, x1, y1, x2, y2); + + // TODO: check background color + css_draw_rectangle(x1, y1, x2-x1, y2-y1, state->getCTM(), + nullptr, 0, + nullptr, nullptr, + LinearGradient::style_function, &lg); + + return gTrue; +} + //TODO track state //TODO connection style void HTMLRenderer::css_draw(GfxState *state, bool fill) @@ -67,9 +232,9 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) double lw = state->getLineWidth(); - css_draw_rectangle(x1, y - lw/2, x2-x1, lw, + css_draw_rectangle(x1, y - lw/2, x2-x1, lw, state->getCTM(), nullptr, 0, - nullptr, &stroke_color, state); + nullptr, &stroke_color); } else if (is_rectangle(subpath)) { @@ -108,34 +273,36 @@ void HTMLRenderer::css_draw(GfxState *state, bool fill) w += lw[1]; } - css_draw_rectangle(x, y, w, h, + css_draw_rectangle(x, y, w, h, state->getCTM(), lw, lw_count, - ps, pf, state); + ps, pf); } } } -void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, +void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, const double * tm, double * line_width_array, int line_width_count, - const GfxRGB * line_color, const GfxRGB * fill_color, - GfxState * state) + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, ostream &), void * style_function_data) { close_text_line(); - double ctm[6]; - memcpy(ctm, state->getCTM(), sizeof(ctm)); + double new_tm[6]; + memcpy(new_tm, tm, sizeof(new_tm)); - _transform(ctm, x, y); + _transform(new_tm, x, y); double scale = 1.0; { - double i1 = ctm[0] + ctm[2]; - double i2 = ctm[1] + ctm[3]; - scale = sqrt((i1 * i1 + i2 * i2) / 2.0); + static const double sqrt2 = sqrt(2.0); + + double i1 = (new_tm[0] + new_tm[2]) / sqrt2; + double i2 = (new_tm[1] + new_tm[3]) / sqrt2; + scale = hypot(i1, i2); if(_is_positive(scale)) { for(int i = 0; i < 4; ++i) - ctm[i] /= scale; + new_tm[i] /= scale; } else { @@ -143,7 +310,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, } } - html_fout << "
font_size_multiplier); + text_scale_factor1 = max(zoom, param->font_size_multiplier); text_scale_factor2 = zoom / text_scale_factor1; } diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index e2bd654..b741e26 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -215,9 +215,11 @@ long long HTMLRenderer::install_font_size(double font_size) long long HTMLRenderer::install_transform_matrix(const double * tm) { - TM m(tm); + Matrix m; + memcpy(m.m, tm, sizeof(m.m)); + auto iter = transform_matrix_map.lower_bound(m); - if((iter != transform_matrix_map.end()) && (m == (iter->first))) + if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4))) return iter->second; long long new_tm_id = transform_matrix_map.size(); diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index a38fc65..aa579c4 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -191,10 +191,10 @@ void HTMLRenderer::processLink(AnnotLink * al) double x,y,w,h; double x1, y1, x2, y2; al->getRect(&x1, &y1, &x2, &y2); - x = min(x1, x2); - y = min(y1, y2); - w = max(x1, x2) - x; - h = max(y1, y2) - y; + x = min(x1, x2); + y = min(y1, y2); + w = max(x1, x2) - x; + h = max(y1, y2) - y; double border_width = 0; double border_top_bottom_width = 0; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 862d7d8..fc94015 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -12,6 +12,7 @@ * optimize lines using nested (reuse classes) */ +#include #include #include "HTMLRenderer.h" @@ -22,6 +23,7 @@ namespace pdf2htmlEX { using std::max; using std::abs; +using std::hypot; void HTMLRenderer::updateAll(GfxState * state) { @@ -98,7 +100,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(new_font_info->id == cur_font_info->id)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_font_info = new_font_info; } @@ -146,7 +148,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_draw_text_tm[6]; memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); - double new_draw_text_scale = 1.0/text_scale_factor2 * sqrt(new_draw_text_tm[2] * new_draw_text_tm[2] + new_draw_text_tm[3] * new_draw_text_tm[3]); + double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]); double new_draw_font_size = cur_font_size; if(_is_positive(new_draw_text_scale)) @@ -168,13 +170,13 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(_equal(new_draw_font_size, draw_font_size))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); draw_font_size = new_draw_font_size; cur_fs_id = install_font_size(draw_font_size); } if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4))) { - new_line_state = max(new_line_state, NLS_DIV); + new_line_state = max(new_line_state, NLS_DIV); memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm)); cur_ttm_id = install_transform_matrix(draw_text_tm); } @@ -236,7 +238,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!merged) { - new_line_state = max(new_line_state, NLS_DIV); + new_line_state = max(new_line_state, NLS_DIV); } } @@ -247,7 +249,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_letter_space = state->getCharSpace(); if(!_equal(cur_letter_space, new_letter_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_letter_space = new_letter_space; cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale); } @@ -260,7 +262,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_word_space = state->getWordSpace(); if(!_equal(cur_word_space, new_word_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_word_space = new_word_space; cur_ws_id = install_word_space(cur_word_space * draw_text_scale); } @@ -273,7 +275,7 @@ void HTMLRenderer::check_state_change(GfxState * state) state->getFillRGB(&new_color); if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_color = new_color; cur_color_id = install_color(&new_color); } @@ -286,7 +288,7 @@ void HTMLRenderer::check_state_change(GfxState * state) double new_rise = state->getRise(); if(!_equal(cur_rise, new_rise)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_rise = new_rise; cur_rise_id = install_rise(new_rise * draw_text_scale); } diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 1a6e323..1486d23 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -310,7 +310,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo memset(width_list, -1, 0x10000 * sizeof(*width_list)); if(code2GID) - maxcode = min(maxcode, code2GID_len - 1); + maxcode = min(maxcode, code2GID_len - 1); bool is_truetype = is_truetype_suffix(suffix); int max_key = maxcode; diff --git a/src/HTMLRenderer/text.cc.orig b/src/HTMLRenderer/text.cc.orig deleted file mode 100644 index 1a4b44f..0000000 --- a/src/HTMLRenderer/text.cc.orig +++ /dev/null @@ -1,567 +0,0 @@ -/* - * text.cc - * - * Handling text & font, and relative stuffs - * - * by WangLu - * 2012.08.14 - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "ffw.h" -#include "HTMLRenderer.h" -#include "namespace.h" - -namespace pdf2htmlEX { - -using std::unordered_set; -using std::min; -using std::all_of; -using std::floor; -using std::swap; - -string HTMLRenderer::dump_embedded_font (GfxFont * font, long long fn_id) -{ - Object obj, obj1, obj2; - Object font_obj, font_obj2, fontdesc_obj; - string suffix; - string filepath; - - try - { - // mupdf consulted - string subtype; - - auto * id = font->getID(); - - Object ref_obj; - ref_obj.initRef(id->num, id->gen); - ref_obj.fetch(xref, &font_obj); - ref_obj.free(); - - if(!font_obj.isDict()) - { - cerr << "Font object is not a dictionary" << endl; - throw 0; - } - - Dict * dict = font_obj.getDict(); - if(dict->lookup("DescendantFonts", &font_obj2)->isArray()) - { - if(font_obj2.arrayGetLength() == 0) - { - cerr << "Warning: empty DescendantFonts array" << endl; - } - else - { - if(font_obj2.arrayGetLength() > 1) - cerr << "TODO: multiple entries in DescendantFonts array" << endl; - - if(font_obj2.arrayGet(0, &obj2)->isDict()) - { - dict = obj2.getDict(); - } - } - } - - if(!dict->lookup("FontDescriptor", &fontdesc_obj)->isDict()) - { - cerr << "Cannot find FontDescriptor " << endl; - throw 0; - } - - dict = fontdesc_obj.getDict(); - - if(dict->lookup("FontFile3", &obj)->isStream()) - { - if(obj.streamGetDict()->lookup("Subtype", &obj1)->isName()) - { - subtype = obj1.getName(); - if(subtype == "Type1C") - { - suffix = ".cff"; - } - else if (subtype == "CIDFontType0C") - { - suffix = ".cid"; - } - else - { - cerr << "Unknown subtype: " << subtype << endl; - throw 0; - } - } - else - { - cerr << "Invalid subtype in font descriptor" << endl; - throw 0; - } - } - else if (dict->lookup("FontFile2", &obj)->isStream()) - { - suffix = ".ttf"; - } - else if (dict->lookup("FontFile", &obj)->isStream()) - { - suffix = ".pfa"; - } - else - { - cerr << "Cannot find FontFile for dump" << endl; - throw 0; - } - - if(suffix == "") - { - cerr << "Font type unrecognized" << endl; - throw 0; - } - - obj.streamReset(); - - filepath = (char*)str_fmt("%s/f%llx%s", param->tmp_dir.c_str(), fn_id, suffix.c_str()); - add_tmp_file(filepath); - - ofstream outf(filepath, ofstream::binary); - if(!outf) - throw string("Cannot open file ") + filepath + " for writing"; - - char buf[1024]; - int len; - while((len = obj.streamGetChars(1024, (Guchar*)buf)) > 0) - { - outf.write(buf, len); - } - outf.close(); - obj.streamClose(); - } - catch(int) - { - cerr << "Someting wrong when trying to dump font " << hex << fn_id << dec << endl; - } - - obj2.free(); - obj1.free(); - obj.free(); - - fontdesc_obj.free(); - font_obj2.free(); - font_obj.free(); - - return filepath; -} - -void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo & info, bool get_metric_only) -{ - if(param->debug) - { - cerr << "Embed font: " << filepath << " " << info.id << endl; - } - - ffw_load_font(filepath.c_str()); - int * code2GID = nullptr; - int code2GID_len = 0; - int maxcode = 0; - - Gfx8BitFont * font_8bit = nullptr; - GfxCIDFont * font_cid = nullptr; - - string suffix = get_suffix(filepath); - for(auto iter = suffix.begin(); iter != suffix.end(); ++iter) - *iter = tolower(*iter); - - /* - * if parm->tounicode is 0, try the provided tounicode map first - */ - info.use_tounicode = (is_truetype_suffix(suffix) || (param->tounicode >= 0)); - info.has_space = false; - - const char * used_map = nullptr; - - info.em_size = ffw_get_em_size(); - - if(get_metric_only) - return; - - used_map = preprocessor.get_code_map(hash_ref(font->getID())); - - /* - * Step 1 - * dump the font file directly from the font descriptor and put the glyphs into the correct slots - * - * for 8bit + nonTrueType - * re-encoding the font using a PostScript encoding list (glyph id <-> glpyh name) - * - * for 8bit + TrueType - * sort the glpyhs as the original order, and later will map GID (instead of char code) to Unicode - * - * for CID + nonTrueType - * Flatten the font - * - * for CID Truetype - * same as 8bitTrueType, except for that we have to check 65536 charcodes - */ - if(!font->isCIDFont()) - { - font_8bit = dynamic_cast(font); - maxcode = 0xff; - if(is_truetype_suffix(suffix)) - { - ffw_reencode_glyph_order(); - FoFiTrueType *fftt = nullptr; - if((fftt = FoFiTrueType::load((char*)filepath.c_str())) != nullptr) - { - code2GID = font_8bit->getCodeToGIDMap(fftt); - code2GID_len = 256; - delete fftt; - } - } - else - { - // move the slot such that it's consistent with the encoding seen in PDF - unordered_set nameset; - bool name_conflict_warned = false; - - memset(cur_mapping2, 0, 0x100 * sizeof(char*)); - - for(int i = 0; i < 256; ++i) - { - if(!used_map[i]) continue; - - auto cn = font_8bit->getCharName(i); - if(cn == nullptr) - { - continue; - } - else - { - if(nameset.insert(string(cn)).second) - { - cur_mapping2[i] = cn; - } - else - { - if(!name_conflict_warned) - { - name_conflict_warned = true; - //TODO: may be resolved using advanced font properties? - cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; - } - } - } - } - - ffw_reencode_raw2(cur_mapping2, 256, 0); - } - } - else - { - font_cid = dynamic_cast(font); - maxcode = 0xffff; - - if(is_truetype_suffix(suffix)) - { - ffw_reencode_glyph_order(); - - GfxCIDFont * _font = dynamic_cast(font); - - // code2GID has been stored for embedded CID fonts - code2GID = _font->getCIDToGID(); - code2GID_len = _font->getCIDToGIDLen(); - } - else - { - ffw_cidflatten(); - } - } - - /* - * Step 2 - * map charcode (or GID for CID truetype) - * generate an Consortium encoding file and let fontforge handle it. - * - * - Always map to Unicode for 8bit TrueType fonts and CID fonts - * - * - For 8bit nonTruetype fonts: - * Try to calculate the correct Unicode value from the glyph names, unless param->always_apply_tounicode is set - * - * - * Also fill in the width_list, and set widths accordingly - */ - - - { - unordered_set codeset; - bool name_conflict_warned = false; - - auto ctu = font->getToUnicode(); - memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); - memset(width_list, -1, 0x10000 * sizeof(*width_list)); - - if(code2GID) - maxcode = min(maxcode, code2GID_len - 1); - - bool is_truetype = is_truetype_suffix(suffix); - int max_key = maxcode; - /* - * Traverse all possible codes - */ - bool retried = false; // avoid infinite loop - for(int i = 0; i <= maxcode; ++i) - { - if(!used_map[i]) - continue; - - /* - * Skip glyphs without names (only for non-ttf fonts) - */ - if(!is_truetype && (font_8bit != nullptr) - && (font_8bit->getCharName(i) == nullptr)) - { - continue; - } - - int k = i; - if(code2GID) - { - if((k = code2GID[i]) == 0) continue; - } - - if(k > max_key) - max_key = k; - - Unicode u, *pu=&u; - if(info.use_tounicode) - { - int n = ctu ? (ctu->mapToUnicode(i, &pu)) : 0; - u = check_unicode(pu, n, i, font); - } - else - { - u = unicode_from_font(i, font); - } - - if(u == ' ') - info.has_space = true; - - if(codeset.insert(u).second) - { - cur_mapping[k] = u; - } - else - { - // collision detected - if(param->tounicode == 0) - { - // in auto mode, just drop the tounicode map - if(!retried) - { - cerr << "ToUnicode CMap is not valid and got dropped" << endl; - retried = true; - codeset.clear(); - info.use_tounicode = false; - memset(cur_mapping, -1, 0x10000 * sizeof(*cur_mapping)); - memset(width_list, -1, 0x10000 * sizeof(*width_list)); - i = -1; - continue; - } - } - if(!name_conflict_warned) - { - name_conflict_warned = true; - //TODO: may be resolved using advanced font properties? - cerr << "Warning: encoding confliction detected in font: " << hex << info.id << dec << endl; - } - } - - if(font_8bit) - { - width_list[k] = (int)floor(font_8bit->getWidth(i) * info.em_size + 0.5); - } - else - { - char buf[2]; - buf[0] = (i >> 8) & 0xff; - buf[1] = (i & 0xff); - width_list[k] = (int)floor(font_cid->getWidth(buf, 2) * info.em_size + 0.5); - } - } - - ffw_reencode_raw(cur_mapping, max_key + 1, 1); - ffw_set_widths(width_list, max_key + 1); - - if(ctu) - ctu->decRefCnt(); - } - - /* - * Step 3 - * - * Generate the font as desired - * - */ - string cur_tmp_fn = (char*)str_fmt("%s/__tmp_font1%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); - add_tmp_file(cur_tmp_fn); - string other_tmp_fn = (char*)str_fmt("%s/__tmp_font2%s", param->tmp_dir.c_str(), param->font_suffix.c_str()); - add_tmp_file(other_tmp_fn); - - ffw_save(cur_tmp_fn.c_str()); - ffw_close(); - - /* - * Step 4 - * Font Hinting - */ - bool hinted = false; - - // Call external hinting program if specified - if(param->external_hint_tool != "") - { - hinted = (system((char*)str_fmt("%s \"%s\" \"%s\"", param->external_hint_tool.c_str(), cur_tmp_fn.c_str(), other_tmp_fn.c_str())) == 0); - } - - // Call internal hinting procedure if specified - if((!hinted) && (param->auto_hint)) - { - ffw_load_font(cur_tmp_fn.c_str()); - ffw_auto_hint(); - ffw_save(other_tmp_fn.c_str()); - ffw_close(); - hinted = true; - } - - if(hinted) - { - swap(cur_tmp_fn, other_tmp_fn); - } - - /* - * Step 5 - * Generate the font - * Reload to retrieve/fix accurate ascent/descent - */ - string fn = (char*)str_fmt("%s/f%llx%s", - (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), - info.id, param->font_suffix.c_str()); - - if(param->single_html) - add_tmp_file(fn); - - ffw_load_font(cur_tmp_fn.c_str()); - ffw_metric(&info.ascent, &info.descent); - ffw_save(fn.c_str()); - ffw_close(); -} - -void HTMLRenderer::drawString(GfxState * state, GooString * s) -{ - if(s->getLength() == 0) - return; - - auto font = state->getFont(); - if((font == nullptr) || (font->getWMode())) - { - return; - } - - //hidden - if((state->getRender() & 3) == 3) - { - return; - } - - // see if the line has to be closed due to state change - check_state_change(state); - prepare_line(state); - - // Now ready to output - // get the unicodes - char *p = s->getCString(); - int len = s->getLength(); - - double dx = 0; - double dy = 0; - double dxerr = 0; - double dx1,dy1; - double ox, oy; - - int nChars = 0; - int nSpaces = 0; - int uLen; - - CharCode code; - Unicode *u = nullptr; - - while (len > 0) { - auto n = font->getNextChar(p, len, &code, &u, &uLen, &dx1, &dy1, &ox, &oy); - - if(!(_equal(ox, 0) && _equal(oy, 0))) - { - cerr << "TODO: non-zero origins" << endl; - } - - bool is_space = false; - if (n == 1 && *p == ' ') - { - ++nSpaces; - is_space = true; - } - - if(is_space && (param->space_as_offset)) - { - // ignore horiz_scaling, as it's merged in CTM - line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); - } - else - { - if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode)) - { - line_buf.append_unicodes(u, uLen); - } - else - { - if(cur_font_info->use_tounicode) - { - Unicode uu = check_unicode(u, uLen, code, font); - line_buf.append_unicodes(&uu, 1); - } - else - { - Unicode uu = unicode_from_font(code, font); - line_buf.append_unicodes(&uu, 1); - } - } - } - - dx += dx1; - dy += dy1; - - ++nChars; - p += n; - len -= n; - } - - double hs = state->getHorizScaling(); - - // horiz_scaling is merged into ctm now, - // so the coordinate system is ugly - dx = (dx * cur_font_size + nChars * cur_letter_space + nSpaces * cur_word_space) * hs; - - dy *= cur_font_size; - - cur_tx += dx; - cur_ty += dy; - - draw_tx += dx + dxerr * cur_font_size * hs; - draw_ty += dy; -} - -} // namespace pdf2htmlEX diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index c64b04a..f96b602 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -82,8 +82,8 @@ void Preprocessor::drawChar(GfxState *state, double x, double y, void Preprocessor::startPage(int pageNum, GfxState *state) { - max_width = max(max_width, state->getPageWidth()); - max_height = max(max_height, state->getPageHeight()); + max_width = max(max_width, state->getPageWidth()); + max_height = max(max_height, state->getPageHeight()); } const char * Preprocessor::get_code_map (long long font_id) const diff --git a/src/include/CairoBackgroundRenderer.h b/src/include/CairoBackgroundRenderer.h index 5a2d2ab..c3c8e90 100644 --- a/src/include/CairoBackgroundRenderer.h +++ b/src/include/CairoBackgroundRenderer.h @@ -1,6 +1,6 @@ /* - * Splash Background renderer - * Render all those things not supported as Image, with Splash + * Cairo Background renderer + * Render all those things not supported as Image, with Cairo * * Copyright (C) 2012 Lu Wang */ @@ -9,27 +9,32 @@ #ifndef CAIRO_BACKGROUND_RENDERER_H__ #define CAIRO_BACKGROUND_RENDERER_H__ -#include -#include +#include + +#include "Param.h" namespace pdf2htmlEX { // Based on BackgroundRenderer from poppler -class SplashBackgroundRenderer : public SplashOutputDev +class CairoBackgroundRenderer : public CairoOutputDev { public: - static const SplashColor white; - - SplashBackgroundRenderer() - :SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + CairoBackgroundRenderer(const Param * param) + :CairoOutputDev() + , param(param) { } - virtual ~SplashBackgroundRenderer() { } + virtual ~CairoBackgroundRenderer() { } virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); + + void render_page(PDFDoc * doc, int pageno, const std::string & filename); + +protected: + const Param * param; }; } diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index 9ebc872..ec7692b 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -79,6 +79,12 @@ class HTMLRenderer : public OutputDev // Does this device use drawChar() or drawString()? virtual GBool useDrawChar() { return gFalse; } + // Does this device use functionShadedFill(), axialShadedFill(), and + // radialShadedFill()? If this returns false, these shaded fills + // will be reduced to a series of other drawing operations. + virtual GBool useShadedFills(int type) { return type == 2; } + + // Does this device use beginType3Char/endType3Char? Otherwise, // text in Type 3 fonts will be drawn with drawChar/drawString. virtual GBool interpretType3Chars() { return gFalse; } @@ -125,6 +131,7 @@ class HTMLRenderer : public OutputDev virtual void stroke(GfxState *state) { css_draw(state, false); } virtual void fill(GfxState *state) { css_draw(state, true); } + virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); virtual void processLink(AnnotLink * al); @@ -208,11 +215,12 @@ class HTMLRenderer : public OutputDev * w,h should be the metrics WITHOUT border * * line_color & fill_color may be specified as nullptr to indicate none + * style_function & style_function_data may be provided to provide more styles */ - void css_draw_rectangle(double x, double y, double w, double h, + void css_draw_rectangle(double x, double y, double w, double h, const double * tm, double * line_width_array, int line_width_count, - const GfxRGB * line_color, const GfxRGB * fill_color, - GfxState * state); + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); //////////////////////////////////////////////////// @@ -396,7 +404,7 @@ class HTMLRenderer : public OutputDev std::unordered_map font_name_map; std::map font_size_map; - std::map transform_matrix_map; + std::map transform_matrix_map; std::map letter_space_map; std::map word_space_map; std::unordered_map color_map; diff --git a/src/include/util.h b/src/include/util.h index 746077b..1ea84fe 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -104,28 +104,21 @@ public: bool has_space; // whether space is included in the font }; -// wrapper of the transform matrix double[6] -// Transform Matrix -class TM +class Matrix_less { public: - TM() {} - TM(const double * m) {memcpy(_, m, sizeof(_));} - bool operator < (const TM & m) const { + bool operator () (const Matrix & m1, const Matrix & m2) const + { // Note that we only care about the first 4 elements for(int i = 0; i < 4; ++i) { - if(_[i] < m._[i] - EPS) + if(m1.m[i] < m2.m[i] - EPS) return true; - if(_[i] > m._[i] + EPS) + if(m1.m[i] > m2.m[i] + EPS) return false; } return false; } - bool operator == (const TM & m) const { - return _tm_equal(_, m._, 4); - } - double _[6]; }; class base64stream @@ -203,7 +196,7 @@ public: va_end(vlist); if(l >= (int)buf.capacity()) { - buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); + buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); va_start(vlist, format); l = vsnprintf(&buf.front(), buf.capacity(), format, vlist); va_end(vlist); diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index d95c70e..c65eeb0 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -183,8 +183,8 @@ int main(int argc, char **argv) throw "Copying of text from this document is not allowed."; } - param.first_page = min(max(param.first_page, 1), doc->getNumPages()); - param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); + param.first_page = min(max(param.first_page, 1), doc->getNumPages()); + param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); if(param.output_filename == "") {