diff --git a/CMakeLists.txt b/CMakeLists.txt index e80d0c3..18dd525 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,22 @@ include_directories(${POPPLER_INCLUDE_DIRS}) link_directories(${POPPLER_LIBRARY_DIRS}) set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_LIBRARIES}) +# disable CAIRO for now +if(0) +pkg_check_modules(POPPLER_CAIRO poppler-cairo>=0.20.0) +if(POPPLER_CAIRO_FOUND) + set(HAVE_CAIRO 1) + include_directories(${POPPLER_CAIRO_INCLUDE_DIRS}) + link_directories(${POPPLER_CAIRO_LIBRARY_DIRS}) + set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} ${POPPLER_CAIRO_LIBRARIES}) +else() + set(HAVE_CAIRO 0) +endif() +else() +set(HAVE_CAIRO 0) +endif() + + # fontforge starts using pkg-config 'correctly' since 2.0.0 pkg_check_modules(FONTFORGE libfontforge>=2.0.0) if(FONTFORGE_FOUND) @@ -127,13 +143,17 @@ add_executable(pdf2htmlEX src/HTMLRenderer/export.cc src/HTMLRenderer/text.cc src/HTMLRenderer/image.cc + src/HTMLRenderer/draw.cc src/HTMLRenderer/link.cc src/include/namespace.h src/HTMLRenderer/LineBuffer.cc src/include/ffw.h src/ffw.c src/include/BackgroundRenderer.h - src/BackgroundRenderer.cc + src/include/SplashBackgroundRenderer.h + src/SplashBackgroundRenderer.cc + src/include/CairoBackgroundRenderer.h + src/CairoBackgroundRenderer.cc src/include/Preprocessor.h src/Preprocessor.cc src/include/util.h diff --git a/README.md b/README.md index a9029e5..ae1355f 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ This program is designed for scientific papers with complicate formulas and figu * Proper styling (Color, Transformation...) 
* Links * Optimization for Web +* [EXPERIMENTAL] Path drawing with CSS ### Not supported yet @@ -89,6 +90,12 @@ I have tested with CYGWIN without any problem, and I believe it also works on Mi pdf2htmlEX --help +### For Geeks + +* Experimental and unsupported + + pdf2htmlEX --process-nontext 0 --css-draw 1 /path/to/foobar.pdf + ## FAQ [here](https://github.com/coolwanglu/pdf2htmlEX/wiki/FAQ) diff --git a/TODO b/TODO index faf9ff4..4246d7b 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,6 @@ +Integrate splash/cairo +native support for image +native support for draw about glyph width: - IE @@ -29,5 +32,3 @@ combine lines (unwarp) Printing multiple charcode mapped to a same glyph check if we can add information to the font, and let browsers show ligatures automatically -native support for image -native support for draw diff --git a/share/base.css b/share/base.css index b1394de..acc5098 100644 --- a/share/base.css +++ b/share/base.css @@ -81,4 +81,12 @@ span { } .a { } +.Cd { + position:absolute; + transform-origin:0% 100%; + -ms-transform-origin:0% 100%; + -moz-transform-origin:0% 100%; + -webkit-transform-origin:0% 100%; + -o-transform-origin:0% 100%; +} /* Base CSS END */ diff --git a/src/BackgroundRenderer.cc b/src/BackgroundRenderer.cc deleted file mode 100644 index 6956408..0000000 --- a/src/BackgroundRenderer.cc +++ /dev/null @@ -1,23 +0,0 @@ -/* - * BackgroundRenderer.cc - * - * Copyright (C) 2012 by Lu Wang coolwanglugmail.com - */ - -#include - -#include "GfxFont.h" - -#include "BackgroundRenderer.h" -#include "util.h" - -using namespace pdf2htmlEX; - -void BackgroundRenderer::drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen) -{ -// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); -} - diff --git a/src/CairoBackgroundRenderer.cc b/src/CairoBackgroundRenderer.cc new file mode 100644 index 0000000..3e51741 --- /dev/null +++ 
b/src/CairoBackgroundRenderer.cc @@ -0,0 +1,30 @@ +/* + * CairoBackgroundRenderer.cc + * + * Copyright (C) 2012 Lu Wang + */ + +#include "pdf2htmlEX-config.h" + +#if HAVE_CAIRO + +#include "CairoBackgroundRenderer.h" + +namespace pdf2htmlEX { + +void CairoBackgroundRenderer::drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen) +{ + // CairoOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); +} + +void CairoBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const std::string & filename) +{ +} + +} // namespace pdf2htmlEX + +#endif // HAVE_CAIRO + diff --git a/src/HTMLRenderer/LineBuffer.cc b/src/HTMLRenderer/LineBuffer.cc index 6ad6eb3..acbb944 100644 --- a/src/HTMLRenderer/LineBuffer.cc +++ b/src/HTMLRenderer/LineBuffer.cc @@ -22,7 +22,7 @@ using std::ostream; void HTMLRenderer::LineBuffer::reset(GfxState * state) { state->transform(state->getCurX(), state->getCurY(), &x, &y); - tm_id = renderer->cur_tm_id; + tm_id = renderer->cur_ttm_id; } void HTMLRenderer::LineBuffer::append_unicodes(const Unicode * u, int l) @@ -75,7 +75,7 @@ void HTMLRenderer::LineBuffer::flush(void) for(auto iter = states.begin(); iter != states.end(); ++iter) { const auto & s = *iter; - max_ascent = max(max_ascent, s.ascent * s.draw_font_size); + max_ascent = max(max_ascent, s.ascent * s.draw_font_size); } ostream & out = renderer->html_fout; @@ -155,7 +155,7 @@ void HTMLRenderer::LineBuffer::flush(void) ++ cur_offset_iter; } - size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); + size_t next_text_idx = min(cur_state_iter->start_idx, cur_offset_iter->start_idx); outputUnicodes(out, (&text.front()) + cur_text_idx, next_text_idx - cur_text_idx); cur_text_idx = next_text_idx; diff --git a/src/HTMLRenderer/draw.cc b/src/HTMLRenderer/draw.cc new file mode 100644 index 0000000..8ed7602 --- /dev/null +++ b/src/HTMLRenderer/draw.cc @@ 
-0,0 +1,399 @@ +/* + * Draw.cc + * + * Handling path drawing + * + * by WangLu + * 2012.10.01 + */ + +#include +#include +#include +#include +#include + +#include "HTMLRenderer.h" +#include "util.h" +#include "namespace.h" + +namespace pdf2htmlEX { + +using std::swap; +using std::min; +using std::max; +using std::acos; +using std::asin; +using std::ostringstream; +using std::sqrt; +using std::vector; +using std::ostream; + +static bool is_horizontal_line(GfxSubpath * path) +{ + return ((path->getNumPoints() == 2) + && (!path->getCurve(1)) + && (_equal(path->getY(0), path->getY(1)))); +} + +static bool is_vertical_line(GfxSubpath * path) +{ + return ((path->getNumPoints() == 2) + && (!path->getCurve(1)) + && (_equal(path->getX(0), path->getX(1)))); +} + +static bool is_rectangle(GfxSubpath * path) /* detects axis-aligned rectangles built from straight segments */ +{ + if (!(((path->getNumPoints() == 4) && (path->isClosed())) /* 4 points, flagged closed */ + || ((path->getNumPoints() == 5) /* or 5 points whose last repeats the first */ + && _equal(path->getX(0), path->getX(4)) + && _equal(path->getY(0), path->getY(4))))) + return false; /* every accepted path has >= 4 points, so indices 0..3 read below are valid */ + + for(int i = 1; i < path->getNumPoints(); ++i) + if(path->getCurve(i)) + return false; + + return (_equal(path->getY(0), path->getY(1)) + && _equal(path->getX(1), path->getX(2)) + && _equal(path->getY(2), path->getY(3)) + && _equal(path->getX(3), path->getX(0))) + || (_equal(path->getX(0), path->getX(1)) + && _equal(path->getY(1), path->getY(2)) + && _equal(path->getX(2), path->getX(3)) + && _equal(path->getY(3), path->getY(0))); +} + +static void get_shading_bbox(GfxState * state, GfxShading * shading, + double & x1, double & y1, double & x2, double & y2) +{ + // from SplashOutputDev.cc in poppler + if(shading->getHasBBox()) + { + shading->getBBox(&x1, &y1, &x2, &y2); + } + else + { + state->getClipBBox(&x1, &y1, &x2, &y2); + Matrix ctm, ictm; + state->getCTM(&ctm); + ctm.invertTo(&ictm); + + double x[4], y[4]; + ictm.transform(x1, y1, &x[0], &y[0]); + ictm.transform(x2, y1, &x[1], &y[1]); + ictm.transform(x1, y2, &x[2], &y[2]); + ictm.transform(x2, y2, &x[3], &y[3]); + + 
x1 = x2 = x[0]; + y1 = y2 = y[0]; + + for(int i = 1; i < 4; ++i) + { + x1 = min(x1, x[i]); + y1 = min(y1, y[i]); + x2 = max(x2, x[i]); + y2 = max(y2, y[i]); + } + } +} + +/* + * Note that the coordinate system in HTML and PDF are different + */ +static double get_angle(double dx, double dy) +{ + double r = hypot(dx, dy); + + /* + * acos always returns [0, pi] + */ + double ang = (r > 0) ? acos(dx / r) : 0.0; /* zero-length vector: avoid 0/0 -> NaN */ + /* + * for angle below x-axis + */ + if(dy < 0) + ang = -ang; + + return ang; +} + +class LinearGradient +{ +public: + LinearGradient(GfxAxialShading * shading, + double x1, double y1, double x2, double y2); + + void dumpto (ostream & out); + + static void style_function (void * p, ostream & out) + { + static_cast<LinearGradient *>(p)->dumpto(out); + } + + // TODO, add alpha + + class ColorStop + { + public: + GfxRGB rgb; + double pos; // [0,1] + }; + + vector<ColorStop> stops; + double angle; +}; + +LinearGradient::LinearGradient (GfxAxialShading * shading, + double x1, double y1, double x2, double y2) +{ + // coordinate for t = 0 and t = 1 + double t0x, t0y, t1x, t1y; + shading->getCoords(&t0x, &t0y, &t1x, &t1y); + + angle = get_angle(t1x - t0x, t1y - t0y); + + // get the range of t in the box + // from GfxState.cc in poppler + double box_tmin, box_tmax; + { + double idx = t1x - t0x; + double idy = t1y - t0y; + double inv_len = 1.0 / (idx * idx + idy * idy); + idx *= inv_len; + idy *= inv_len; + + // t of (x1,y1) + box_tmin = box_tmax = (x1 - t0x) * idx + (y1 - t0y) * idy; + double tdx = (x2 - x1) * idx; + if(tdx < 0) + box_tmin += tdx; + else + box_tmax += tdx; + + double tdy = (y2 - y1) * idy; + if(tdy < 0) + box_tmin += tdy; + else + box_tmax += tdy; + } + + // get the domain of t in the box + double domain_tmin = max(box_tmin, shading->getDomain0()); + double domain_tmax = min(box_tmax, shading->getDomain1()); + + // TODO: better sampling + // TODO: check background color + { + stops.clear(); + double tstep = (domain_tmax - domain_tmin) / 13.0; /* 0 when the domain is degenerate; the loop guard then skips sampling instead of spinning forever */ + for(double t = domain_tmin; (tstep > 0) && (t <= domain_tmax); t += 
tstep) + { + GfxColor color; + shading->getColor(t, &color); + + ColorStop stop; + shading->getColorSpace()->getRGB(&color, &stop.rgb); + stop.pos = (t - box_tmin) / (box_tmax - box_tmin); + + stops.push_back(stop); + } + } +} + +void LinearGradient::dumpto (ostream & out) +{ + auto prefixes = {"", "-ms-", "-moz-", "-webkit-", "-o-"}; + for(auto iter = prefixes.begin(); iter != prefixes.end(); ++iter) + { + out << "background-image:" << (*iter) << "linear-gradient(" << _round(angle) << "rad"; + for(auto iter2 = stops.begin(); iter2 != stops.end(); ++iter2) + { + out << "," << (iter2->rgb) << " " << _round((iter2->pos) * 100) << "%"; + } + out << ");"; + } +} + +GBool HTMLRenderer::axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax) +{ + if(!(param->css_draw)) return gFalse; + + double x1, y1, x2, y2; + get_shading_bbox(state, shading, x1, y1, x2, y2); + + LinearGradient lg(shading, x1, y1, x2, y2); + + // TODO: check background color + css_draw_rectangle(x1, y1, x2-x1, y2-y1, state->getCTM(), + nullptr, 0, + nullptr, nullptr, + LinearGradient::style_function, &lg); + + return gTrue; +} + +//TODO track state +//TODO connection style +void HTMLRenderer::css_draw(GfxState *state, bool fill) +{ + if(!(param->css_draw)) return; + + GfxPath * path = state->getPath(); + for(int i = 0; i < path->getNumSubpaths(); ++i) + { + GfxSubpath * subpath = path->getSubpath(i); + + if(is_horizontal_line(subpath)) + { + double x1 = subpath->getX(0); + double x2 = subpath->getX(1); + double y = subpath->getY(0); + if(x1 > x2) swap(x1, x2); + + GfxRGB stroke_color; + state->getStrokeRGB(&stroke_color); + + double lw = state->getLineWidth(); + + css_draw_rectangle(x1, y - lw/2, x2-x1, lw, state->getCTM(), + nullptr, 0, + nullptr, &stroke_color); + } + else if(is_vertical_line(subpath)) + { + double x = subpath->getX(0); + double y1 = subpath->getY(0); + double y2 = subpath->getY(1); + if(y1 > y2) swap(y1, y2); + + GfxRGB stroke_color; + 
state->getStrokeRGB(&stroke_color); + + double lw = state->getLineWidth(); + + css_draw_rectangle(x-lw/2, y1, lw, y2-y1, state->getCTM(), + nullptr, 0, + nullptr, &stroke_color); + } + else if(is_rectangle(subpath)) + { + close_text_line(); + double x1 = subpath->getX(0); + double x2 = subpath->getX(2); + double y1 = subpath->getY(0); + double y2 = subpath->getY(2); + + if(x1 > x2) swap(x1, x2); + if(y1 > y2) swap(y1, y2); + + double x,y,w,h,lw[2]; + css_fix_rectangle_border_width(x1, y1, x2, y2, (fill ? 0.0 : state->getLineWidth()), + x,y,w,h,lw[0],lw[1]); + + GfxRGB stroke_color; + if(!fill) state->getStrokeRGB(&stroke_color); + + GfxRGB fill_color; + if(fill) state->getFillRGB(&fill_color); + + int lw_count = 2; + + GfxRGB * ps = fill ? nullptr : (&stroke_color); + GfxRGB * pf = fill ? (&fill_color) : nullptr; + + if(_equal(h, 0) || _equal(w, 0)) + { + // orthogonal line + + // TODO: check length + pf = ps; + ps = nullptr; + h += lw[0]; + w += lw[1]; + } + + css_draw_rectangle(x, y, w, h, state->getCTM(), + lw, lw_count, + ps, pf); + } + } +} + +void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, const double * tm, + double * line_width_array, int line_width_count, + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, ostream &), void * style_function_data) +{ + close_text_line(); + + double new_tm[6]; + memcpy(new_tm, tm, sizeof(new_tm)); + + _tm_transform(new_tm, x, y); + + double scale = 1.0; + { + static const double sqrt2 = sqrt(2.0); + + double i1 = (new_tm[0] + new_tm[2]) / sqrt2; + double i2 = (new_tm[1] + new_tm[3]) / sqrt2; + scale = hypot(i1, i2); + if(_is_positive(scale)) + { + for(int i = 0; i < 4; ++i) + new_tm[i] /= scale; + } + else + { + scale = 1.0; + } + } + + html_fout << "
0) html_fout << ' '; + + double lw = line_width_array[i] * scale; + html_fout << _round(lw); + if(_is_positive(lw)) html_fout << "px"; + } + html_fout << ";"; + } + else + { + html_fout << "border:none;"; + } + + if(fill_color) + { + html_fout << "background-color:" << (*fill_color) << ";"; + } + else + { + html_fout << "background-color:transparent;"; + } + + if(style_function) + { + style_function(style_function_data, html_fout); + } + + html_fout << "bottom:" << _round(y) << "px;" + << "left:" << _round(x) << "px;" + << "width:" << _round(w * scale) << "px;" + << "height:" << _round(h * scale) << "px;"; + + html_fout << "\">
"; +} + + +} // namespace pdf2htmlEX diff --git a/src/HTMLRenderer/export.cc b/src/HTMLRenderer/export.cc index 5fb5a5b..62a1856 100644 --- a/src/HTMLRenderer/export.cc +++ b/src/HTMLRenderer/export.cc @@ -141,9 +141,7 @@ void HTMLRenderer::export_word_space (long long ws_id, double word_space) void HTMLRenderer::export_color (long long color_id, const GfxRGB * rgb) { - css_fout << ".c" << color_id << "{color:rgb(" - << dec << (int)colToByte(rgb->r) << "," << (int)colToByte(rgb->g) << "," << (int)colToByte(rgb->b) << ");}" << hex - << endl; + css_fout << ".c" << color_id << "{color:" << (*rgb) << ";}" << endl; } void HTMLRenderer::export_whitespace (long long ws_id, double ws_width) diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index ffd3602..eedbc70 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -3,7 +3,7 @@ * * Handling general stuffs * - * by WangLu + * Copyright (C) 2012 Lu Wang * 2012.08.14 */ @@ -13,8 +13,6 @@ #include #include -#include - #include "HTMLRenderer.h" #include "BackgroundRenderer.h" #include "namespace.h" @@ -64,10 +62,6 @@ HTMLRenderer::~HTMLRenderer() delete [] width_list; } -static GBool annot_cb(Annot *, void *) { - return false; -}; - void HTMLRenderer::process(PDFDoc *doc) { cur_doc = doc; @@ -84,17 +78,17 @@ void HTMLRenderer::process(PDFDoc *doc) vector zoom_factors; - if(abs(param->zoom) > EPS) + if(_is_positive(param->zoom)) { zoom_factors.push_back(param->zoom); } - if(abs(param->fit_width) > EPS) + if(_is_positive(param->fit_width)) { zoom_factors.push_back((param->fit_width) / preprocessor.get_max_width()); } - if(abs(param->fit_height) > EPS) + if(_is_positive(param->fit_height)) { zoom_factors.push_back((param->fit_height) / preprocessor.get_max_height()); } @@ -108,8 +102,8 @@ void HTMLRenderer::process(PDFDoc *doc) zoom = *min_element(zoom_factors.begin(), zoom_factors.end()); } - scale_factor1 = max(zoom, param->font_size_multiplier); - scale_factor2 = zoom / 
scale_factor1; + text_scale_factor1 = max(zoom, param->font_size_multiplier); + text_scale_factor2 = zoom / text_scale_factor1; } @@ -117,12 +111,7 @@ void HTMLRenderer::process(PDFDoc *doc) BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { - // Render non-text objects as image - // copied from poppler - SplashColor color; - color[0] = color[1] = color[2] = 255; - - bg_renderer = new BackgroundRenderer(splashModeRGB8, 4, gFalse, color); + bg_renderer = new BackgroundRenderer(param); bg_renderer->startDoc(doc); } @@ -141,22 +130,15 @@ void HTMLRenderer::process(PDFDoc *doc) if(param->process_nontext) { - doc->displayPage(bg_renderer, i, param->h_dpi, param->v_dpi, - 0, true, false, false, - nullptr, nullptr, &annot_cb, nullptr); + auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i); + if(param->single_html) + add_tmp_file((char*)fn); - { - auto fn = str_fmt("%s/p%x.png", (param->single_html ? param->tmp_dir : param->dest_dir).c_str(), i); - if(param->single_html) - add_tmp_file((char*)fn); - - bg_renderer->getBitmap()->writeImgFile(splashFormatPng, - (char*)fn, - param->h_dpi, param->v_dpi); - } + bg_renderer->render_page(doc, i, (char*)fn); } - doc->displayPage(this, i, zoom_factor() * DEFAULT_DPI, zoom_factor() * DEFAULT_DPI, + doc->displayPage(this, i, + text_zoom_factor() * DEFAULT_DPI, text_zoom_factor() * DEFAULT_DPI, 0, true, false, false, nullptr, nullptr, nullptr, nullptr); @@ -219,15 +201,15 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) } html_fout << "\">"; - draw_scale = 1.0; + draw_text_scale = 1.0; cur_font_info = install_font(nullptr); cur_font_size = draw_font_size = 0; cur_fs_id = install_font_size(cur_font_size); - memcpy(cur_ctm, id_matrix, sizeof(cur_ctm)); - memcpy(draw_ctm, id_matrix, sizeof(draw_ctm)); - cur_tm_id = install_transform_matrix(draw_ctm); + memcpy(cur_text_tm, id_matrix, sizeof(cur_text_tm)); + memcpy(draw_text_tm, id_matrix, 
sizeof(draw_text_tm)); + cur_ttm_id = install_transform_matrix(draw_text_tm); cur_letter_space = cur_word_space = 0; cur_ls_id = install_letter_space(cur_letter_space); @@ -247,7 +229,7 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state) } void HTMLRenderer::endPage() { - close_line(); + close_text_line(); // process links before the page is closed cur_doc->processLinks(this, pageNum); @@ -404,7 +386,9 @@ void HTMLRenderer::post_process() void HTMLRenderer::fix_stream (std::ostream & out) { - out << hex; + // we output all ID's in hex + // browsers are not happy with scientific notations + out << hex << fixed; } void HTMLRenderer::add_tmp_file(const string & fn) diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index e2bd654..b741e26 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -215,9 +215,11 @@ long long HTMLRenderer::install_font_size(double font_size) long long HTMLRenderer::install_transform_matrix(const double * tm) { - TM m(tm); + Matrix m; + memcpy(m.m, tm, sizeof(m.m)); + auto iter = transform_matrix_map.lower_bound(m); - if((iter != transform_matrix_map.end()) && (m == (iter->first))) + if((iter != transform_matrix_map.end()) && (_tm_equal(m.m, iter->first.m, 4))) return iter->second; long long new_tm_id = transform_matrix_map.size(); diff --git a/src/HTMLRenderer/link.cc b/src/HTMLRenderer/link.cc index 9abb917..83cf6aa 100644 --- a/src/HTMLRenderer/link.cc +++ b/src/HTMLRenderer/link.cc @@ -22,62 +22,6 @@ using std::ostringstream; using std::min; using std::max; -static void _transform(const double * ctm, double & x, double & y) -{ - double xx = x, yy = y; - x = ctm[0] * xx + ctm[2] * yy + ctm[4]; - y = ctm[1] * xx + ctm[3] * yy + ctm[5]; -} - -static void _get_transformed_rect(AnnotLink * link, const double * ctm, double & x1, double & y1, double & x2, double & y2) -{ - double _x1, _x2, _y1, _y2; - link->getRect(&_x1, &_y1, &_x2, &_y2); - - _transform(ctm, _x1, _y1); - _transform(ctm, _x2, 
_y2); - - x1 = min(_x1, _x2); - x2 = max(_x1, _x2); - y1 = min(_y1, _y2); - y2 = max(_y1, _y2); -} - -/* - * In PDF, edges of the rectangle are in the middle of the borders - * In HTML, edges are completely outside the rectangle - */ -static void _fix_border_width(double & x1, double & y1, double & x2, double & y2, - double border_width, double & border_top_bottom_width, double & border_left_right_width) -{ - double w = x2 - x1; - if(w > border_width) - { - x1 += border_width / 2; - x2 -= border_width / 2; - border_left_right_width = border_width; - } - else - { - x1 += w / 2; - x2 -= w / 2; - border_left_right_width = border_width + w/2; - } - double h = y2 - y1; - if(h > border_width) - { - y1 += border_width / 2; - y2 -= border_width / 2; - border_top_bottom_width = border_width; - } - else - { - y1 += h / 2; - y2 -= h / 2; - border_top_bottom_width = border_width + h/2; - } -} - /* * The detailed rectangle area of the link destination * Will be parsed and performed by Javascript @@ -164,6 +108,7 @@ static string get_dest_detail_str(int pageno, LinkDest * dest) /* * Based on pdftohtml from poppler * TODO: CSS for link rectangles + * TODO: share rectangle draw with css-draw */ void HTMLRenderer::processLink(AnnotLink * al) { @@ -239,10 +184,17 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << ">"; } - html_fout << "
getRect(&x1, &y1, &x2, &y2); + x = min(x1, x2); + y = min(y1, y2); + w = max(x1, x2) - x; + h = max(y1, y2) - y; double border_width = 0; double border_top_bottom_width = 0; @@ -250,12 +202,14 @@ void HTMLRenderer::processLink(AnnotLink * al) auto * border = al->getBorder(); if(border) { - border_width = border->getWidth() * zoom_factor(); + border_width = border->getWidth(); if(border_width > 0) { { - _fix_border_width(x1, y1, x2, y1, - border_width, border_top_bottom_width, border_left_right_width); + css_fix_rectangle_border_width(x1, y1, x2, y2, border_width, + x, y, w, h, + border_top_bottom_width, border_left_right_width); + if(abs(border_top_bottom_width - border_left_right_width) < EPS) html_fout << "border-width:" << _round(border_top_bottom_width) << "px;"; else @@ -313,12 +267,13 @@ void HTMLRenderer::processLink(AnnotLink * al) html_fout << "border-style:none;"; } + _tm_transform(default_ctm, x, y); html_fout << "position:absolute;" - << "left:" << _round(x1- border_left_right_width) << "px;" - << "bottom:" << _round(y1 - border_top_bottom_width) << "px;" - << "width:" << _round(x2-x1) << "px;" - << "height:" << _round(y2-y1) << "px;"; + << "left:" << _round(x) << "px;" + << "bottom:" << _round(y) << "px;" + << "width:" << _round(w) << "px;" + << "height:" << _round(h) << "px;"; // fix for IE html_fout << "background-color:rgba(255,255,255,0.000001);"; diff --git a/src/HTMLRenderer/state.cc b/src/HTMLRenderer/state.cc index 6f7cd55..fc94015 100644 --- a/src/HTMLRenderer/state.cc +++ b/src/HTMLRenderer/state.cc @@ -12,6 +12,7 @@ * optimize lines using nested (reuse classes) */ +#include #include #include "HTMLRenderer.h" @@ -22,6 +23,7 @@ namespace pdf2htmlEX { using std::max; using std::abs; +using std::hypot; void HTMLRenderer::updateAll(GfxState * state) { @@ -82,7 +84,7 @@ void HTMLRenderer::check_state_change(GfxState * state) bool need_recheck_position = false; bool need_rescale_font = false; - bool draw_scale_changed = false; + bool 
draw_text_scale_changed = false; // text position // we've been tracking the text position positively in the update*** functions @@ -98,7 +100,7 @@ void HTMLRenderer::check_state_change(GfxState * state) if(!(new_font_info->id == cur_font_info->id)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_font_info = new_font_info; } @@ -112,7 +114,7 @@ void HTMLRenderer::check_state_change(GfxState * state) // backup the current ctm for need_recheck_position double old_ctm[6]; - memcpy(old_ctm, cur_ctm, sizeof(old_ctm)); + memcpy(old_ctm, cur_text_tm, sizeof(old_ctm)); // ctm & text ctm & hori scale if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed) @@ -131,52 +133,52 @@ void HTMLRenderer::check_state_change(GfxState * state) new_ctm[5] = m1[1] * m2[4] + m1[3] * m2[5] + m1[5]; //new_ctm[4] = new_ctm[5] = 0; - if(!_tm_equal(new_ctm, cur_ctm)) + if(!_tm_equal(new_ctm, cur_text_tm)) { need_recheck_position = true; need_rescale_font = true; - memcpy(cur_ctm, new_ctm, sizeof(cur_ctm)); + memcpy(cur_text_tm, new_ctm, sizeof(cur_text_tm)); } } - // draw_ctm, draw_scale + // draw_text_tm, draw_text_scale // depends: font size & ctm & text_ctm & hori scale if(need_rescale_font) { - double new_draw_ctm[6]; - memcpy(new_draw_ctm, cur_ctm, sizeof(new_draw_ctm)); + double new_draw_text_tm[6]; + memcpy(new_draw_text_tm, cur_text_tm, sizeof(new_draw_text_tm)); - double new_draw_scale = 1.0/scale_factor2 * sqrt(new_draw_ctm[2] * new_draw_ctm[2] + new_draw_ctm[3] * new_draw_ctm[3]); + double new_draw_text_scale = 1.0/text_scale_factor2 * hypot(new_draw_text_tm[2], new_draw_text_tm[3]); double new_draw_font_size = cur_font_size; - if(_is_positive(new_draw_scale)) + if(_is_positive(new_draw_text_scale)) { - new_draw_font_size *= new_draw_scale; + new_draw_font_size *= new_draw_text_scale; for(int i = 0; i < 4; ++i) - new_draw_ctm[i] /= new_draw_scale; + new_draw_text_tm[i] /= new_draw_text_scale; } else { - 
new_draw_scale = 1.0; + new_draw_text_scale = 1.0; } - if(!(_equal(new_draw_scale, draw_scale))) + if(!(_equal(new_draw_text_scale, draw_text_scale))) { - draw_scale_changed = true; - draw_scale = new_draw_scale; + draw_text_scale_changed = true; + draw_text_scale = new_draw_text_scale; } if(!(_equal(new_draw_font_size, draw_font_size))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); draw_font_size = new_draw_font_size; cur_fs_id = install_font_size(draw_font_size); } - if(!(_tm_equal(new_draw_ctm, draw_ctm, 4))) + if(!(_tm_equal(new_draw_text_tm, draw_text_tm, 4))) { - new_line_state = max(new_line_state, NLS_DIV); - memcpy(draw_ctm, new_draw_ctm, sizeof(draw_ctm)); - cur_tm_id = install_transform_matrix(draw_ctm); + new_line_state = max(new_line_state, NLS_DIV); + memcpy(draw_text_tm, new_draw_text_tm, sizeof(draw_text_tm)); + cur_ttm_id = install_transform_matrix(draw_text_tm); } } @@ -198,29 +200,29 @@ void HTMLRenderer::check_state_change(GfxState * state) */ bool merged = false; - if(_tm_equal(old_ctm, cur_ctm, 4)) + if(_tm_equal(old_ctm, cur_text_tm, 4)) { double dy = cur_ty - draw_ty; - double tdx = old_ctm[4] - cur_ctm[4] - cur_ctm[2] * dy; - double tdy = old_ctm[5] - cur_ctm[5] - cur_ctm[3] * dy; + double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy; + double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy; - if(_equal(cur_ctm[0] * tdy, cur_ctm[1] * tdx)) + if(_equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx)) { - if(abs(cur_ctm[0]) > EPS) + if(_is_positive(cur_text_tm[0])) { - draw_tx += tdx / cur_ctm[0]; + draw_tx += tdx / cur_text_tm[0]; draw_ty += dy; merged = true; } - else if (abs(cur_ctm[1]) > EPS) + else if (_is_positive(cur_text_tm[1])) { - draw_tx += tdy / cur_ctm[1]; + draw_tx += tdy / cur_text_tm[1]; draw_ty += dy; merged = true; } else { - if((abs(tdx) < EPS) && (abs(tdy) < EPS)) + if((_equal(tdx,0)) && (_equal(tdy,0))) { // free draw_tx = cur_tx; @@ -236,33 +238,33 @@ 
void HTMLRenderer::check_state_change(GfxState * state) if(!merged) { - new_line_state = max(new_line_state, NLS_DIV); + new_line_state = max(new_line_state, NLS_DIV); } } // letter space - // depends: draw_scale - if(all_changed || letter_space_changed || draw_scale_changed) + // depends: draw_text_scale + if(all_changed || letter_space_changed || draw_text_scale_changed) { double new_letter_space = state->getCharSpace(); if(!_equal(cur_letter_space, new_letter_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_letter_space = new_letter_space; - cur_ls_id = install_letter_space(cur_letter_space * draw_scale); + cur_ls_id = install_letter_space(cur_letter_space * draw_text_scale); } } // word space - // depends draw_scale - if(all_changed || word_space_changed || draw_scale_changed) + // depends draw_text_scale + if(all_changed || word_space_changed || draw_text_scale_changed) { double new_word_space = state->getWordSpace(); if(!_equal(cur_word_space, new_word_space)) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_word_space = new_word_space; - cur_ws_id = install_word_space(cur_word_space * draw_scale); + cur_ws_id = install_word_space(cur_word_space * draw_text_scale); } } @@ -273,22 +275,22 @@ void HTMLRenderer::check_state_change(GfxState * state) state->getFillRGB(&new_color); if(!((new_color.r == cur_color.r) && (new_color.g == cur_color.g) && (new_color.b == cur_color.b))) { - new_line_state = max(new_line_state, NLS_SPAN); + new_line_state = max(new_line_state, NLS_SPAN); cur_color = new_color; cur_color_id = install_color(&new_color); } } // rise - // depends draw_scale - if(all_changed || rise_changed || draw_scale_changed) + // depends draw_text_scale + if(all_changed || rise_changed || draw_text_scale_changed) { double new_rise = state->getRise(); if(!_equal(cur_rise, new_rise)) { - new_line_state = max(new_line_state, NLS_SPAN); + 
new_line_state = max(new_line_state, NLS_SPAN); cur_rise = new_rise; - cur_rise_id = install_rise(new_rise * draw_scale); + cur_rise_id = install_rise(new_rise * draw_text_scale); } } @@ -312,7 +314,7 @@ void HTMLRenderer::reset_state_change() color_changed = false; } -void HTMLRenderer::prepare_line(GfxState * state) +void HTMLRenderer::prepare_text_line(GfxState * state) { if(!line_opened) { @@ -321,7 +323,7 @@ void HTMLRenderer::prepare_line(GfxState * state) if(new_line_state == NLS_DIV) { - close_line(); + close_text_line(); line_buf.reset(state); @@ -333,7 +335,7 @@ void HTMLRenderer::prepare_line(GfxState * state) { // align horizontal position // try to merge with the last line if possible - double target = (cur_tx - draw_tx) * draw_scale; + double target = (cur_tx - draw_tx) * draw_text_scale; if(abs(target) < param->h_eps) { // ignore it @@ -341,7 +343,7 @@ void HTMLRenderer::prepare_line(GfxState * state) else { line_buf.append_offset(target); - draw_tx += target / draw_scale; + draw_tx += target / draw_text_scale; } } @@ -353,7 +355,7 @@ void HTMLRenderer::prepare_line(GfxState * state) line_opened = true; } -void HTMLRenderer::close_line() +void HTMLRenderer::close_text_line() { if(line_opened) { diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index 47a24ec..1486d23 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -310,7 +310,7 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo memset(width_list, -1, 0x10000 * sizeof(*width_list)); if(code2GID) - maxcode = min(maxcode, code2GID_len - 1); + maxcode = min(maxcode, code2GID_len - 1); bool is_truetype = is_truetype_suffix(suffix); int max_key = maxcode; @@ -484,7 +484,7 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s) // see if the line has to be closed due to state change check_state_change(state); - prepare_line(state); + prepare_text_line(state); // Now ready to output // get the unicodes @@ -522,7 +522,7 @@ void 
HTMLRenderer::drawString(GfxState * state, GooString * s) if(is_space && (param->space_as_offset)) { // ignore horiz_scaling, as it's merged in CTM - line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_scale); + line_buf.append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale); } else { diff --git a/src/Preprocessor.cc b/src/Preprocessor.cc index c64b04a..f96b602 100644 --- a/src/Preprocessor.cc +++ b/src/Preprocessor.cc @@ -82,8 +82,8 @@ void Preprocessor::drawChar(GfxState *state, double x, double y, void Preprocessor::startPage(int pageNum, GfxState *state) { - max_width = max(max_width, state->getPageWidth()); - max_height = max(max_height, state->getPageHeight()); + max_width = max(max_width, state->getPageWidth()); + max_height = max(max_height, state->getPageHeight()); } const char * Preprocessor::get_code_map (long long font_id) const diff --git a/src/SplashBackgroundRenderer.cc b/src/SplashBackgroundRenderer.cc new file mode 100644 index 0000000..f2d7a85 --- /dev/null +++ b/src/SplashBackgroundRenderer.cc @@ -0,0 +1,40 @@ +/* + * SplashBackgroundRenderer.cc + * + * Copyright (C) 2012 Lu Wang + */ + +#include + +#include "SplashBackgroundRenderer.h" + +namespace pdf2htmlEX { + +using std::string; + +const SplashColor SplashBackgroundRenderer::white = {255,255,255}; + +void SplashBackgroundRenderer::drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen) +{ +// SplashOutputDev::drawChar(state,x,y,dx,dy,originX,originY,code, nBytes, u, uLen); +} + +static GBool annot_cb(Annot *, void *) { + return false; +}; + +void SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno, const string & filename) +{ + doc->displayPage(this, pageno, param->h_dpi, param->v_dpi, + 0, true, false, false, + nullptr, nullptr, &annot_cb, nullptr); + + getBitmap()->writeImgFile(splashFormatPng, + 
(char*)filename.c_str(), + param->h_dpi, param->v_dpi); +} + +} // namespace pdf2htmlEX diff --git a/src/ffw.c b/src/ffw.c index dc8e913..2bdef39 100644 --- a/src/ffw.c +++ b/src/ffw.c @@ -332,15 +332,11 @@ void ffw_set_widths(int * width_list, int mapping_len, int stretch_narrow, int s SplineChar * sc = sf->glyphs[j]; if(sc == NULL) continue; - DBounds bb; - SplineCharFindBounds(sc, &bb); - - double glyph_width = bb.maxx - bb.minx; - if((glyph_width > EPS) - && (((glyph_width > width_list[i] + EPS) && (squeeze_wide)) - || ((glyph_width < width_list[i] - EPS) && (stretch_narrow)))) + if(((sc->width > EPS) + && (((sc->width > width_list[i] + EPS) && (squeeze_wide)) + || ((sc->width < width_list[i] - EPS) && (stretch_narrow))))) { - real transform[6]; transform[0] = ((double)width_list[i]) / glyph_width; + real transform[6]; transform[0] = ((double)width_list[i]) / (sc->width); transform[3] = 1.0; transform[1] = transform[2] = transform[4] = transform[5] = 0; FVTrans(cur_fv, sc, transform, NULL, fvt_alllayers | fvt_dontmovewidth); diff --git a/src/include/BackgroundRenderer.h b/src/include/BackgroundRenderer.h index 39cbd07..b3cd623 100644 --- a/src/include/BackgroundRenderer.h +++ b/src/include/BackgroundRenderer.h @@ -2,37 +2,31 @@ * Background renderer * Render all those things not supported as Image * - * by WangLu - * 2012.08.06 + * Copyright (C) 2012 Lu Wang */ #ifndef BACKGROUND_RENDERER_H__ #define BACKGROUND_RENDERER_H__ -#include +#include "pdf2htmlEX-config.h" + +#if HAVE_CAIRO + +#include "CairoBackgroundRenderer.h" namespace pdf2htmlEX { - -// Based on BackgroundRenderer from poppler -class BackgroundRenderer : public SplashOutputDev { -public: - BackgroundRenderer(SplashColorMode colorModeA, int bitmapRowPadA, - GBool reverseVideoA, SplashColorPtr paperColorA, - GBool bitmapTopDownA = gTrue, - GBool allowAntialiasA = gTrue) - : SplashOutputDev(colorModeA, - bitmapRowPadA, reverseVideoA, paperColorA, bitmapTopDownA, - allowAntialiasA) - { } - virtual 
~BackgroundRenderer() { } - - virtual void drawChar(GfxState *state, double x, double y, - double dx, double dy, - double originX, double originY, - CharCode code, int nBytes, Unicode *u, int uLen); -}; - + typedef CairoBackgroundRenderer BackgroundRenderer; } +#else + +#include "SplashBackgroundRenderer.h" + +namespace pdf2htmlEX { + typedef SplashBackgroundRenderer BackgroundRenderer; +} + +#endif // HAVE_CAIRO + #endif //BACKGROUND_RENDERER_H__ diff --git a/src/include/CairoBackgroundRenderer.h b/src/include/CairoBackgroundRenderer.h new file mode 100644 index 0000000..c3c8e90 --- /dev/null +++ b/src/include/CairoBackgroundRenderer.h @@ -0,0 +1,42 @@ +/* + * Cairo Background renderer + * Render all those things not supported as Image, with Cairo + * + * Copyright (C) 2012 Lu Wang + */ + + +#ifndef CAIRO_BACKGROUND_RENDERER_H__ +#define CAIRO_BACKGROUND_RENDERER_H__ + +#include + +#include "Param.h" + +namespace pdf2htmlEX { + +// Based on BackgroundRenderer from poppler +class CairoBackgroundRenderer : public CairoOutputDev +{ +public: + CairoBackgroundRenderer(const Param * param) + :CairoOutputDev() + , param(param) + { } + + virtual ~CairoBackgroundRenderer() { } + + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen); + + void render_page(PDFDoc * doc, int pageno, const std::string & filename); + +protected: + const Param * param; +}; + +} + +#endif //CAIRO_BACKGROUND_RENDERER_H__ diff --git a/src/include/HTMLRenderer.h b/src/include/HTMLRenderer.h index 5457296..ec7692b 100644 --- a/src/include/HTMLRenderer.h +++ b/src/include/HTMLRenderer.h @@ -42,6 +42,8 @@ * j - Js data * p - Page * + * Cd - CSS Draw + * * Reusable CSS classes * * t - Transform matrix @@ -77,12 +79,18 @@ class HTMLRenderer : public OutputDev // Does this device use drawChar() or drawString()? 
virtual GBool useDrawChar() { return gFalse; } + // Does this device use functionShadedFill(), axialShadedFill(), and + // radialShadedFill()? If this returns false, these shaded fills + // will be reduced to a series of other drawing operations. + virtual GBool useShadedFills(int type) { return type == 2; } + + // Does this device use beginType3Char/endType3Char? Otherwise, // text in Type 3 fonts will be drawn with drawChar/drawString. virtual GBool interpretType3Chars() { return gFalse; } // Does this device need non-text content? - virtual GBool needNonText() { return gFalse; } + virtual GBool needNonText() { return gTrue; } virtual void setDefaultCTM(double *ctm); @@ -121,6 +129,10 @@ class HTMLRenderer : public OutputDev virtual void drawImage(GfxState * state, Object * ref, Stream * str, int width, int height, GfxImageColorMap * colorMap, GBool interpolate, int *maskColors, GBool inlineImg); + virtual void stroke(GfxState *state) { css_draw(state, false); } + virtual void fill(GfxState *state) { css_draw(state, true); } + virtual GBool axialShadedFill(GfxState *state, GfxAxialShading *shading, double tMin, double tMax); + virtual void processLink(AnnotLink * al); protected: @@ -190,8 +202,25 @@ class HTMLRenderer : public OutputDev void reset_state_change(); // prepare the line context, (close old tags, open new tags) // make sure the current HTML style consistent with PDF - void prepare_line(GfxState * state); - void close_line(); + void prepare_text_line(GfxState * state); + void close_text_line(); + + //////////////////////////////////////////////////// + // CSS drawing + //////////////////////////////////////////////////// + void css_draw(GfxState *state, bool fill); + /* + * coordinates are to transformed by state->getCTM() + * (x,y) should be the bottom-left corner INCLUDING border + * w,h should be the metrics WITHOUT border + * + * line_color & fill_color may be specified as nullptr to indicate none + * style_function & style_function_data may be 
provided to provide more styles + */ + void css_draw_rectangle(double x, double y, double w, double h, const double * tm, + double * line_width_array, int line_width_count, + const GfxRGB * line_color, const GfxRGB * fill_color, + void (*style_function)(void *, std::ostream &) = nullptr, void * style_function_data = nullptr ); //////////////////////////////////////////////////// @@ -216,9 +245,9 @@ class HTMLRenderer : public OutputDev * factor1 & factor 2 are determined according to zoom and font-size-multiplier * */ - double zoom_factor (void) const { return scale_factor1 * scale_factor2; } - double scale_factor1; - double scale_factor2; + double text_zoom_factor (void) const { return text_scale_factor1 * text_scale_factor2; } + double text_scale_factor1; + double text_scale_factor2; //////////////////////////////////////////////////// @@ -246,16 +275,15 @@ class HTMLRenderer : public OutputDev bool font_changed; // transform matrix - long long cur_tm_id; + long long cur_ttm_id; bool ctm_changed; bool text_mat_changed; // horizontal scaling bool hori_scale_changed; - // this is CTM * TextMAT in PDF, not only CTM + // this is CTM * TextMAT in PDF // [4] and [5] are ignored, // as we'll calculate the position of the origin separately - // TODO: changed this for images - double cur_ctm[6]; // unscaled + double cur_text_tm[6]; // unscaled // letter spacing long long cur_ls_id; @@ -281,11 +309,11 @@ class HTMLRenderer : public OutputDev // we try to render the final font size directly // to reduce the effect of ctm as much as possible - // draw_ctm is cur_ctm scaled by 1/draw_scale, - // so everything redenered should be multiplied by draw_scale - double draw_ctm[6]; + // draw_ctm is cur_ctm scaled by 1/draw_text_scale, + // so everything redenered should be multiplied by draw_text_scale + double draw_text_tm[6]; double draw_font_size; - double draw_scale; + double draw_text_scale; // the position of next char, in text coords // this is actual position (in HTML), 
which might be different from cur_tx/ty (in PDF) @@ -376,7 +404,7 @@ class HTMLRenderer : public OutputDev std::unordered_map font_name_map; std::map font_size_map; - std::map transform_matrix_map; + std::map transform_matrix_map; std::map letter_space_map; std::map word_space_map; std::unordered_map color_map; diff --git a/src/include/Param.h b/src/include/Param.h index db90188..6136a55 100644 --- a/src/include/Param.h +++ b/src/include/Param.h @@ -55,6 +55,9 @@ struct Param int debug; int clean_tmp; + + // experimental + int css_draw; }; } // namespace pdf2htmlEX diff --git a/src/include/SplashBackgroundRenderer.h b/src/include/SplashBackgroundRenderer.h new file mode 100644 index 0000000..e56bf1d --- /dev/null +++ b/src/include/SplashBackgroundRenderer.h @@ -0,0 +1,48 @@ +/* + * Splash Background renderer + * Render all those things not supported as Image, with Splash + * + * by WangLu + * 2012.08.06 + */ + + +#ifndef SPLASH_BACKGROUND_RENDERER_H__ +#define SPLASH_BACKGROUND_RENDERER_H__ + +#include + +#include +#include + +#include "Param.h" + +namespace pdf2htmlEX { + +// Based on BackgroundRenderer from poppler +class SplashBackgroundRenderer : public SplashOutputDev +{ +public: + static const SplashColor white; + + SplashBackgroundRenderer(const Param * param) + : SplashOutputDev(splashModeRGB8, 4, gFalse, (SplashColorPtr)&white, gTrue, gTrue) + , param(param) + { } + + virtual ~SplashBackgroundRenderer() { } + + virtual void drawChar(GfxState *state, double x, double y, + double dx, double dy, + double originX, double originY, + CharCode code, int nBytes, Unicode *u, int uLen); + + void render_page(PDFDoc * doc, int pageno, const std::string & filename); + +protected: + const Param * param; +}; + +} // namespace pdf2htmlEX + +#endif // SPLASH_BACKGROUND_RENDERER_H__ diff --git a/src/include/pdf2htmlEX-config.h b/src/include/pdf2htmlEX-config.h new file mode 100644 index 0000000..ffb69e8 --- /dev/null +++ b/src/include/pdf2htmlEX-config.h @@ -0,0 +1,24 @@ 
+/* + * config.h + * Compile time constants + * + * by WangLu + */ + + +#ifndef PDF2HTMLEX_CONFIG_H__ +#define PDF2HTMLEX_CONFIG_H__ + +#include + +#define HAVE_CAIRO 0 + +namespace pdf2htmlEX { + +static const std::string PDF2HTMLEX_VERSION = "0.5"; +static const std::string PDF2HTMLEX_PREFIX = "/usr/local"; +static const std::string PDF2HTMLEX_DATA_PATH = "/usr/local""/share/pdf2htmlEX"; + +} // namespace pdf2htmlEX + +#endif //PDF2HTMLEX_CONFIG_H__ diff --git a/src/include/pdf2htmlEX-config.h.in b/src/include/pdf2htmlEX-config.h.in index 7a7ef62..695a8dd 100644 --- a/src/include/pdf2htmlEX-config.h.in +++ b/src/include/pdf2htmlEX-config.h.in @@ -11,6 +11,8 @@ #include +#define HAVE_CAIRO @HAVE_CAIRO@ + namespace pdf2htmlEX { static const std::string PDF2HTMLEX_VERSION = "@PDF2HTMLEX_VERSION@"; diff --git a/src/include/util.h b/src/include/util.h index 1284837..5c1032b 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -47,6 +47,9 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } +void _tm_transform(const double * tm, double & x, double & y, bool is_delta = false); +void _tm_multiply(double * tm_left, const double * tm_right); + static inline long long hash_ref(const Ref * id) { return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen); @@ -102,28 +105,21 @@ public: bool has_space; // whether space is included in the font }; -// wrapper of the transform matrix double[6] -// Transform Matrix -class TM +class Matrix_less { public: - TM() {} - TM(const double * m) {memcpy(_, m, sizeof(_));} - bool operator < (const TM & m) const { + bool operator () (const Matrix & m1, const Matrix & m2) const + { // Note that we only care about the first 4 elements for(int i = 0; i < 4; ++i) { - if(_[i] < m._[i] - EPS) + if(m1.m[i] < m2.m[i] - EPS) return true; - if(_[i] > m._[i] + EPS) + if(m1.m[i] > m2.m[i] + EPS) return false; } return false; } - bool operator == (const TM & m) const { - return _tm_equal(_, m._, 
4); - } - double _[6]; }; class base64stream @@ -201,7 +197,7 @@ public: va_end(vlist); if(l >= (int)buf.capacity()) { - buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); + buf.reserve(std::max((long)(l+1), (long)buf.capacity() * 2)); va_start(vlist, format); l = vsnprintf(&buf.front(), buf.capacity(), format, vlist); va_end(vlist); @@ -223,5 +219,17 @@ bool is_truetype_suffix(const std::string & suffix); std::string get_filename(const std::string & path); std::string get_suffix(const std::string & path); +/* + * In PDF, edges of the rectangle are in the middle of the borders + * In HTML, edges are completely outside the rectangle + */ +void css_fix_rectangle_border_width(double x1, double y1, double x2, double y2, + double border_width, + double & x, double & y, double & w, double & h, + double & border_top_bottom_width, + double & border_left_right_width); + +std::ostream & operator << (std::ostream & out, const GfxRGB & rgb); + } // namespace util #endif //UTIL_H__ diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index c712df0..c65eeb0 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -82,7 +82,7 @@ void parse_options (int argc, char **argv) .add("tounicode", ¶m.tounicode, 0, "Specify how to deal with ToUnicode map, 0 for auto, 1 for forced, -1 for disabled") .add("space-as-offset", ¶m.space_as_offset, 0, "treat space characters as offsets") .add("stretch_narrow_glyph", ¶m.stretch_narrow_glyph, 0, "stretch narrow glyphs instead of padding space") - .add("squeeze_wide_glyph", ¶m.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") + .add("squeeze-wide-glyph", ¶m.squeeze_wide_glyph, 0, "squeeze wide glyphs instead of truncating") .add("css-filename", ¶m.css_filename, "", "Specify the file name of the generated css file") .add("font-suffix", ¶m.font_suffix, ".ttf", "suffix for extracted font files") @@ -91,6 +91,7 @@ void parse_options (int argc, char **argv) .add("debug", ¶m.debug, 0, "output debug information") 
.add("clean-tmp", ¶m.clean_tmp, 1, "clean temporary files after processing") + .add("css-draw", ¶m.css_draw, 0, "[Experimental and Unsupported] CSS Drawing") .add("", ¶m.input_filename, "", "") .add("", ¶m.output_filename, "", "") ; @@ -182,8 +183,8 @@ int main(int argc, char **argv) throw "Copying of text from this document is not allowed."; } - param.first_page = min(max(param.first_page, 1), doc->getNumPages()); - param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); + param.first_page = min(max(param.first_page, 1), doc->getNumPages()); + param.last_page = min(max(param.last_page, param.first_page), doc->getNumPages()); if(param.output_filename == "") { diff --git a/src/util.cc b/src/util.cc index 2c854f3..a69654e 100644 --- a/src/util.cc +++ b/src/util.cc @@ -54,6 +54,31 @@ const std::map, std::pair {{".js", 1}, {""}} }); +void _tm_transform(const double * tm, double & x, double & y, bool is_delta) +{ + double xx = x, yy = y; + x = tm[0] * xx + tm[2] * yy; + y = tm[1] * xx + tm[3] * yy; + if(!is_delta) + { + x += tm[4]; + y += tm[5]; + } +} + +void _tm_multiply(double * tm_left, const double * tm_right) +{ + double old[4]; + memcpy(old, tm_left, sizeof(old)); + + tm_left[0] = old[0] * tm_right[0] + old[2] * tm_right[1]; + tm_left[1] = old[1] * tm_right[0] + old[3] * tm_right[1]; + tm_left[2] = old[0] * tm_right[2] + old[2] * tm_right[3]; + tm_left[3] = old[1] * tm_right[2] + old[3] * tm_right[3]; + tm_left[4] += old[0] * tm_right[4] + old[2] * tm_right[5]; + tm_left[5] += old[1] * tm_right[4] + old[3] * tm_right[5]; +} + bool isLegalUnicode(Unicode u) { /* @@ -249,4 +274,49 @@ string get_suffix(const string & path) } } +void css_fix_rectangle_border_width(double x1, double y1, + double x2, double y2, + double border_width, + double & x, double & y, double & w, double & h, + double & border_top_bottom_width, + double & border_left_right_width) +{ + w = x2 - x1; + if(w > border_width) + { + w -= border_width; + 
border_left_right_width = border_width; + } + else + { + border_left_right_width = border_width + w/2; + w = 0; + } + x = x1 - border_width / 2; + + h = y2 - y1; + if(h > border_width) + { + h -= border_width; + border_top_bottom_width = border_width; + } + else + { + border_top_bottom_width = border_width + h/2; + h = 0; + } + y = y1 - border_width / 2; +} + +ostream & operator << (ostream & out, const GfxRGB & rgb) +{ + auto flags= out.flags(); + out << std::dec << "rgb(" + << (int)colToByte(rgb.r) << "," + << (int)colToByte(rgb.g) << "," + << (int)colToByte(rgb.b) << ")"; + out.flags(flags); + return out; +} + } // namespace pdf2htmlEX diff --git a/test/test.py b/test/test.py index 6ccdf11..bf81859 100755 --- a/test/test.py +++ b/test/test.py @@ -1,6 +1,7 @@ #!/usr/bin/env python DIR = 'pdf' +DIR = '../../pdf.js/test/pdfs' import os @@ -11,7 +12,8 @@ with open('out.html','w') as outf: if not f.lower().endswith('.pdf'): continue print f - os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) + #os.system('pdf2htmlEX --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) + os.system('pdf2htmlEX --dest-dir html --process-nontext 0 --css-draw 1 "%s/%s"' % (DIR,f)) ff = f[:-3] outf.write('%s
' % (ff,ff)) outf.flush();