From e230bc06a9eaeef30a9b850289f3e878896c9926 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Sun, 3 Aug 2014 16:42:40 +0800 Subject: [PATCH 1/3] Force --svg-embed-bitmap on when user set --embed-image on. --- src/pdf2htmlEX.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc index 3d4972a..8009538 100644 --- a/src/pdf2htmlEX.cc +++ b/src/pdf2htmlEX.cc @@ -344,6 +344,12 @@ void check_param() { cerr << "Warning: No hint tool is specified for truetype fonts, the result may be rendered poorly in some circumstances." << endl; } + + if (param.embed_image && (param.bg_format == "svg") && !param.svg_embed_bitmap) + { + cerr << "Warning: --svg-embed-bitmap is forced on because --embed-image is on, or the dumped bitmaps can't be loaded." << endl; + param.svg_embed_bitmap = 1; + } } int main(int argc, char **argv) From 144ec439f0091bb820d295447afe0444e065c197 Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Mon, 4 Aug 2014 02:38:28 +0800 Subject: [PATCH 2/3] Don't dump jpeg not in RGB or Gray colorspaces (Fix 400). --- .../CairoBackgroundRenderer.cc | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc index d7c48b9..86fef1c 100644 --- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc +++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc @@ -246,6 +246,34 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac if (ref == nullptr || !ref->isRef()) return; + // We only dump rgb or gray jpeg without /Decode array. + // + // Although jpeg support CMYK, PDF readers do color conversion incompatibly with most other + // programs (including browsers): other programs invert CMYK color if 'Adobe' marker (app14) presents + // in a jpeg file; while PDF readers don't, they solely rely on /Decode array to invert color. 
+ // It's a bit complicated to decide whether a CMYK jpeg is safe to dump, so we don't dump at all. + // See also: + // JPEG file embedded in PDF (CMYK) https://forums.adobe.com/thread/975777 + // http://stackoverflow.com/questions/3123574/how-to-convert-from-cmyk-to-rgb-in-java-correctly + // + // In PDF, jpeg stream objects can also specify other color spaces like DeviceN and Separation, + // It is also not safe to dump them directly. + Object obj; + str->getDict()->lookup("ColorSpace", &obj); + if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) ) + { + obj.free(); + return; + } + obj.free(); + str->getDict()->lookup("Decode", &obj); + if (obj.isArray()) + { + obj.free(); + return; + } + obj.free(); + int imgId = ref->getRef().num; auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId)); auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI, From 166625fb7106bc3433a48d4a2dba20cb1fb604ad Mon Sep 17 00:00:00 2001 From: Duan Yao Date: Tue, 5 Aug 2014 18:59:19 +0800 Subject: [PATCH 3/3] Update man page for --svg-embed-bitmap. --- pdf2htmlEX.1.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in index 1f44984..ffb7ecc 100644 --- a/pdf2htmlEX.1.in +++ b/pdf2htmlEX.1.in @@ -261,7 +261,11 @@ This option is only useful when '\-\-bg\-format svg' is specified. Note that nod .TP .B \-\-svg\-embed\-bitmap <0|1> (Default: 1) Whether embed bitmaps in svg background image. 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible. -JPEG images in a PDF are most possibly dumped. This option is only useful when '\-\-bg\-format svg' is specified. + +This option is only useful when '\-\-bg\-format svg' is specified and '\-\-embed\-image' is off. + +Currently, RGB or Gray JPEG bitmaps in a PDF can be dumped, while those in other formats or colorspaces are still embedded. 
+If bitmaps are not dumped as expected, try pre-processing your PDF with Ghostscript or Acrobat to make sure the bitmaps in it are converted to RGB/Gray JPEG format. See the project wiki for more details. .SS PDF Protection