diff --git a/pdf2htmlEX.1.in b/pdf2htmlEX.1.in
index 1f44984..ffb7ecc 100644
--- a/pdf2htmlEX.1.in
+++ b/pdf2htmlEX.1.in
@@ -261,7 +261,11 @@ This option is only useful when '\-\-bg\-format svg' is specified. Note that nod
.TP
.B \-\-svg\-embed\-bitmap <0|1> (Default: 1)
Whether embed bitmaps in svg background image. 1: embed bitmaps in svg background; 0: dump bitmaps to external files if possible.
-JPEG images in a PDF are most possibly dumped. This option is only useful when '\-\-bg\-format svg' is specified.
+
+This option is only useful when '\-\-bg\-format svg' is specified and '\-\-embed\-image' is off.
+
+Currently, RGB or Gray JPEG bitmaps in a PDF can be dumped, while those in other formats or colorspaces are still embedded.
+If bitmaps are not dumped as expected, try pre-processing your PDF with Ghostscript or Acrobat to make sure the bitmaps in it are converted to RGB/Gray JPEG format. See the project wiki for more details.
.SS PDF Protection
diff --git a/src/BackgroundRenderer/CairoBackgroundRenderer.cc b/src/BackgroundRenderer/CairoBackgroundRenderer.cc
index d7c48b9..86fef1c 100644
--- a/src/BackgroundRenderer/CairoBackgroundRenderer.cc
+++ b/src/BackgroundRenderer/CairoBackgroundRenderer.cc
@@ -246,6 +246,34 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac
if (ref == nullptr || !ref->isRef())
return;
+ // We only dump RGB or gray JPEGs that have no /Decode array.
+ //
+ // Although JPEG supports CMYK, PDF readers do color conversion incompatibly with most other
+ // programs (including browsers): other programs invert CMYK colors if the 'Adobe' marker (APP14) is
+ // present in a JPEG file, while PDF readers don't; they rely solely on the /Decode array to invert colors.
+ // It's a bit complicated to decide whether a CMYK JPEG is safe to dump, so we don't dump them at all.
+ // See also:
+ // JPEG file embedded in PDF (CMYK) https://forums.adobe.com/thread/975777
+ // http://stackoverflow.com/questions/3123574/how-to-convert-from-cmyk-to-rgb-in-java-correctly
+ //
+ // In PDF, JPEG stream objects can also specify other color spaces like DeviceN and Separation;
+ // it is not safe to dump them directly either.
+ Object obj;
+ str->getDict()->lookup("ColorSpace", &obj);
+ if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) )
+ {
+ obj.free();
+ return;
+ }
+ obj.free();
+ str->getDict()->lookup("Decode", &obj);
+ if (obj.isArray())
+ {
+ obj.free();
+ return;
+ }
+ obj.free();
+
int imgId = ref->getRef().num;
auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId));
auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI,
diff --git a/src/pdf2htmlEX.cc b/src/pdf2htmlEX.cc
index 3d4972a..8009538 100644
--- a/src/pdf2htmlEX.cc
+++ b/src/pdf2htmlEX.cc
@@ -344,6 +344,12 @@ void check_param()
{
cerr << "Warning: No hint tool is specified for truetype fonts, the result may be rendered poorly in some circumstances." << endl;
}
+
+ if (param.embed_image && (param.bg_format == "svg") && !param.svg_embed_bitmap)
+ {
+ cerr << "Warning: --svg-embed-bitmap is forced on because --embed-image is on, or the dumped bitmaps can't be loaded." << endl;
+ param.svg_embed_bitmap = 1;
+ }
}
int main(int argc, char **argv)