1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 04:50:09 +00:00

Merge pull request #407 from duanyao/fix_bitmap_dump

Fix bitmap dump
This commit is contained in:
Lu Wang 2014-08-05 14:20:18 -07:00
commit 4df4ea5e4c
3 changed files with 39 additions and 1 deletions

View File

@ -261,7 +261,11 @@ This option is only useful when '\-\-bg\-format svg' is specified. Note that nod
.TP
.B \-\-svg\-embed\-bitmap <0|1> (Default: 1)
Whether to embed bitmaps in the svg background image. 1: embed bitmaps in the svg background; 0: dump bitmaps to external files if possible.
JPEG images in a PDF will most likely be dumped. This option is only useful when '\-\-bg\-format svg' is specified.
This option is only useful when '\-\-bg\-format svg' is specified and '\-\-embed\-image' is off.
Currently, RGB or Gray JPEG bitmaps in a PDF can be dumped, while those in other formats or colorspaces are still embedded.
If bitmaps are not dumped as expected, try pre-processing your PDF with Ghostscript or Acrobat, and make sure the bitmaps in it are converted to RGB/Gray JPEG format. See the project wiki for more details.
.SS PDF Protection

View File

@ -246,6 +246,34 @@ void CairoBackgroundRenderer::setMimeData(Stream *str, Object *ref, cairo_surfac
if (ref == nullptr || !ref->isRef())
return;
// We only dump RGB or gray JPEGs that have no /Decode array.
//
// Although JPEG supports CMYK, PDF readers do the color conversion incompatibly with most other
// programs (including browsers): other programs invert CMYK colors if the 'Adobe' marker (APP14) is present
// in a JPEG file, while PDF readers don't; they rely solely on the /Decode array to invert colors.
// It's a bit complicated to decide whether a CMYK JPEG is safe to dump, so we don't dump them at all.
// See also:
// JPEG file embedded in PDF (CMYK) https://forums.adobe.com/thread/975777
// http://stackoverflow.com/questions/3123574/how-to-convert-from-cmyk-to-rgb-in-java-correctly
//
// In PDF, JPEG stream objects can also specify other color spaces like DeviceN and Separation;
// it is also not safe to dump them directly.
Object obj;
str->getDict()->lookup("ColorSpace", &obj);
if (!obj.isName() || (strcmp(obj.getName(), "DeviceRGB") && strcmp(obj.getName(), "DeviceGray")) )
{
obj.free();
return;
}
obj.free();
str->getDict()->lookup("Decode", &obj);
if (obj.isArray())
{
obj.free();
return;
}
obj.free();
int imgId = ref->getRef().num;
auto uri = strdup((char*) html_renderer->str_fmt("o%d.jpg", imgId));
auto st = cairo_surface_set_mime_data(image, CAIRO_MIME_TYPE_URI,

View File

@ -344,6 +344,12 @@ void check_param()
{
cerr << "Warning: No hint tool is specified for truetype fonts, the result may be rendered poorly in some circumstances." << endl;
}
if (param.embed_image && (param.bg_format == "svg") && !param.svg_embed_bitmap)
{
cerr << "Warning: --svg-embed-bitmap is forced on because --embed-image is on, or the dumped bitmaps can't be loaded." << endl;
param.svg_embed_bitmap = 1;
}
}
int main(int argc, char **argv)