1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

Compare commits

..

10 Commits

Author SHA1 Message Date
Trent Petersen
6f85c88b1d
Merge pull request #156 from ViliusSutkus89/background-image
Fix out of bounds read in SplashBackgroundRenderer
2024-02-01 09:12:42 -06:00
Trent Petersen
1fabed18de
Merge pull request #154 from ViliusSutkus89/master
Update Poppler to 23.12.0 and Fontforge to 20230101 (based on #145)
2024-02-01 09:12:28 -06:00
Vilius Sutkus '89
5727b6dcf4 Upgrade Poppler to 24.01.0 2024-01-13 20:07:15 +02:00
Vilius Sutkus '89
904e068384 Update comment about Poppler-data version 2024-01-13 20:03:20 +02:00
Vilius Sutkus '89
3f0f523ed9 Remove dump_image from SplashBackgroundRenderer, let Poppler handle it 2023-12-27 06:18:49 +02:00
Vilius Sutkus '89
0a2478f6dc Add poppler-23.12.0 support 2023-12-18 13:30:57 +02:00
Pablo González L
b0e3529ac8 Update poppler data to 0.4.12 2023-05-14 15:39:57 -04:00
Pablo González L
f82a725eb7 Add test in github 2023-05-14 15:08:46 -04:00
Pablo González L
1d3e577440 Update poppler to 21.02.0 and fontforge to 0230101 2023-05-14 15:05:51 -04:00
Pablo González L
ed6528859b FindProgDir is FindProgRoot in fontforge 20230101 2023-05-14 15:02:48 -04:00
16 changed files with 114 additions and 183 deletions

19
.github/workflows/build.yml vendored Normal file
View File

@ -0,0 +1,19 @@
# CI workflow: builds pdf2htmlEX with the APT-based build script on every push
# and publishes the resulting build directory as a workflow artifact.
name: Build the project
on: [push]
jobs:
  build:
    # NOTE(review): confirm the ubuntu-20.04 hosted runner image is still
    # offered; the APT build script may need adjusting for a newer image.
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout Code
        uses: actions/checkout@v4
      - name: Build project with APT
        run: './buildScripts/buildInstallLocallyApt'
      - name: Upload artifact
        # v1/v2 of the artifact actions are retired by GitHub; v4 accepts the
        # same `name` / `path` inputs.
        uses: actions/upload-artifact@v4
        with:
          name: my-artifact
          path: pdf2htmlEX/build

View File

@ -19,7 +19,10 @@ cmake \
-DENABLE_UNSTABLE_API_ABI_HEADERS=OFF \
-DBUILD_GTK_TESTS=OFF \
-DBUILD_QT5_TESTS=OFF \
-DBUILD_QT6_TESTS=OFF \
-DBUILD_CPP_TESTS=OFF \
-DBUILD_MANUAL_TESTS=OFF \
-DENABLE_BOOST=OFF \
-DENABLE_SPLASH=ON \
-DENABLE_UTILS=OFF \
-DENABLE_CPP=OFF \
@ -27,10 +30,17 @@ cmake \
-DENABLE_GOBJECT_INTROSPECTION=OFF \
-DENABLE_GTK_DOC=OFF \
-DENABLE_QT5=OFF \
-DENABLE_QT6=OFF \
-DENABLE_LIBOPENJPEG="none" \
-DENABLE_CMS="none" \
-DENABLE_DCTDECODER="libjpeg" \
-DENABLE_CMS="none" \
-DENABLE_LCMS=OFF \
-DENABLE_LIBCURL=OFF \
-DENABLE_LIBTIFF=OFF \
-DWITH_TIFF=OFF \
-DWITH_NSS3=OFF \
-DENABLE_NSS3=OFF \
-DENABLE_GPGME=OFF \
-DENABLE_ZLIB=ON \
-DENABLE_ZLIB_UNCOMPRESS=OFF \
-DUSE_FLOAT=OFF \
@ -39,8 +49,6 @@ cmake \
-DEXTRA_WARN=OFF \
-DWITH_JPEG=ON \
-DWITH_PNG=ON \
-DWITH_TIFF=OFF \
-DWITH_NSS3=OFF \
-DWITH_Cairo=ON \
..

View File

@ -17,7 +17,7 @@ echo "Getting poppler version: $POPPLER_VERSION"
rm -rf $POPPLER_VERSION.tar.xz
rm -rf poppler
rm -rf poppler-data-0.4.9.tar.gz
rm -rf poppler-data-0.4.12.tar.gz
rm -rf poppler-data
set -ev
@ -26,12 +26,12 @@ wget https://poppler.freedesktop.org/$POPPLER_VERSION.tar.xz
tar xvf $POPPLER_VERSION.tar.xz
echo "Getting poppler-data version: 0.4.9"
echo "Getting poppler-data version: 0.4.12"
mv $POPPLER_VERSION poppler
wget https://poppler.freedesktop.org/poppler-data-0.4.9.tar.gz
wget https://poppler.freedesktop.org/poppler-data-0.4.12.tar.gz
tar xvf poppler-data-0.4.9.tar.gz
tar xvf poppler-data-0.4.12.tar.gz
mv poppler-data-0.4.9 poppler-data
mv poppler-data-0.4.12 poppler-data

View File

@ -4,11 +4,14 @@
# versions
# see: https://poppler.freedesktop.org/releases.html
# current working: 0.89.0
# current working: 24.01.0
export PDF2HTMLEX_VERSION=0.18.8.rc2
export POPPLER_VERSION=poppler-0.89.0
export POPPLER_VERSION=poppler-24.01.0
#export POPPLER_VERSION=poppler-23.12.0
#export POPPLER_VERSION=poppler-21.02.0
#export POPPLER_VERSION=poppler-0.89.0
#export POPPLER_VERSION=poppler-0.88.0
#export POPPLER_VERSION=poppler-0.87.0
#export POPPLER_VERSION=poppler-0.86.1
@ -20,9 +23,10 @@ export POPPLER_VERSION=poppler-0.89.0
#export POPPLER_VERSION=poppler-0.81.0
# see: https://github.com/fontforge/fontforge/releases
# current working: 20190801
# current working: 20230101
export FONTFORGE_VERSION=20220308
export FONTFORGE_VERSION=20230101
#export FONTFORGE_VERSION=20220308
#export FONTFORGE_VERSION=20190801
#export FONTFORGE_VERSION=20190413
#export FONTFORGE_VERSION=20190413

View File

@ -104,7 +104,6 @@ set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS}
-lglib-2.0
-lgio-2.0
-lgobject-2.0
-pthread
-lz
-lm
)
@ -122,37 +121,17 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Woverloaded-virtual")
# clang compiler need c++11 flag
#if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
#endif()
# CYGWIN or GCC 4.5.x bug
if(CYGWIN)
# was: set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++0x")
# the following change is untested:
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++14")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -pthread")
set(CMAKE_THREAD_PREFER_PTHREAD ON)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
set(PDF2HTMLEX_LIBS ${PDF2HTMLEX_LIBS} Threads::Threads)
# Poppler-23.12.0 requires CXX17
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if(NOT CYGWIN)
set(CMAKE_CXX_EXTENSIONS OFF)
endif()
# check the C++11 features we need
include(CheckCXXSourceCompiles)
check_cxx_source_compiles("
#include <vector>
int main()
{
char * ptr = nullptr;
std::vector<int> v;
auto f = [&](){ for(auto & i : v) ++i; };
f();
}
" CXX0X_SUPPORT)
if(NOT CXX0X_SUPPORT)
message(FATAL_ERROR "Error: your compiler does not support C++0x/C++11, please update it.")
endif()
configure_file (${CMAKE_SOURCE_DIR}/src/pdf2htmlEX-config.h.in ${CMAKE_SOURCE_DIR}/src/pdf2htmlEX-config.h)
configure_file (${CMAKE_SOURCE_DIR}/pdf2htmlEX.1.in ${CMAKE_SOURCE_DIR}/pdf2htmlEX.1)

View File

@ -5,26 +5,17 @@
*/
#include <fstream>
#include <vector>
#include <memory>
#include <poppler-config.h>
#include <PDFDoc.h>
#include <goo/ImgWriter.h>
#include <goo/PNGWriter.h>
#include <goo/JpegWriter.h>
#include <splash/SplashErrorCodes.h>
#include "Base64Stream.h"
#include "util/const.h"
#include "SplashBackgroundRenderer.h"
namespace pdf2htmlEX {
using std::string;
using std::ifstream;
using std::vector;
using std::unique_ptr;
const SplashColor SplashBackgroundRenderer::white = {255,255,255};
@ -35,6 +26,7 @@ SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTM
, format(imgFormat)
{
bool supported = false;
// ENABLE_LIBPNG and ENABLE_LIBJPEG are defines coming in from poppler-config.h
#ifdef ENABLE_LIBPNG
if (format.empty())
format = "png";
@ -47,7 +39,7 @@ SplashBackgroundRenderer::SplashBackgroundRenderer(const string & imgFormat, HTM
#endif
if (!supported)
{
throw string("Image format not supported: ") + format;
throw string("Image format not supported by Poppler: ") + format;
}
}
@ -124,27 +116,26 @@ bool SplashBackgroundRenderer::render_page(PDFDoc * doc, int pageno)
void SplashBackgroundRenderer::embed_image(int pageno)
{
// xmin->xmax is top->bottom
int xmin, xmax, ymin, ymax;
// poppler-0.84.0 hack to recover from the removal of *ModRegion tracking
//
auto * bitmap = getBitmap();
xmin = 0;
xmax = bitmap->getWidth();
ymin = 0;
ymax = bitmap->getHeight();
//
// end of hack
// dump the background image only when it is not empty
if((xmin <= xmax) && (ymin <= ymax))
if(bitmap->getWidth() >= 0 && bitmap->getHeight() >= 0)
{
{
auto fn = html_renderer->str_fmt("%s/bg%x.%s", (param.embed_image ? param.tmp_dir : param.dest_dir).c_str(), pageno, format.c_str());
if(param.embed_image)
html_renderer->tmp_files.add((char*)fn);
html_renderer->tmp_files.add((const char *)fn);
dump_image((char*)fn, xmin, ymin, xmax, ymax);
SplashImageFileFormat splashImageFileFormat;
if(format == "png")
splashImageFileFormat = splashFormatPng;
else if(format == "jpg")
splashImageFileFormat = splashFormatJpeg;
else
throw string("Image format not supported: ") + format;
SplashError e = bitmap->writeImgFile(splashImageFileFormat, (const char *)fn, param.actual_dpi, param.actual_dpi);
if (e != splashOk)
throw string("Cannot write background image. SplashErrorCode: ") + std::to_string(e);
}
double h_scale = html_renderer->text_zoom_factor() * DEFAULT_DPI / param.actual_dpi;
@ -154,10 +145,10 @@ void SplashBackgroundRenderer::embed_image(int pageno)
auto & all_manager = html_renderer->all_manager;
f_page << "<img class=\"" << CSS::BACKGROUND_IMAGE_CN
<< " " << CSS::LEFT_CN << all_manager.left.install(((double)xmin) * h_scale)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(((double)getBitmapHeight() - 1 - ymax) * v_scale)
<< " " << CSS::WIDTH_CN << all_manager.width.install(((double)(xmax - xmin + 1)) * h_scale)
<< " " << CSS::HEIGHT_CN << all_manager.height.install(((double)(ymax - ymin + 1)) * v_scale)
<< " " << CSS::LEFT_CN << all_manager.left.install(0.0L)
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(0.0L)
<< " " << CSS::WIDTH_CN << all_manager.width.install(h_scale * bitmap->getWidth())
<< " " << CSS::HEIGHT_CN << all_manager.height.install(v_scale * bitmap->getHeight())
<< "\" alt=\"\" src=\"";
if(param.embed_image)
@ -182,68 +173,4 @@ void SplashBackgroundRenderer::embed_image(int pageno)
}
}
// There might be mem leak when exception is thrown !
void SplashBackgroundRenderer::dump_image(const char * filename, int x1, int y1, int x2, int y2)
{
int width = x2 - x1 + 1;
int height = y2 - y1 + 1;
if((width <= 0) || (height <= 0))
throw "Bad metric for background image";
FILE * f = fopen(filename, "wb");
if(!f)
throw string("Cannot open file for background image " ) + filename;
// use unique_ptr to auto delete the object upon exception
unique_ptr<ImgWriter> writer;
if(false) { }
#ifdef ENABLE_LIBPNG
else if(format == "png")
{
writer = unique_ptr<ImgWriter>(new PNGWriter);
}
#endif
#ifdef ENABLE_LIBJPEG
else if(format == "jpg")
{
writer = unique_ptr<ImgWriter>(new JpegWriter);
}
#endif
else
{
throw string("Image format not supported: ") + format;
}
if(!writer->init(f, width, height, param.actual_dpi, param.actual_dpi))
throw "Cannot initialize image writer";
auto * bitmap = getBitmap();
assert(bitmap->getMode() == splashModeRGB8);
SplashColorPtr data = bitmap->getDataPtr();
int row_size = bitmap->getRowSize();
vector<unsigned char*> pointers;
pointers.reserve(height);
SplashColorPtr p = data + y1 * row_size + x1 * 3;
for(int i = 0; i < height; ++i)
{
pointers.push_back(p);
p += row_size;
}
if(!writer->writePointers(pointers.data(), height))
{
throw "Cannot write background image";
}
if(!writer->close())
{
throw "Cannot finish background image";
}
fclose(f);
}
} // namespace pdf2htmlEX

View File

@ -53,7 +53,6 @@ public:
void updateRender(GfxState *state);
protected:
void dump_image(const char * filename, int x1, int y1, int x2, int y2);
HTMLRenderer * html_renderer;
const Param & param;
std::string format;

View File

@ -201,7 +201,7 @@ string HTMLRenderer::dump_type3_font (GfxFont * font, FontInfo & info)
FT_Library ft_lib;
FT_Init_FreeType(&ft_lib);
CairoFontEngine font_engine(ft_lib);
auto * cur_font = font_engine.getFont(font, cur_doc, true, xref);
std::shared_ptr<CairoFont> cur_font = font_engine.getFont(std::shared_ptr<GfxFont>(font), cur_doc, true, xref);
auto used_map = preprocessor.get_code_map(hash_ref(font->getID()));
//calculate transformed metrics
@ -486,11 +486,10 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
else
{
ffw_reencode_glyph_order();
if(FoFiTrueType * fftt = FoFiTrueType::load((char*)filepath.c_str()))
if(std::unique_ptr<FoFiTrueType> fftt = FoFiTrueType::load((char*)filepath.c_str()))
{
code2GID = font_8bit->getCodeToGIDMap(fftt);
code2GID = font_8bit->getCodeToGIDMap(fftt.get());
code2GID_len = 256;
delete fftt;
}
}
}
@ -553,10 +552,9 @@ void HTMLRenderer::embed_font(const string & filepath, GfxFont * font, FontInfo
else
{
// use the mapping stored in the file
if(FoFiTrueType * fftt = FoFiTrueType::load((char*)filepath.c_str()))
if(std::unique_ptr<FoFiTrueType> fftt = FoFiTrueType::load((char*)filepath.c_str()))
{
code2GID = _font->getCodeToGIDMap(fftt, &code2GID_len);
delete fftt;
code2GID = _font->getCodeToGIDMap(fftt.get(), &code2GID_len);
}
}
}
@ -878,7 +876,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
{
cerr << "Install font " << hex << new_fn_id << dec
<< ": (" << (font->getID()->num) << ' ' << (font->getID()->gen) << ") "
<< (font->getName() ? font->getName()->toStr() : "")
<< font->getName().value_or("")
<< endl;
}
@ -910,7 +908,8 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
* which does not make much sense in our case
* If we specify false here, font_loc->locType cannot be gfxFontLocResident
*/
if(auto * font_loc = font->locateFont(xref, nullptr))
std::optional<GfxFontLoc> font_loc = font->locateFont(xref, nullptr);
if(font_loc.has_value())
{
switch(font_loc -> locType)
{
@ -928,7 +927,6 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font)
export_remote_default_font(new_fn_id);
break;
}
delete font_loc;
}
else
{
@ -955,7 +953,7 @@ void HTMLRenderer::install_embedded_font(GfxFont * font, FontInfo & info)
void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
{
string fontname(font->getName()->toStr());
string fontname(font->getName().value_or(""));
// resolve bad encodings in GB
auto iter = GB_ENCODED_FONT_NAME_MAP.find(fontname);
@ -965,15 +963,14 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
cerr << "Warning: workaround for font names in bad encodings." << endl;
}
GfxFontLoc * localfontloc = font->locateFont(xref, nullptr);
std::optional<GfxFontLoc> localfontloc = font->locateFont(xref, nullptr);
if(param.embed_external_font)
{
if(localfontloc != nullptr)
if(localfontloc.has_value())
{
embed_font(string(localfontloc->path->toStr()), font, info);
embed_font(string(localfontloc.value().path), font, info);
export_remote_font(info, param.font_format, font);
delete localfontloc;
return;
}
else
@ -984,11 +981,10 @@ void HTMLRenderer::install_external_font(GfxFont * font, FontInfo & info)
}
// still try to get an idea of read ascent/descent
if(localfontloc != nullptr)
if(localfontloc.has_value())
{
// fill in ascent/descent only, do not embed
embed_font(string(localfontloc->path->toStr()), font, info, true);
delete localfontloc;
embed_font(string(localfontloc.value().path), font, info, true);
}
else
{

View File

@ -22,7 +22,7 @@ using std::cerr;
void HTMLRenderer::process_form(ofstream & out)
{
FormPageWidgets * widgets = cur_catalog->getPage(pageNum)->getFormWidgets();
std::shared_ptr<FormPageWidgets> widgets = cur_catalog->getPage(pageNum)->getFormWidgets();
int num = widgets->getNumWidgets();
for(int i = 0; i < num; i++)

View File

@ -148,7 +148,7 @@ string HTMLRenderer::get_linkaction_str(
dynamic_cast<const LinkGoTo*>(action);
std::unique_ptr<LinkDest> dest = nullptr;
if(auto _ = real_action->getDest())
dest = std::unique_ptr<LinkDest>( _->copy() );
dest = std::make_unique<LinkDest>(*_);
else if (auto _ = real_action->getNamedDest())
dest = cur_catalog->findDest(_);
if(dest)

View File

@ -52,7 +52,6 @@ void HTMLRenderer::process_outline_items(const std::vector<OutlineItem*> * items
{
process_outline_items(item->getKids());
}
item->close();
f_outline.fs << "</li>";
}

View File

@ -207,7 +207,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
// font name & size
if(all_changed || font_changed)
{
const FontInfo * new_font_info = install_font(state->getFont());
const FontInfo * new_font_info = install_font(state->getFont().get());
if(!(new_font_info->id == cur_text_state.font_info->id))
{

View File

@ -95,9 +95,9 @@ void HTMLRenderer::drawString(GfxState * state, const GooString * s)
char buf[2];
buf[0] = (code >> 8) & 0xff;
buf[1] = (code & 0xff);
width = ((GfxCIDFont *)font)->getWidth(buf, 2);
width = ((GfxCIDFont *)font.get())->getWidth(buf, 2);
} else {
width = ((Gfx8BitFont *)font)->getWidth(code);
width = ((Gfx8BitFont *)font.get())->getWidth(code);
}
if (width == 0 || height == 0) {
@ -151,11 +151,11 @@ void HTMLRenderer::drawString(GfxState * state, const GooString * s)
Unicode uu;
if(cur_text_state.font_info->use_tounicode)
{
uu = check_unicode(u, uLen, code, font);
uu = check_unicode(u, uLen, code, font.get());
}
else
{
uu = unicode_from_font(code, font);
uu = unicode_from_font(code, font.get());
}
html_text_page.get_cur_line()->append_unicodes(&uu, 1, ddx);
/*

View File

@ -67,8 +67,7 @@ void Preprocessor::drawChar(GfxState *state, double x, double y,
double originX, double originY,
CharCode code, int nBytes, const Unicode *u, int uLen)
{
GfxFont * font = state->getFont();
if(!font) return;
std::shared_ptr<GfxFont> font = state->getFont();
long long fn_id = hash_ref(font->getID());

View File

@ -412,21 +412,23 @@ int main(int argc, char **argv)
!param.poppler_data_dir.empty() ? param.poppler_data_dir.c_str() : NULL
);
// open PDF file
PDFDoc * doc = nullptr;
try
{
{
GooString * ownerPW = (param.owner_password == "") ? (nullptr) : (new GooString(param.owner_password.c_str()));
GooString * userPW = (param.user_password == "") ? (nullptr) : (new GooString(param.user_password.c_str()));
GooString fileName(param.input_filename.c_str());
doc = PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW);
delete userPW;
delete ownerPW;
std::optional<GooString> ownerPW;
if (!param.owner_password.empty()) {
ownerPW = GooString(param.owner_password);
}
std::optional<GooString> userPW;
if (!param.user_password.empty()) {
userPW = GooString(param.user_password);
}
GooString fileName(param.input_filename);
// open PDF file
std::unique_ptr<PDFDoc> doc(PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW));
if (!doc->isOk())
throw "Cannot read the file";
@ -445,7 +447,7 @@ int main(int argc, char **argv)
doc->getNumPages());
unique_ptr<HTMLRenderer>(new HTMLRenderer(argv[0], param))->process(doc);
unique_ptr<HTMLRenderer>(new HTMLRenderer(argv[0], param))->process(doc.get());
finished = true;
}
@ -459,7 +461,6 @@ int main(int argc, char **argv)
}
// clean up
delete doc;
globalParams.reset();
// check for memory leaks

View File

@ -19,7 +19,7 @@
#include "SignalHandler.h"
#include "ffw.h" // needed for:
#include "gfile.h" // FindProgDir
#include "gfile.h" // FindProgDir => FindProgRoot in 20230101
#include "fontforge/autowidth.h" // FVRemoveKerns
#include "fontforge/bitmapchar.h" // SFReplaceEncodingBDFProps
#include "fontforge/cvimages.h" // FVImportImages
@ -71,7 +71,7 @@ void ffw_init(const char* progPath, int debug)
{
ffwSetAction("initialize");
char *localProgPath = strdup(progPath);
FindProgDir(localProgPath);
FindProgRoot(localProgPath);
InitSimpleStuff();
if ( default_encoding==NULL )
default_encoding=FindOrMakeEncoding("ISO8859-1");