pdf2htmlEX/pdf2htmlEX/src/Preprocessor.cc

110 lines
2.6 KiB
C++
Raw Normal View History

2012-09-07 07:03:03 +00:00
/*
 * Preprocessor.cc
 *
 * Record which character codes are used by each font,
 * and track the maximum page width and height.
 *
 * by WangLu
 * 2012.09.07
 */
#include <cstring>
2012-09-22 06:41:29 +00:00
#include <iostream>
2012-09-26 16:17:56 +00:00
#include <algorithm>
2012-09-07 07:03:03 +00:00
#include <GfxState.h>
#include <GfxFont.h>
#include "Preprocessor.h"
2012-11-29 10:28:07 +00:00
#include "util/misc.h"
#include "util/const.h"
2012-09-07 07:03:03 +00:00
namespace pdf2htmlEX {
2012-09-22 06:41:29 +00:00
using std::cerr;
using std::endl;
using std::flush;
2012-09-26 16:17:56 +00:00
using std::max;
2012-09-22 06:41:29 +00:00
2013-04-06 09:01:05 +00:00
/*
 * Build a preprocessor bound to the given parameters.
 * The recorded page size starts at zero, and no font is selected yet
 * (font id 0, no code map).
 */
Preprocessor::Preprocessor(const Param & param)
    : OutputDev(),
      param(param),
      max_width(0),
      max_height(0),
      cur_font_id(0),
      cur_code_map(nullptr)
{
}
/*
 * Free every code-map array stored in code_maps.
 * The arrays are created with new[] in drawChar, hence delete[].
 */
Preprocessor::~Preprocessor(void)
{
    for(auto entry = code_maps.begin(); entry != code_maps.end(); ++entry)
    {
        delete [] entry->second;
    }
}
2012-09-22 06:41:29 +00:00
void Preprocessor::process(PDFDoc * doc)
{
2013-04-06 09:01:05 +00:00
int page_count = (param.last_page - param.first_page + 1);
for(int i = param.first_page; i <= param.last_page ; ++i)
2012-09-22 06:41:29 +00:00
{
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
if(param.quiet == 0)
cerr << "Preprocessing: " << (i - param.first_page) << "/" << page_count << '\r' << flush;
2012-10-24 14:30:52 +00:00
2012-09-26 16:17:56 +00:00
doc->displayPage(this, i, DEFAULT_DPI, DEFAULT_DPI,
0,
2013-04-06 09:01:05 +00:00
(!(param.use_cropbox)),
true, // crop
false, // printing
2012-09-22 06:41:29 +00:00
nullptr, nullptr, nullptr, nullptr);
}
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
if(page_count >= 0 && param.quiet == 0)
2012-10-24 14:30:52 +00:00
cerr << "Preprocessing: " << page_count << "/" << page_count;
New master (#2) * Show header in font map files * fix a usage of unique_ptr with array * Added '--quiet' argument to hide progress messages (resolves #503) * Revert cout messages to cerr (see #622) * bump version * fix build; fix some coverity warnings * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Many bug fixes and improvements, including: - Incorporated latest Cairo files from cairo-0.15.2 - Moved build to out-of-source - Added clean script - Rewritten correct_text_visibility option to improve accuracy - Transparent characters drawn on background layer - Improved bad unicode detection * Rationlise DPI to single number. Implement actual_dpi - clamp maximum background image size in cases of huge PDF pages * DPI fixes - increase DPI when partially covered text to covered-text-dpi Add font-style italic for oblique fonts Reduce char bbox for occlusion tests * Don't shrink bbox - not required if zoom=25 used * Ignore occlusion from stroke/fill with opacity < 0.5 Better compute char bbox for occlusion Use 10% inset for char bbox for occlusion Back out adding font-weight: bold to potentially bold fonts Fix bug to ensure CID ascent/descent matches subfont values * Removed zero char logging * Remove forced italic - missing italic is due to fontforge bug which needs fixing * Typos fixed, readme updated * Typos * Increase maximum background image width Fix private use range to avoid stupid mobile safari switching to emoji font * included -pthread switch to link included 3rdparty poppler files. * Updated files from poppler 0.59.0 and adjusted includes. * Support updated "Object" class from poppler 0.59.0
2018-01-10 19:31:38 +00:00
if(param.quiet == 0)
cerr << endl;
2012-09-22 06:41:29 +00:00
}
/*
 * Mark `code` as used by the font currently selected in `state`.
 *
 * Each font (identified by hash_ref of its ID) owns a table with one byte
 * per character code: 0x10000 entries for CID fonts, 0x100 otherwise.
 * The table of the most recently seen font is cached in
 * cur_font_id / cur_code_map so that consecutive characters of the same
 * font skip the map lookup.
 *
 * Only `state` and `code` are used; the remaining parameters are ignored.
 * Characters drawn without a font, and codes that do not fit into the
 * font's table, are silently skipped.
 */
void Preprocessor::drawChar(GfxState *state, double x, double y,
        double dx, double dy,
        double originX, double originY,
        CharCode code, int nBytes, const Unicode *u, int uLen)
{
    // Bind by const reference: works whether getFont() returns by value or
    // by reference, and avoids a needless shared_ptr copy per character.
    const std::shared_ptr<GfxFont> & font = state->getFont();
    if(!font)
        return; // no font selected: nothing to record

    // Number of entries in this font's code table.
    const CharCode map_len = font->isCIDFont() ? 0x10000 : 0x100;

    const long long fn_id = hash_ref(font->getID());

    // Also refresh when no table is cached yet: cur_font_id starts at 0, so a
    // font whose id hashes to 0 would otherwise leave cur_code_map null.
    if((fn_id != cur_font_id) || (cur_code_map == nullptr))
    {
        cur_font_id = fn_id;

        auto p = code_maps.insert(std::make_pair(cur_font_id, static_cast<char*>(nullptr)));
        if(p.second)
        {
            // this is a new font: allocate a zero-initialised table
            p.first->second = new char [map_len]();
        }

        cur_code_map = p.first->second;
    }

    // Never write past the end of the table.
    if(code < map_len)
        cur_code_map[code] = 1;
}
2012-09-26 16:17:56 +00:00
void Preprocessor::startPage(int pageNum, GfxState *state)
2013-02-05 07:30:57 +00:00
{
startPage(pageNum, state, nullptr);
}
void Preprocessor::startPage(int pageNum, GfxState *state, XRef * xref)
2012-09-26 16:17:56 +00:00
{
2012-10-02 18:19:40 +00:00
max_width = max<double>(max_width, state->getPageWidth());
max_height = max<double>(max_height, state->getPageHeight());
2012-09-26 16:17:56 +00:00
}
const char * Preprocessor::get_code_map (long long font_id) const
2012-09-07 07:03:03 +00:00
{
auto iter = code_maps.find(font_id);
return (iter == code_maps.end()) ? nullptr : (iter->second);
}
} // namespace pdf2htmlEX