2012-09-07 07:03:03 +00:00
|
|
|
/*
 * Preprocessor.cc
 *
 * Check used codes for each font
 *
 * by WangLu
 * 2012.09.07
 */
|
|
|
|
|
|
|
|
#include <cstring>
|
2012-09-22 06:41:29 +00:00
|
|
|
#include <iostream>
|
2012-09-26 16:17:56 +00:00
|
|
|
#include <algorithm>
|
2012-09-07 07:03:03 +00:00
|
|
|
|
|
|
|
#include <GfxState.h>
|
|
|
|
#include <GfxFont.h>
|
|
|
|
|
2012-09-22 04:41:53 +00:00
|
|
|
#include "Preprocessor.h"
|
2012-11-29 10:28:07 +00:00
|
|
|
#include "util/misc.h"
|
|
|
|
#include "util/const.h"
|
2012-09-07 07:03:03 +00:00
|
|
|
|
2012-09-11 13:52:46 +00:00
|
|
|
namespace pdf2htmlEX {
|
|
|
|
|
2012-09-22 06:41:29 +00:00
|
|
|
using std::cerr;
|
|
|
|
using std::endl;
|
|
|
|
using std::flush;
|
2012-09-26 16:17:56 +00:00
|
|
|
using std::max;
|
2012-09-22 06:41:29 +00:00
|
|
|
|
2013-04-06 09:01:05 +00:00
|
|
|
Preprocessor::Preprocessor(const Param & param)
|
2012-09-22 04:41:53 +00:00
|
|
|
: OutputDev()
|
2012-09-22 06:41:29 +00:00
|
|
|
, param(param)
|
2012-09-26 16:17:56 +00:00
|
|
|
, max_width(0)
|
|
|
|
, max_height(0)
|
2012-09-22 04:41:53 +00:00
|
|
|
, cur_font_id(0)
|
2012-09-07 07:03:03 +00:00
|
|
|
, cur_code_map(nullptr)
|
|
|
|
{ }
|
|
|
|
|
2012-09-22 04:41:53 +00:00
|
|
|
/*
 * Free every table held in code_maps.
 * Each mapped value is released with delete [].
 */
Preprocessor::~Preprocessor(void)
{
    for(auto & entry : code_maps)
        delete [] entry.second;
}
|
|
|
|
|
2012-09-22 06:41:29 +00:00
|
|
|
/*
 * Run every page from param.first_page to param.last_page (inclusive)
 * through this output device via doc->displayPage, reporting progress
 * on cerr.
 */
void Preprocessor::process(PDFDoc * doc)
{
    const int total_pages = (param.last_page - param.first_page + 1);

    // displayPage is asked for the media box exactly when the crop box is not wanted
    const bool use_media_box = !(param.use_cropbox);

    for(int page_no = param.first_page; page_no <= param.last_page; ++page_no)
    {
        // '\r' keeps the progress counter on a single terminal line
        const int pages_done = page_no - param.first_page;
        cerr << "Preprocessing: " << pages_done << "/" << total_pages << '\r' << flush;

        doc->displayPage(this, page_no, DEFAULT_DPI, DEFAULT_DPI,
                0,              // rotation
                use_media_box,
                true,           // crop
                false,          // printing
                nullptr, nullptr, nullptr, nullptr);
    }

    // final "n/n" progress line, skipped only for a negative page count
    if(total_pages >= 0)
        cerr << "Preprocessing: " << total_pages << "/" << total_pages;

    cerr << endl;
}
|
|
|
|
|
2012-09-22 04:41:53 +00:00
|
|
|
/*
 * Record that `code` is used by the font currently selected in `state`.
 *
 * Each font, keyed by hash_ref(font->getID()), owns a zero-filled byte
 * table: 0x10000 entries when font->isCIDFont() is true, 0x100 otherwise.
 * The entry at index `code` is set to 1. The most recently seen font id
 * and its table are cached in cur_font_id / cur_code_map.
 *
 * Only `state` and `code` are read; the other parameters are unused here.
 * Nothing is recorded when the state has no font, or when `code` does not
 * fit inside the font's table.
 */
void Preprocessor::drawChar(GfxState *state, double x, double y,
        double dx, double dy,
        double originX, double originY,
        CharCode code, int nBytes, Unicode *u, int uLen)
{
    GfxFont * font = state->getFont();
    if(!font) return;

    const long long fn_id = hash_ref(font->getID());
    const int len = font->isCIDFont() ? 0x10000 : 0x100;

    // Also refresh while no table is cached: cur_font_id starts at 0, so a
    // font whose id hashes to 0 would otherwise be written through nullptr.
    if((fn_id != cur_font_id) || (cur_code_map == nullptr))
    {
        cur_font_id = fn_id;

        auto p = code_maps.insert(std::make_pair(cur_font_id, (char*)nullptr));
        if(p.second)
        {
            // this is a new font: allocate its zero-filled table
            p.first->second = new char [len];
            memset(p.first->second, 0, len * sizeof(char));
        }

        cur_code_map = p.first->second;
    }

    // A code at or beyond the table length would be written past the end of
    // the allocation; ignore it. The widening cast also rejects negative
    // values should CharCode ever be a signed type.
    if(static_cast<unsigned long long>(code) >= static_cast<unsigned long long>(len))
        return;

    cur_code_map[code] = 1;
}
|
|
|
|
|
2012-09-26 16:17:56 +00:00
|
|
|
void Preprocessor::startPage(int pageNum, GfxState *state)
|
2013-02-05 07:30:57 +00:00
|
|
|
{
|
|
|
|
startPage(pageNum, state, nullptr);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Preprocessor::startPage(int pageNum, GfxState *state, XRef * xref)
|
2012-09-26 16:17:56 +00:00
|
|
|
{
|
2012-10-02 18:19:40 +00:00
|
|
|
max_width = max<double>(max_width, state->getPageWidth());
|
|
|
|
max_height = max<double>(max_height, state->getPageHeight());
|
2012-09-26 16:17:56 +00:00
|
|
|
}
|
|
|
|
|
2012-09-22 04:41:53 +00:00
|
|
|
const char * Preprocessor::get_code_map (long long font_id) const
|
2012-09-07 07:03:03 +00:00
|
|
|
{
|
|
|
|
auto iter = code_maps.find(font_id);
|
|
|
|
return (iter == code_maps.end()) ? nullptr : (iter->second);
|
|
|
|
}
|
2012-09-11 13:52:46 +00:00
|
|
|
|
|
|
|
} // namespace pdf2htmlEX
|