From 45b54d18c4a768fc1fa2624cfea5df8b7aad243c Mon Sep 17 00:00:00 2001 From: Lu Wang Date: Fri, 7 Sep 2012 00:58:23 +0800 Subject: [PATCH] add a font preprocessor --- CMakeLists.txt | 2 ++ TODO | 2 ++ src/BackgroundRenderer.h | 2 +- src/HTMLRenderer.h | 4 +++- src/HTMLRenderer/general.cc | 14 +++++++++++--- src/HTMLRenderer/install.cc | 2 +- src/HTMLRenderer/text.cc | 4 ++++ src/util.h | 6 ++++++ 8 files changed, 30 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9da91d1..441b442 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,8 @@ add_executable(pdf2htmlEX src/ff/ff.c src/BackgroundRenderer.h src/BackgroundRenderer.cc + src/FontPreprocessor.h + src/FontPreprocessor.cc src/Consts.h src/Consts.cc src/util.h diff --git a/TODO b/TODO index 3dd2a1d..0ea638e 100644 --- a/TODO +++ b/TODO @@ -1,3 +1,5 @@ +disable annotation + valgrind re-encoded only used glyphs diff --git a/src/BackgroundRenderer.h b/src/BackgroundRenderer.h index 1990931..f8f4034 100644 --- a/src/BackgroundRenderer.h +++ b/src/BackgroundRenderer.h @@ -24,7 +24,7 @@ public: allowAntialiasA) { } virtual ~BackgroundRenderer() { } - void drawChar(GfxState *state, double x, double y, + virtual void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, Unicode *u, int uLen); diff --git a/src/HTMLRenderer.h b/src/HTMLRenderer.h index ed0cb40..40002f9 100644 --- a/src/HTMLRenderer.h +++ b/src/HTMLRenderer.h @@ -30,7 +30,7 @@ #include "Param.h" #include "util.h" - +#include "FontPreprocessor.h" /* * Naming Convention @@ -124,6 +124,7 @@ class HTMLRenderer : public OutputDev //////////////////////////////////////////////////// // misc //////////////////////////////////////////////////// + void add_tmp_file (const std::string & fn); void clean_tmp_files (); boost::filesystem::path dump_embedded_font (GfxFont * font, long long fn_id); @@ -333,6 +334,7 @@ class HTMLRenderer : public OutputDev // 
for font reencoding int32_t * cur_mapping; char ** cur_mapping2; + FontPreprocessor font_preprocessor; //////////////////////////////////////////////////// // styles & resources diff --git a/src/HTMLRenderer/general.cc b/src/HTMLRenderer/general.cc index aec67f1..692d0dc 100644 --- a/src/HTMLRenderer/general.cc +++ b/src/HTMLRenderer/general.cc @@ -51,12 +51,20 @@ HTMLRenderer::~HTMLRenderer() void HTMLRenderer::process(PDFDoc *doc) { - cerr << "Working: "; - xref = doc->getXRef(); - BackgroundRenderer * bg_renderer = nullptr; + cerr << "Preprocessing: "; + for(int i = param->first_page; i <= param->last_page ; ++i) + { + doc->displayPage(&font_preprocessor, i, param->h_dpi, param->v_dpi, + 0, true, false, false, + nullptr, nullptr, nullptr, nullptr); + cerr << "." << flush; + } + cerr << endl; + cerr << "Working: "; + BackgroundRenderer * bg_renderer = nullptr; if(param->process_nontext) { // Render non-text objects as image diff --git a/src/HTMLRenderer/install.cc b/src/HTMLRenderer/install.cc index 5e676e6..3e504ef 100644 --- a/src/HTMLRenderer/install.cc +++ b/src/HTMLRenderer/install.cc @@ -25,7 +25,7 @@ const FontInfo * HTMLRenderer::install_font(GfxFont * font) { assert(sizeof(long long) == 2*sizeof(int)); - long long fn_id = (font == nullptr) ? 0 : *reinterpret_cast<long long*>(font->getID()); + long long fn_id = (font == nullptr) ? 
0 : hash_ref(font->getID()); auto iter = font_name_map.find(fn_id); if(iter != font_name_map.end()) diff --git a/src/HTMLRenderer/text.cc b/src/HTMLRenderer/text.cc index f329d14..90873f9 100644 --- a/src/HTMLRenderer/text.cc +++ b/src/HTMLRenderer/text.cc @@ -217,8 +217,12 @@ void HTMLRenderer::embed_font(const path & filepath, GfxFont * font, FontInfo & memset(cur_mapping2, 0, 256 * sizeof(char*)); + const char * used_map = font_preprocessor.get_code_map(hash_ref(font->getID())); + for(int i = 0; i < 256; ++i) { + if(!used_map[i]) continue; + auto cn = font_8bit->getCharName(i); if(cn == nullptr) { diff --git a/src/util.h b/src/util.h index 853bc23..b1ad834 100644 --- a/src/util.h +++ b/src/util.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "Consts.h" @@ -49,6 +50,11 @@ static inline bool _tm_equal(const double * tm1, const double * tm2, int size = return true; } +static inline long long hash_ref(const Ref * id) +{ + return (((long long)(id->num)) << (sizeof(id->gen)*8)) | (id->gen); +} + /* * http://en.wikipedia.org/wiki/HTML_decimal_character_rendering */