mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
separate TextLineBuffer and HTMLRenderer
This commit is contained in:
parent
88724a7bd5
commit
bc9318e33d
@ -156,8 +156,6 @@ add_executable(pdf2htmlEX
|
||||
src/HTMLRenderer/general.cc
|
||||
src/HTMLRenderer/image.cc
|
||||
src/HTMLRenderer/font.cc
|
||||
src/HTMLRenderer/TextLineBuffer.h
|
||||
src/HTMLRenderer/TextLineBuffer.cc
|
||||
src/HTMLRenderer/link.cc
|
||||
src/HTMLRenderer/outline.cc
|
||||
src/HTMLRenderer/state.cc
|
||||
@ -180,6 +178,7 @@ add_executable(pdf2htmlEX
|
||||
src/util/encoding.cc
|
||||
src/util/ffw.h
|
||||
src/util/ffw.c
|
||||
src/util/HTMLState.h
|
||||
src/util/math.h
|
||||
src/util/math.cc
|
||||
src/util/misc.h
|
||||
@ -191,6 +190,8 @@ add_executable(pdf2htmlEX
|
||||
src/util/Preprocessor.cc
|
||||
src/util/StringFormatter.h
|
||||
src/util/StringFormatter.cc
|
||||
src/util/TextLineBuffer.h
|
||||
src/util/TextLineBuffer.cc
|
||||
src/util/TmpFiles.h
|
||||
src/util/TmpFiles.cc
|
||||
src/util/unicode.h
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <unordered_map>
|
||||
#include <cstdint>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#include <OutputDev.h>
|
||||
#include <GfxState.h>
|
||||
@ -28,36 +29,10 @@
|
||||
#include "util/misc.h"
|
||||
#include "util/color.h"
|
||||
#include "util/StateManager.h"
|
||||
#include "util/TextLineBuffer.h"
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
|
||||
struct FontInfo
|
||||
{
|
||||
long long id;
|
||||
bool use_tounicode;
|
||||
int em_size;
|
||||
double space_width;
|
||||
double ascent, descent;
|
||||
bool is_type3;
|
||||
};
|
||||
|
||||
struct HTMLState
|
||||
{
|
||||
const FontInfo * font_info;
|
||||
double font_size;
|
||||
Color fill_color;
|
||||
Color stroke_color;
|
||||
double letter_space;
|
||||
double word_space;
|
||||
|
||||
// relative to the previous state
|
||||
double vertical_align;
|
||||
|
||||
double x,y;
|
||||
double transform_matrix[4];
|
||||
};
|
||||
|
||||
class HTMLRenderer : public OutputDev
|
||||
{
|
||||
public:
|
||||
@ -267,7 +242,7 @@ protected:
|
||||
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// states
|
||||
// PDF states
|
||||
////////////////////////////////////////////////////
|
||||
bool line_opened;
|
||||
enum NewLineState
|
||||
@ -277,7 +252,6 @@ protected:
|
||||
NLS_DIV // has to open a new <div>
|
||||
} new_line_state;
|
||||
|
||||
|
||||
// track the original (unscaled) values to determine scaling and merge lines
|
||||
// current position
|
||||
double cur_tx, cur_ty; // real text position, in text coords
|
||||
@ -298,6 +272,9 @@ protected:
|
||||
bool letter_space_changed;
|
||||
bool stroke_color_changed;
|
||||
|
||||
////////////////////////////////////////////////////
|
||||
// HTML states
|
||||
////////////////////////////////////////////////////
|
||||
|
||||
// optimize for web
|
||||
// we try to render the final font size directly
|
||||
@ -314,9 +291,8 @@ protected:
|
||||
|
||||
// some metrics have to be determined after all elements in the lines have been seen
|
||||
// see TextLineBuffer.h
|
||||
class TextLineBuffer;
|
||||
friend class TextLineBuffer;
|
||||
TextLineBuffer * text_line_buf;
|
||||
std::vector<std::unique_ptr<TextLineBuffer>> text_line_buffers;
|
||||
|
||||
// for font reencoding
|
||||
int32_t * cur_mapping;
|
||||
@ -337,22 +313,7 @@ protected:
|
||||
std::unordered_map<long long, FontInfo> font_info_map;
|
||||
|
||||
// managers store values actually used in HTML (i.e. scaled)
|
||||
////////////////////////////////////////////////
|
||||
TransformMatrixManager transform_matrix_manager;
|
||||
VerticalAlignManager vertical_align_manager;
|
||||
StrokeColorManager stroke_color_manager;
|
||||
LetterSpaceManager letter_space_manager;
|
||||
WhitespaceManager whitespace_manager;
|
||||
WordSpaceManager word_space_manager;
|
||||
FillColorManager fill_color_manager;
|
||||
FontSizeManager font_size_manager;
|
||||
BottomManager bottom_manager;
|
||||
HeightManager height_manager;
|
||||
WidthManager width_manager;
|
||||
LeftManager left_manager;
|
||||
////////////////////////////////////////////////
|
||||
BGImageSizeManager bgimage_size_manager;
|
||||
|
||||
AllStateManater all_manager;
|
||||
|
||||
const Param * param;
|
||||
|
||||
|
@ -373,7 +373,7 @@ void HTMLRenderer::css_draw_rectangle(double x, double y, double w, double h, co
|
||||
}
|
||||
|
||||
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN
|
||||
<< ' ' << CSS::TRANSFORM_MATRIX_CN << transform_matrix_manager.install(new_tm)
|
||||
<< ' ' << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(new_tm)
|
||||
<< "\" style=\"";
|
||||
|
||||
if(line_color)
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <GlobalParams.h>
|
||||
|
||||
#include "HTMLRenderer.h"
|
||||
#include "TextLineBuffer.h"
|
||||
#include "util/TextLineBuffer.h"
|
||||
#include "pdf2htmlEX-config.h"
|
||||
#include "BackgroundRenderer/BackgroundRenderer.h"
|
||||
#include "util/namespace.h"
|
||||
@ -40,7 +40,6 @@ using std::endl;
|
||||
HTMLRenderer::HTMLRenderer(const Param * param)
|
||||
:OutputDev()
|
||||
,line_opened(false)
|
||||
,text_line_buf(new TextLineBuffer(this))
|
||||
,preprocessor(param)
|
||||
,tmp_files(*param)
|
||||
,param(param)
|
||||
@ -51,6 +50,7 @@ HTMLRenderer::HTMLRenderer(const Param * param)
|
||||
globalParams->setErrQuiet(gTrue);
|
||||
}
|
||||
|
||||
text_line_buffers.emplace_back(new TextLineBuffer(param, all_manager));
|
||||
ffw_init(param->debug);
|
||||
cur_mapping = new int32_t [0x10000];
|
||||
cur_mapping2 = new char* [0x100];
|
||||
@ -61,24 +61,23 @@ HTMLRenderer::HTMLRenderer(const Param * param)
|
||||
* or may be handled well (whitespace_manager)
|
||||
* So we can set a large eps here
|
||||
*/
|
||||
vertical_align_manager.set_eps(param->v_eps);
|
||||
whitespace_manager .set_eps(param->h_eps);
|
||||
left_manager .set_eps(param->h_eps);
|
||||
all_manager.vertical_align.set_eps(param->v_eps);
|
||||
all_manager.whitespace .set_eps(param->h_eps);
|
||||
all_manager.left .set_eps(param->h_eps);
|
||||
/*
|
||||
* For other states, we need accurate values
|
||||
* optimization will be done separately
|
||||
*/
|
||||
font_size_manager .set_eps(EPS);
|
||||
letter_space_manager.set_eps(EPS);
|
||||
word_space_manager .set_eps(EPS);
|
||||
height_manager .set_eps(EPS);
|
||||
width_manager .set_eps(EPS);
|
||||
bottom_manager .set_eps(EPS);
|
||||
all_manager.font_size .set_eps(EPS);
|
||||
all_manager.letter_space.set_eps(EPS);
|
||||
all_manager.word_space .set_eps(EPS);
|
||||
all_manager.height .set_eps(EPS);
|
||||
all_manager.width .set_eps(EPS);
|
||||
all_manager.bottom .set_eps(EPS);
|
||||
}
|
||||
|
||||
HTMLRenderer::~HTMLRenderer()
|
||||
{
|
||||
delete text_line_buf;
|
||||
ffw_finalize();
|
||||
delete [] cur_mapping;
|
||||
delete [] cur_mapping2;
|
||||
@ -173,8 +172,8 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
|
||||
this->pageNum = pageNum;
|
||||
|
||||
long long wid = width_manager.install(state->getPageWidth());
|
||||
long long hid = height_manager.install(state->getPageHeight());
|
||||
long long wid = all_manager.width.install(state->getPageWidth());
|
||||
long long hid = all_manager.height.install(state->getPageHeight());
|
||||
f_pages.fs
|
||||
<< "<div class=\"" << CSS::PAGE_DECORATION_CN
|
||||
<< " " << CSS::WIDTH_CN << wid
|
||||
@ -212,6 +211,10 @@ void HTMLRenderer::startPage(int pageNum, GfxState *state, XRef * xref)
|
||||
void HTMLRenderer::endPage() {
|
||||
close_text_line();
|
||||
|
||||
// dump all text
|
||||
for(auto iter = text_line_buffers.begin(); iter != text_line_buffers.end(); ++iter)
|
||||
(*iter)->flush(f_pages.fs);
|
||||
|
||||
// process links before the page is closed
|
||||
cur_doc->processLinks(this, pageNum);
|
||||
|
||||
@ -445,36 +448,36 @@ void HTMLRenderer::set_stream_flags(std::ostream & out)
|
||||
|
||||
void HTMLRenderer::dump_css (void)
|
||||
{
|
||||
transform_matrix_manager.dump_css(f_css.fs);
|
||||
vertical_align_manager .dump_css(f_css.fs);
|
||||
letter_space_manager .dump_css(f_css.fs);
|
||||
stroke_color_manager .dump_css(f_css.fs);
|
||||
word_space_manager .dump_css(f_css.fs);
|
||||
whitespace_manager .dump_css(f_css.fs);
|
||||
fill_color_manager .dump_css(f_css.fs);
|
||||
font_size_manager .dump_css(f_css.fs);
|
||||
bottom_manager .dump_css(f_css.fs);
|
||||
height_manager .dump_css(f_css.fs);
|
||||
width_manager .dump_css(f_css.fs);
|
||||
left_manager .dump_css(f_css.fs);
|
||||
bgimage_size_manager .dump_css(f_css.fs);
|
||||
all_manager.transform_matrix.dump_css(f_css.fs);
|
||||
all_manager.vertical_align .dump_css(f_css.fs);
|
||||
all_manager.letter_space .dump_css(f_css.fs);
|
||||
all_manager.stroke_color .dump_css(f_css.fs);
|
||||
all_manager.word_space .dump_css(f_css.fs);
|
||||
all_manager.whitespace .dump_css(f_css.fs);
|
||||
all_manager.fill_color .dump_css(f_css.fs);
|
||||
all_manager.font_size .dump_css(f_css.fs);
|
||||
all_manager.bottom .dump_css(f_css.fs);
|
||||
all_manager.height .dump_css(f_css.fs);
|
||||
all_manager.width .dump_css(f_css.fs);
|
||||
all_manager.left .dump_css(f_css.fs);
|
||||
all_manager.bgimage_size .dump_css(f_css.fs);
|
||||
|
||||
// print css
|
||||
double ps = print_scale();
|
||||
f_css.fs << CSS::PRINT_ONLY << "{" << endl;
|
||||
transform_matrix_manager.dump_print_css(f_css.fs, ps);
|
||||
vertical_align_manager .dump_print_css(f_css.fs, ps);
|
||||
letter_space_manager .dump_print_css(f_css.fs, ps);
|
||||
stroke_color_manager .dump_print_css(f_css.fs, ps);
|
||||
word_space_manager .dump_print_css(f_css.fs, ps);
|
||||
whitespace_manager .dump_print_css(f_css.fs, ps);
|
||||
fill_color_manager .dump_print_css(f_css.fs, ps);
|
||||
font_size_manager .dump_print_css(f_css.fs, ps);
|
||||
bottom_manager .dump_print_css(f_css.fs, ps);
|
||||
height_manager .dump_print_css(f_css.fs, ps);
|
||||
width_manager .dump_print_css(f_css.fs, ps);
|
||||
left_manager .dump_print_css(f_css.fs, ps);
|
||||
bgimage_size_manager .dump_print_css(f_css.fs, ps);
|
||||
all_manager.transform_matrix.dump_print_css(f_css.fs, ps);
|
||||
all_manager.vertical_align .dump_print_css(f_css.fs, ps);
|
||||
all_manager.letter_space .dump_print_css(f_css.fs, ps);
|
||||
all_manager.stroke_color .dump_print_css(f_css.fs, ps);
|
||||
all_manager.word_space .dump_print_css(f_css.fs, ps);
|
||||
all_manager.whitespace .dump_print_css(f_css.fs, ps);
|
||||
all_manager.fill_color .dump_print_css(f_css.fs, ps);
|
||||
all_manager.font_size .dump_print_css(f_css.fs, ps);
|
||||
all_manager.bottom .dump_print_css(f_css.fs, ps);
|
||||
all_manager.height .dump_print_css(f_css.fs, ps);
|
||||
all_manager.width .dump_print_css(f_css.fs, ps);
|
||||
all_manager.left .dump_print_css(f_css.fs, ps);
|
||||
all_manager.bgimage_size .dump_print_css(f_css.fs, ps);
|
||||
f_css.fs << "}" << endl;
|
||||
}
|
||||
|
||||
|
@ -206,7 +206,7 @@ void HTMLRenderer::processLink(AnnotLink * al)
|
||||
}
|
||||
|
||||
f_pages.fs << "<div class=\"" << CSS::CSS_DRAW_CN << ' ' << CSS::TRANSFORM_MATRIX_CN
|
||||
<< transform_matrix_manager.install(default_ctm)
|
||||
<< all_manager.transform_matrix.install(default_ctm)
|
||||
<< "\" style=\"";
|
||||
|
||||
double x,y,w,h;
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "HTMLRenderer.h"
|
||||
#include "TextLineBuffer.h"
|
||||
#include "util/TextLineBuffer.h"
|
||||
#include "util/namespace.h"
|
||||
#include "util/math.h"
|
||||
|
||||
@ -335,7 +335,7 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||
|
||||
if(merged)
|
||||
{
|
||||
text_line_buf->append_offset(dx * old_draw_text_scale);
|
||||
text_line_buffers.back()->append_offset(dx * old_draw_text_scale);
|
||||
if(equal(dy, 0))
|
||||
{
|
||||
cur_html_state.vertical_align = 0;
|
||||
@ -458,14 +458,14 @@ void HTMLRenderer::prepare_text_line(GfxState * state)
|
||||
double target = (cur_tx - draw_tx) * draw_text_scale;
|
||||
if(!equal(target, 0))
|
||||
{
|
||||
text_line_buf->append_offset(target);
|
||||
text_line_buffers.back()->append_offset(target);
|
||||
draw_tx += target / draw_text_scale;
|
||||
}
|
||||
}
|
||||
|
||||
if(new_line_state != NLS_NONE)
|
||||
{
|
||||
text_line_buf->append_state(cur_html_state);
|
||||
text_line_buffers.back()->append_state(cur_html_state);
|
||||
}
|
||||
|
||||
line_opened = true;
|
||||
@ -476,7 +476,7 @@ void HTMLRenderer::close_text_line()
|
||||
if(line_opened)
|
||||
{
|
||||
line_opened = false;
|
||||
text_line_buf->flush();
|
||||
text_line_buffers.emplace_back(new TextLineBuffer(param, all_manager));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
#include "HTMLRenderer.h"
|
||||
#include "TextLineBuffer.h"
|
||||
#include "util/TextLineBuffer.h"
|
||||
#include "util/namespace.h"
|
||||
#include "util/unicode.h"
|
||||
|
||||
@ -90,13 +90,13 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
if(is_space && (param->space_as_offset))
|
||||
{
|
||||
// ignore horiz_scaling, as it's merged in CTM
|
||||
text_line_buf->append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
||||
text_line_buffers.back()->append_offset((dx1 * cur_font_size + cur_letter_space + cur_word_space) * draw_text_scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
if((param->decompose_ligature) && (uLen > 1) && all_of(u, u+uLen, isLegalUnicode))
|
||||
{
|
||||
text_line_buf->append_unicodes(u, uLen);
|
||||
text_line_buffers.back()->append_unicodes(u, uLen);
|
||||
// TODO: decomposed characters may not have the same total width as the original ligature; this needs to be fixed.
|
||||
}
|
||||
else
|
||||
@ -110,14 +110,14 @@ void HTMLRenderer::drawString(GfxState * state, GooString * s)
|
||||
{
|
||||
uu = unicode_from_font(code, font);
|
||||
}
|
||||
text_line_buf->append_unicodes(&uu, 1);
|
||||
text_line_buffers.back()->append_unicodes(&uu, 1);
|
||||
/*
|
||||
* In PDF, word_space is appended if (n == 1 and *p == ' ')
|
||||
* but in HTML, word_space is appended if (uu == ' ')
|
||||
*/
|
||||
int space_count = (is_space ? 1 : 0) - (uu == ' ' ? 1 : 0);
|
||||
if(space_count != 0)
|
||||
text_line_buf->append_offset(cur_word_space * draw_text_scale * space_count);
|
||||
text_line_buffers.back()->append_offset(cur_word_space * draw_text_scale * space_count);
|
||||
}
|
||||
}
|
||||
|
||||
|
40
src/util/HTMLState.h
Normal file
40
src/util/HTMLState.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Header file for HTMLState
|
||||
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
|
||||
*/
|
||||
#ifndef HTMLSTATE_H__
|
||||
#define HTMLSTATE_H__
|
||||
|
||||
#include "util/color.h"
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
struct FontInfo
|
||||
{
|
||||
long long id;
|
||||
bool use_tounicode;
|
||||
int em_size;
|
||||
double space_width;
|
||||
double ascent, descent;
|
||||
bool is_type3;
|
||||
};
|
||||
|
||||
struct HTMLState
|
||||
{
|
||||
const FontInfo * font_info;
|
||||
double font_size;
|
||||
Color fill_color;
|
||||
Color stroke_color;
|
||||
double letter_space;
|
||||
double word_space;
|
||||
|
||||
// relative to the previous state
|
||||
double vertical_align;
|
||||
|
||||
double x,y;
|
||||
double transform_matrix[4];
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
||||
#endif //HTMLSTATE_H__
|
@ -410,6 +410,23 @@ private:
|
||||
std::unordered_map<int, std::pair<double,double>> value_map;
|
||||
};
|
||||
|
||||
struct AllStateManater
|
||||
{
|
||||
TransformMatrixManager transform_matrix;
|
||||
VerticalAlignManager vertical_align;
|
||||
StrokeColorManager stroke_color;
|
||||
LetterSpaceManager letter_space;
|
||||
WhitespaceManager whitespace;
|
||||
WordSpaceManager word_space;
|
||||
FillColorManager fill_color;
|
||||
FontSizeManager font_size;
|
||||
BottomManager bottom;
|
||||
HeightManager height;
|
||||
WidthManager width;
|
||||
LeftManager left;
|
||||
BGImageSizeManager bgimage_size;
|
||||
};
|
||||
|
||||
} // namespace pdf2htmlEX
|
||||
|
||||
#endif //STATEMANAGER_H__
|
||||
|
@ -10,8 +10,8 @@
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
|
||||
#include "HTMLRenderer.h"
|
||||
#include "TextLineBuffer.h"
|
||||
#include "HTMLRenderer/HTMLRenderer.h"
|
||||
#include "util/TextLineBuffer.h"
|
||||
#include "util/namespace.h"
|
||||
#include "util/unicode.h"
|
||||
#include "util/math.h"
|
||||
@ -29,12 +29,12 @@ using std::endl;
|
||||
using std::find;
|
||||
using std::abs;
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::append_unicodes(const Unicode * u, int l)
|
||||
void TextLineBuffer::append_unicodes(const Unicode * u, int l)
|
||||
{
|
||||
text.insert(text.end(), u, u+l);
|
||||
}
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::append_offset(double width)
|
||||
void TextLineBuffer::append_offset(double width)
|
||||
{
|
||||
/*
|
||||
* If the last offset is very thin, we can ignore it and directly use it
|
||||
@ -47,7 +47,7 @@ void HTMLRenderer::TextLineBuffer::append_offset(double width)
|
||||
offsets.emplace_back(text.size(), width);
|
||||
}
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::append_state(const HTMLState & html_state)
|
||||
void TextLineBuffer::append_state(const HTMLState & html_state)
|
||||
{
|
||||
if(states.empty() || (states.back().start_idx != text.size()))
|
||||
{
|
||||
@ -59,7 +59,7 @@ void HTMLRenderer::TextLineBuffer::append_state(const HTMLState & html_state)
|
||||
(HTMLState&)(states.back()) = html_state;
|
||||
}
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
void TextLineBuffer::flush(ostream & out)
|
||||
{
|
||||
/*
|
||||
* Each Line is an independent absolute positioned block
|
||||
@ -79,6 +79,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
if(states.empty() || (states[0].start_idx != 0))
|
||||
{
|
||||
cerr << "Warning: text without a style! Must be a bug in pdf2htmlEX" << endl;
|
||||
states.clear();
|
||||
text.clear();
|
||||
offsets.clear();
|
||||
return;
|
||||
@ -88,7 +89,6 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
optimize();
|
||||
|
||||
// Start Output
|
||||
ostream & out = renderer->f_pages.fs;
|
||||
{
|
||||
// max_ascent determines the height of the div
|
||||
double accum_vertical_align = 0; // accumulated
|
||||
@ -103,10 +103,10 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
|
||||
// open <div> for the current text line
|
||||
out << "<div class=\"" << CSS::LINE_CN
|
||||
<< " " << CSS::TRANSFORM_MATRIX_CN << renderer->transform_matrix_manager.install(states[0].transform_matrix)
|
||||
<< " " << CSS::LEFT_CN << renderer->left_manager .install(states[0].x)
|
||||
<< " " << CSS::HEIGHT_CN << renderer->height_manager .install(max_ascent)
|
||||
<< " " << CSS::BOTTOM_CN << renderer->bottom_manager .install(states[0].y)
|
||||
<< " " << CSS::TRANSFORM_MATRIX_CN << all_manager.transform_matrix.install(states[0].transform_matrix)
|
||||
<< " " << CSS::LEFT_CN << all_manager.left.install(states[0].x)
|
||||
<< " " << CSS::HEIGHT_CN << all_manager.height.install(max_ascent)
|
||||
<< " " << CSS::BOTTOM_CN << all_manager.bottom.install(states[0].y)
|
||||
;
|
||||
// it will be closed by the first state
|
||||
}
|
||||
@ -132,11 +132,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
{
|
||||
// set id
|
||||
state_iter1->ids[State::FONT_ID] = state_iter1->font_info->id;
|
||||
state_iter1->ids[State::FONT_SIZE_ID] = renderer->font_size_manager .install(state_iter1->font_size);
|
||||
state_iter1->ids[State::FILL_COLOR_ID] = renderer->fill_color_manager .install(state_iter1->fill_color);
|
||||
state_iter1->ids[State::STROKE_COLOR_ID] = renderer->stroke_color_manager .install(state_iter1->stroke_color);
|
||||
state_iter1->ids[State::LETTER_SPACE_ID] = renderer->letter_space_manager .install(state_iter1->letter_space);
|
||||
state_iter1->ids[State::WORD_SPACE_ID] = renderer->word_space_manager .install(state_iter1->word_space);
|
||||
state_iter1->ids[State::FONT_SIZE_ID] = all_manager.font_size.install(state_iter1->font_size);
|
||||
state_iter1->ids[State::FILL_COLOR_ID] = all_manager.fill_color.install(state_iter1->fill_color);
|
||||
state_iter1->ids[State::STROKE_COLOR_ID] = all_manager.stroke_color.install(state_iter1->stroke_color);
|
||||
state_iter1->ids[State::LETTER_SPACE_ID] = all_manager.letter_space.install(state_iter1->letter_space);
|
||||
state_iter1->ids[State::WORD_SPACE_ID] = all_manager.word_space.install(state_iter1->word_space);
|
||||
state_iter1->hash();
|
||||
|
||||
// greedy
|
||||
@ -172,7 +172,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
vertical_align += (*iter)->vertical_align;
|
||||
}
|
||||
//
|
||||
state_iter1->ids[State::VERTICAL_ALIGN_ID] = renderer->vertical_align_manager.install(state_iter1->vertical_align);
|
||||
state_iter1->ids[State::VERTICAL_ALIGN_ID] = all_manager.vertical_align.install(state_iter1->vertical_align);
|
||||
// export the diff between *state_iter1 and stack.back()
|
||||
state_iter1->begin(out, stack.back());
|
||||
stack.push_back(&*state_iter1);
|
||||
@ -194,7 +194,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
double actual_offset = 0;
|
||||
|
||||
//ignore near-zero offsets
|
||||
if(abs(target) <= renderer->param->h_eps)
|
||||
if(abs(target) <= param->h_eps)
|
||||
{
|
||||
actual_offset = 0;
|
||||
}
|
||||
@ -205,7 +205,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
if(!(state_iter1->hash_umask & State::umask_by_id(State::WORD_SPACE_ID)))
|
||||
{
|
||||
double space_off = state_iter1->single_space_offset();
|
||||
if(abs(target - space_off) <= renderer->param->h_eps)
|
||||
if(abs(target - space_off) <= param->h_eps)
|
||||
{
|
||||
Unicode u = ' ';
|
||||
outputUnicodes(out, &u, 1);
|
||||
@ -217,14 +217,14 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
// finally, just dump it
|
||||
if(!done)
|
||||
{
|
||||
long long wid = renderer->whitespace_manager.install(target, &actual_offset);
|
||||
long long wid = all_manager.whitespace.install(target, &actual_offset);
|
||||
|
||||
if(!equal(actual_offset, 0))
|
||||
{
|
||||
if(is_positive(-actual_offset))
|
||||
last_text_pos_with_negative_offset = cur_text_idx;
|
||||
|
||||
double threshold = state_iter1->em_size() * (renderer->param->space_threshold);
|
||||
double threshold = state_iter1->em_size() * (param->space_threshold);
|
||||
|
||||
out << "<span class=\"" << CSS::WHITESPACE_CN
|
||||
<< ' ' << CSS::WHITESPACE_CN << wid << "\">" << (target > (threshold - EPS) ? " " : "") << "</span>";
|
||||
@ -266,9 +266,9 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||
* Adjust letter space and word space in order to reduce the number of HTML elements
|
||||
* May also unmask word space
|
||||
*/
|
||||
void HTMLRenderer::TextLineBuffer::optimize()
|
||||
void TextLineBuffer::optimize()
|
||||
{
|
||||
if(!(renderer->param->optimize_text))
|
||||
if(!(param->optimize_text))
|
||||
return;
|
||||
|
||||
assert(!states.empty());
|
||||
@ -276,8 +276,8 @@ void HTMLRenderer::TextLineBuffer::optimize()
|
||||
const long long word_space_umask = State::umask_by_id(State::WORD_SPACE_ID);
|
||||
|
||||
// for optimization, we need accurate values
|
||||
auto & ls_manager = renderer->letter_space_manager;
|
||||
auto & ws_manager = renderer->word_space_manager;
|
||||
auto & ls_manager = all_manager.letter_space;
|
||||
auto & ws_manager = all_manager.word_space;
|
||||
|
||||
// statistics of widths
|
||||
std::map<double, size_t> width_map;
|
||||
@ -408,7 +408,7 @@ void HTMLRenderer::TextLineBuffer::optimize()
|
||||
|
||||
if(offset_count > 0)
|
||||
{
|
||||
double threshold = (state_iter1->em_size()) * (renderer->param->space_threshold);
|
||||
double threshold = (state_iter1->em_size()) * (param->space_threshold);
|
||||
// set word_space for the most frequently used offset
|
||||
double most_used_width = 0;
|
||||
size_t max_count = 0;
|
||||
@ -446,7 +446,7 @@ void HTMLRenderer::TextLineBuffer::optimize()
|
||||
// this state will be converted to a child node of the node of prev_state
|
||||
// dump the difference from the previous state
|
||||
// also clone corresponding states
|
||||
void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * prev_state)
|
||||
void TextLineBuffer::State::begin (ostream & out, const State * prev_state)
|
||||
{
|
||||
if(prev_state)
|
||||
{
|
||||
@ -566,13 +566,13 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||
}
|
||||
}
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::State::end(ostream & out) const
|
||||
void TextLineBuffer::State::end(ostream & out) const
|
||||
{
|
||||
if(need_close)
|
||||
out << "</span>";
|
||||
}
|
||||
|
||||
void HTMLRenderer::TextLineBuffer::State::hash(void)
|
||||
void TextLineBuffer::State::hash(void)
|
||||
{
|
||||
hash_value = 0;
|
||||
for(int i = 0; i < ID_COUNT; ++i)
|
||||
@ -581,7 +581,7 @@ void HTMLRenderer::TextLineBuffer::State::hash(void)
|
||||
}
|
||||
}
|
||||
|
||||
int HTMLRenderer::TextLineBuffer::State::diff(const State & s) const
|
||||
int TextLineBuffer::State::diff(const State & s) const
|
||||
{
|
||||
/*
|
||||
* A quick check based on hash_value
|
||||
@ -602,23 +602,23 @@ int HTMLRenderer::TextLineBuffer::State::diff(const State & s) const
|
||||
return d;
|
||||
}
|
||||
|
||||
double HTMLRenderer::TextLineBuffer::State::single_space_offset(void) const
|
||||
double TextLineBuffer::State::single_space_offset(void) const
|
||||
{
|
||||
return word_space + letter_space + font_info->space_width * font_size;
|
||||
}
|
||||
|
||||
double HTMLRenderer::TextLineBuffer::State::em_size(void) const
|
||||
double TextLineBuffer::State::em_size(void) const
|
||||
{
|
||||
return font_size * (font_info->ascent - font_info->descent);
|
||||
}
|
||||
|
||||
long long HTMLRenderer::TextLineBuffer::State::umask_by_id(int id)
|
||||
long long TextLineBuffer::State::umask_by_id(int id)
|
||||
{
|
||||
return (((long long)0xff) << (8*id));
|
||||
}
|
||||
|
||||
// the order should be the same as in the enum
|
||||
const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
|
||||
const char * const TextLineBuffer::State::css_class_names [] = {
|
||||
CSS::FONT_FAMILY_CN,
|
||||
CSS::FONT_SIZE_CN,
|
||||
CSS::FILL_COLOR_CN,
|
@ -4,6 +4,10 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "Param.h"
|
||||
#include "util/StateManager.h"
|
||||
#include "util/HTMLState.h"
|
||||
|
||||
namespace pdf2htmlEX {
|
||||
|
||||
/*
|
||||
@ -13,11 +17,11 @@ namespace pdf2htmlEX {
|
||||
* - State change
|
||||
* within a line
|
||||
*/
|
||||
class HTMLRenderer;
|
||||
class HTMLRenderer::TextLineBuffer
|
||||
class TextLineBuffer
|
||||
{
|
||||
public:
|
||||
TextLineBuffer (HTMLRenderer * renderer) : renderer(renderer) { }
|
||||
TextLineBuffer (const Param * param, AllStateManater & all_manager)
|
||||
: param(param), all_manager(all_manager) { }
|
||||
|
||||
class State : public HTMLState {
|
||||
public:
|
||||
@ -73,12 +77,13 @@ public:
|
||||
void append_unicodes(const Unicode * u, int l);
|
||||
void append_offset(double width);
|
||||
void append_state(const HTMLState & html_state);
|
||||
void flush(void);
|
||||
void flush(std::ostream & out);
|
||||
|
||||
private:
|
||||
void optimize(void);
|
||||
|
||||
HTMLRenderer * renderer;
|
||||
const Param * param;
|
||||
AllStateManater & all_manager;
|
||||
|
||||
double x, y;
|
||||
long long tm_id;
|
Loading…
Reference in New Issue
Block a user