mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-07-05 01:28:39 +00:00
Merge branch 'master' of github.com:coolwanglu/pdf2htmlEX
This commit is contained in:
commit
0268a9d966
|
@ -164,14 +164,21 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
double space_off = stack.back()->single_space_offset();
|
bool done = false;
|
||||||
if(abs(target - space_off) <= renderer->param->h_eps)
|
auto cur_state = stack.back();
|
||||||
|
if(!(cur_state->hash_umask & State::umask_by_id(State::WORD_SPACE_ID)))
|
||||||
{
|
{
|
||||||
Unicode u = ' ';
|
double space_off = cur_state->single_space_offset();
|
||||||
outputUnicodes(out, &u, 1);
|
if(abs(target - space_off) <= renderer->param->h_eps)
|
||||||
actual_offset = space_off;
|
{
|
||||||
|
Unicode u = ' ';
|
||||||
|
outputUnicodes(out, &u, 1);
|
||||||
|
actual_offset = space_off;
|
||||||
|
done = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if(!done)
|
||||||
{
|
{
|
||||||
auto & wm = renderer->whitespace_manager;
|
auto & wm = renderer->whitespace_manager;
|
||||||
wm.install(target);
|
wm.install(target);
|
||||||
|
@ -183,8 +190,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
||||||
if(is_positive(-actual_offset))
|
if(is_positive(-actual_offset))
|
||||||
last_text_pos_with_negative_offset = cur_text_idx;
|
last_text_pos_with_negative_offset = cur_text_idx;
|
||||||
|
|
||||||
auto * p = stack.back();
|
double threshold = cur_state->draw_font_size * (cur_state->font_info->ascent - cur_state->font_info->descent) * (renderer->param->space_threshold);
|
||||||
double threshold = p->draw_font_size * (p->font_info->ascent - p->font_info->descent) * (renderer->param->space_threshold);
|
|
||||||
|
|
||||||
out << "<span class=\"" << CSS::WHITESPACE_CN
|
out << "<span class=\"" << CSS::WHITESPACE_CN
|
||||||
<< ' ' << CSS::WHITESPACE_CN << wid << "\">" << (target > (threshold - EPS) ? " " : "") << "</span>";
|
<< ' ' << CSS::WHITESPACE_CN << wid << "\">" << (target > (threshold - EPS) ? " " : "") << "</span>";
|
||||||
|
@ -236,13 +242,13 @@ void HTMLRenderer::TextLineBuffer::set_state (State & state)
|
||||||
|
|
||||||
void HTMLRenderer::TextLineBuffer::optimize(void)
|
void HTMLRenderer::TextLineBuffer::optimize(void)
|
||||||
{
|
{
|
||||||
// this function needs more work
|
// need more work
|
||||||
return;
|
return;
|
||||||
|
|
||||||
assert(!states.empty());
|
assert(!states.empty());
|
||||||
|
|
||||||
// set proper hash_umask
|
// set proper hash_umask
|
||||||
long long word_space_umask = ((long long)0xff) << (8*((int)State::WORD_SPACE_ID));
|
long long word_space_umask = State::umask_by_id(State::WORD_SPACE_ID);
|
||||||
for(auto iter = states.begin(); iter != states.end(); ++iter)
|
for(auto iter = states.begin(); iter != states.end(); ++iter)
|
||||||
{
|
{
|
||||||
auto text_iter1 = text.begin() + (iter->start_idx);
|
auto text_iter1 = text.begin() + (iter->start_idx);
|
||||||
|
@ -255,20 +261,6 @@ void HTMLRenderer::TextLineBuffer::optimize(void)
|
||||||
iter->hash_umask |= word_space_umask;
|
iter->hash_umask |= word_space_umask;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// clean zero offsets
|
|
||||||
{
|
|
||||||
auto write_iter = offsets.begin();
|
|
||||||
for(auto iter = offsets.begin(); iter != offsets.end(); ++iter)
|
|
||||||
{
|
|
||||||
if(!equal(iter->width, 0))
|
|
||||||
{
|
|
||||||
*write_iter = *iter;
|
|
||||||
++write_iter;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
offsets.erase(write_iter, offsets.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
// In some PDF files all spaces are converted into positionig shifts
|
// In some PDF files all spaces are converted into positionig shifts
|
||||||
// We may try to change them to ' ' and adjusted word_spaces
|
// We may try to change them to ' ' and adjusted word_spaces
|
||||||
|
@ -358,6 +350,22 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
|
||||||
// we have to inherit it
|
// we have to inherit it
|
||||||
ids[i] = prev_state->ids[i];
|
ids[i] = prev_state->ids[i];
|
||||||
hash_umask &= (~cur_mask);
|
hash_umask &= (~cur_mask);
|
||||||
|
//copy the corresponding value
|
||||||
|
//TODO: this is so ugly
|
||||||
|
switch(i)
|
||||||
|
{
|
||||||
|
case FONT_SIZE_ID:
|
||||||
|
draw_font_size = prev_state->draw_font_size;
|
||||||
|
break;
|
||||||
|
case LETTER_SPACE_ID:
|
||||||
|
letter_space = prev_state->letter_space;
|
||||||
|
break;
|
||||||
|
case WORD_SPACE_ID:
|
||||||
|
word_space = prev_state->word_space;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
//anyway we don't have to output it
|
//anyway we don't have to output it
|
||||||
continue;
|
continue;
|
||||||
|
@ -437,6 +445,11 @@ double HTMLRenderer::TextLineBuffer::State::single_space_offset(void) const
|
||||||
return word_space + letter_space + font_info->space_width * draw_font_size;
|
return word_space + letter_space + font_info->space_width * draw_font_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long long HTMLRenderer::TextLineBuffer::State::umask_by_id(int id)
|
||||||
|
{
|
||||||
|
return (((long long)0xff) << (8*id));
|
||||||
|
}
|
||||||
|
|
||||||
// the order should be the same as in the enum
|
// the order should be the same as in the enum
|
||||||
const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
|
const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
|
||||||
CSS::FONT_FAMILY_CN,
|
CSS::FONT_FAMILY_CN,
|
||||||
|
|
|
@ -44,6 +44,8 @@ public:
|
||||||
ID_COUNT
|
ID_COUNT
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static long long umask_by_id(int id);
|
||||||
|
|
||||||
long long ids[ID_COUNT];
|
long long ids[ID_COUNT];
|
||||||
|
|
||||||
const FontInfo * font_info;
|
const FontInfo * font_info;
|
||||||
|
|
|
@ -6,11 +6,6 @@
|
||||||
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
|
||||||
* TODO
|
|
||||||
* optimize lines using nested <span> (reuse classes)
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
|
@ -64,7 +59,6 @@ void HTMLRenderer::updateCharSpace(GfxState * state)
|
||||||
{
|
{
|
||||||
letter_space_changed = true;
|
letter_space_changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLRenderer::updateWordSpace(GfxState * state)
|
void HTMLRenderer::updateWordSpace(GfxState * state)
|
||||||
{
|
{
|
||||||
word_space_changed = true;
|
word_space_changed = true;
|
||||||
|
@ -356,8 +350,8 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
||||||
// fill color
|
// fill color
|
||||||
if((!(param->fallback)) && (all_changed || fill_color_changed))
|
if((!(param->fallback)) && (all_changed || fill_color_changed))
|
||||||
{
|
{
|
||||||
// * PDF Spec. Table 106 – Text rendering modes
|
// * PDF Spec. Table 106 –Text rendering modes
|
||||||
static const char FILL[8] = { true, false, true, false, true, false, true, false };
|
static const char FILL[8] = { true, false, true, false, true, false, true, false };
|
||||||
|
|
||||||
int idx = state->getRender();
|
int idx = state->getRender();
|
||||||
assert((idx >= 0) && (idx < 8));
|
assert((idx >= 0) && (idx < 8));
|
||||||
|
|
Loading…
Reference in New Issue
Block a user