1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-07-05 01:28:39 +00:00

Merge branch 'master' of github.com:coolwanglu/pdf2htmlEX

This commit is contained in:
Lu Wang 2013-03-28 14:59:48 +08:00
commit 0268a9d966
3 changed files with 41 additions and 32 deletions

View File

@ -164,14 +164,21 @@ void HTMLRenderer::TextLineBuffer::flush(void)
} }
else else
{ {
double space_off = stack.back()->single_space_offset(); bool done = false;
if(abs(target - space_off) <= renderer->param->h_eps) auto cur_state = stack.back();
if(!(cur_state->hash_umask & State::umask_by_id(State::WORD_SPACE_ID)))
{ {
Unicode u = ' '; double space_off = cur_state->single_space_offset();
outputUnicodes(out, &u, 1); if(abs(target - space_off) <= renderer->param->h_eps)
actual_offset = space_off; {
Unicode u = ' ';
outputUnicodes(out, &u, 1);
actual_offset = space_off;
done = true;
}
} }
else
if(!done)
{ {
auto & wm = renderer->whitespace_manager; auto & wm = renderer->whitespace_manager;
wm.install(target); wm.install(target);
@ -183,8 +190,7 @@ void HTMLRenderer::TextLineBuffer::flush(void)
if(is_positive(-actual_offset)) if(is_positive(-actual_offset))
last_text_pos_with_negative_offset = cur_text_idx; last_text_pos_with_negative_offset = cur_text_idx;
auto * p = stack.back(); double threshold = cur_state->draw_font_size * (cur_state->font_info->ascent - cur_state->font_info->descent) * (renderer->param->space_threshold);
double threshold = p->draw_font_size * (p->font_info->ascent - p->font_info->descent) * (renderer->param->space_threshold);
out << "<span class=\"" << CSS::WHITESPACE_CN out << "<span class=\"" << CSS::WHITESPACE_CN
<< ' ' << CSS::WHITESPACE_CN << wid << "\">" << (target > (threshold - EPS) ? " " : "") << "</span>"; << ' ' << CSS::WHITESPACE_CN << wid << "\">" << (target > (threshold - EPS) ? " " : "") << "</span>";
@ -236,13 +242,13 @@ void HTMLRenderer::TextLineBuffer::set_state (State & state)
void HTMLRenderer::TextLineBuffer::optimize(void) void HTMLRenderer::TextLineBuffer::optimize(void)
{ {
// this function needs more work // need more work
return; return;
assert(!states.empty()); assert(!states.empty());
// set proper hash_umask // set proper hash_umask
long long word_space_umask = ((long long)0xff) << (8*((int)State::WORD_SPACE_ID)); long long word_space_umask = State::umask_by_id(State::WORD_SPACE_ID);
for(auto iter = states.begin(); iter != states.end(); ++iter) for(auto iter = states.begin(); iter != states.end(); ++iter)
{ {
auto text_iter1 = text.begin() + (iter->start_idx); auto text_iter1 = text.begin() + (iter->start_idx);
@ -255,20 +261,6 @@ void HTMLRenderer::TextLineBuffer::optimize(void)
iter->hash_umask |= word_space_umask; iter->hash_umask |= word_space_umask;
} }
} }
// clean zero offsets
{
auto write_iter = offsets.begin();
for(auto iter = offsets.begin(); iter != offsets.end(); ++iter)
{
if(!equal(iter->width, 0))
{
*write_iter = *iter;
++write_iter;
}
}
offsets.erase(write_iter, offsets.end());
}
// In some PDF files all spaces are converted into positioning shifts // In some PDF files all spaces are converted into positioning shifts
// We may try to change them to ' ' and adjusted word_spaces // We may try to change them to ' ' and adjusted word_spaces
@ -358,6 +350,22 @@ void HTMLRenderer::TextLineBuffer::State::begin (ostream & out, const State * pr
// we have to inherit it // we have to inherit it
ids[i] = prev_state->ids[i]; ids[i] = prev_state->ids[i];
hash_umask &= (~cur_mask); hash_umask &= (~cur_mask);
//copy the corresponding value
//TODO: this is so ugly
switch(i)
{
case FONT_SIZE_ID:
draw_font_size = prev_state->draw_font_size;
break;
case LETTER_SPACE_ID:
letter_space = prev_state->letter_space;
break;
case WORD_SPACE_ID:
word_space = prev_state->word_space;
break;
default:
break;
}
} }
//anyway we don't have to output it //anyway we don't have to output it
continue; continue;
@ -437,6 +445,11 @@ double HTMLRenderer::TextLineBuffer::State::single_space_offset(void) const
return word_space + letter_space + font_info->space_width * draw_font_size; return word_space + letter_space + font_info->space_width * draw_font_size;
} }
// Returns a mask with the eight bits of byte number `id` set, i.e. bits
// [8*id, 8*id + 7], for testing or clearing one field of a long long bit
// set such as hash_umask.
//
// The shift is performed on an unsigned value: shifting a signed 0xff into
// the sign bit (id == 7) is not well defined before C++20. For all ids the
// original handled in a defined way the result is unchanged.
//
// NOTE(review): assumes 0 <= id <= 7; the value is not range-checked here —
// confirm callers only pass State ids.
long long HTMLRenderer::TextLineBuffer::State::umask_by_id(int id)
{
    const unsigned long long byte_mask = 0xffULL;
    return static_cast<long long>(byte_mask << (8 * id));
}
// the order should be the same as in the enum // the order should be the same as in the enum
const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = { const char * const HTMLRenderer::TextLineBuffer::State::css_class_names [] = {
CSS::FONT_FAMILY_CN, CSS::FONT_FAMILY_CN,

View File

@ -44,6 +44,8 @@ public:
ID_COUNT ID_COUNT
}; };
static long long umask_by_id(int id);
long long ids[ID_COUNT]; long long ids[ID_COUNT];
const FontInfo * font_info; const FontInfo * font_info;

View File

@ -6,11 +6,6 @@
* Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com> * Copyright (C) 2012,2013 Lu Wang <coolwanglu@gmail.com>
*/ */
/*
* TODO
* optimize lines using nested <span> (reuse classes)
*/
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
@ -64,7 +59,6 @@ void HTMLRenderer::updateCharSpace(GfxState * state)
{ {
letter_space_changed = true; letter_space_changed = true;
} }
void HTMLRenderer::updateWordSpace(GfxState * state) void HTMLRenderer::updateWordSpace(GfxState * state)
{ {
word_space_changed = true; word_space_changed = true;
@ -356,8 +350,8 @@ void HTMLRenderer::check_state_change(GfxState * state)
// fill color // fill color
if((!(param->fallback)) && (all_changed || fill_color_changed)) if((!(param->fallback)) && (all_changed || fill_color_changed))
{ {
// * PDF Spec. Table 106 Text rendering modes // * PDF Spec. Table 106 Text rendering modes
static const char FILL[8] = { true, false, true, false, true, false, true, false }; static const char FILL[8] = { true, false, true, false, true, false, true, false };
int idx = state->getRender(); int idx = state->getRender();
assert((idx >= 0) && (idx < 8)); assert((idx >= 0) && (idx < 8));