1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-08-24 20:37:41 +00:00
pdf2htmlEX/src/HTMLTextPage.cc

100 lines
2.0 KiB
C++
Raw Normal View History

/*
* HTMLTextPage.cc
*
* Generate and optimize HTML for one Page
*
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/
2013-04-07 09:01:37 +00:00
#include <algorithm>
#include "HTMLTextPage.h"
namespace pdf2htmlEX {
using std::ostream;
2013-04-07 09:01:37 +00:00
using std::unique_ptr;
2013-04-07 08:10:52 +00:00
// Construct a page that keeps references to the given parameters and state
// manager. No text line is open yet: last_line starts out as nullptr.
HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
    : param(param), all_manager(all_manager), last_line(nullptr)
{
}
// Write the text of every line on this page to `out`.
// prepare() is run first, on every call, before anything is written.
void HTMLTextPage::dump_text(ostream & out)
{
    prepare();

    for(auto & line : text_lines)
    {
        line->dump_text(out);
    }
}
// Forward `l` and the buffer `u` to the current line's append_unicodes().
// If no line is open yet, one is opened first so that last_line is valid.
void HTMLTextPage::append_unicodes(const Unicode * u, int l)
{
    if(last_line == nullptr)
    {
        open_new_line();
    }

    last_line->append_unicodes(u, l);
}
// Forward `offset` to the current line's append_offset().
// If no line is open yet, one is opened first so that last_line is valid.
void HTMLTextPage::append_offset(double offset)
{
    if(last_line == nullptr)
    {
        open_new_line();
    }

    last_line->append_offset(offset);
}
// Forward `state` to the current line's append_state().
// If no line is open yet, one is opened first so that last_line is valid.
void HTMLTextPage::append_state(const HTMLState & state)
{
    if(last_line == nullptr)
    {
        open_new_line();
    }

    last_line->append_state(state);
}
void HTMLTextPage::dump_css(ostream & out)
{
//TODO
}
// Discard all lines collected so far and return to the "no open line" state.
void HTMLTextPage::clear(void)
{
    // last_line points at an element owned by text_lines, so forget it
    // together with the lines themselves.
    last_line = nullptr;
    text_lines.clear();
}
void HTMLTextPage::open_new_line(void)
{
if(last_line && (last_line->empty()))
{
// state and offsets might be nonempty
last_line->clear();
}
else
{
text_lines.emplace_back(new HTMLTextLine(param, all_manager));
last_line = text_lines.back().get();
}
}
2013-04-07 09:01:37 +00:00
static bool is_text_line_empty(const unique_ptr<HTMLTextLine>& p)
{
return p->empty();
}
2013-04-07 08:10:52 +00:00
void HTMLTextPage::prepare(void)
{
2013-04-07 09:01:37 +00:00
// remove empty lines
text_lines.erase(remove_if(text_lines.begin(), text_lines.end(), is_text_line_empty), text_lines.end());
2013-04-07 08:10:52 +00:00
for(auto iter = text_lines.begin(); iter != text_lines.end(); ++iter)
(*iter)->prepare();
if(param.optimize_text)
optimize();
}
// Placeholder for page-level text optimization; currently does nothing.
void HTMLTextPage::optimize(void)
{
    // TODO:
    //  - group lines with same x-axis
    //  - collect common states
}
} // namespace pdf2htmlEX