pdf2htmlEX/src/HTMLTextPage.cc

148 lines
3.7 KiB
C++
Raw Normal View History

/*
* HTMLTextPage.cc
*
* Generate and optimize HTML for one page
*
* Copyright (C) 2013 Lu Wang <coolwanglu@gmail.com>
*/
#include "HTMLTextPage.h"
2013-05-04 11:26:26 +00:00
#include "util/css_const.h"
namespace pdf2htmlEX {
using std::ostream;
2013-04-07 08:10:52 +00:00
/*
 * Construct an empty text page.
 *
 * param       - initializes the `param` member
 * all_manager - initializes the `all_manager` member
 *
 * There is no current line yet, and the page size is 0 x 0 until
 * set_page_size() is called.
 */
HTMLTextPage::HTMLTextPage(const Param & param, AllStateManager & all_manager)
    : param(param),
      all_manager(all_manager),
      cur_line(nullptr),
      page_width(0),
      page_height(0)
{
}
2013-12-22 08:59:59 +00:00
/*
 * Destroy the page, deleting every text line still stored in text_lines.
 */
HTMLTextPage::~HTMLTextPage()
{
    for(auto line_iter = text_lines.begin(); line_iter != text_lines.end(); ++line_iter)
    {
        delete *line_iter;
    }
}
/*
 * Write the HTML for all text lines of this page to `out`.
 *
 * Steps:
 *  1. When param.optimize_text is set, run optimize() on every line and
 *     replace text_lines with the lines collected by those calls.
 *  2. Call prepare() on every line.
 *  3. When param.optimize_text is set, run the page-level optimize().
 *  4. Walk the recorded clips. The lines between two consecutive clip start
 *     indices are dumped together; when the clip in effect differs from the
 *     page box they are clipped and wrapped in a <div> carrying the clip
 *     rectangle classes.
 *
 * Note that this appends one extra entry to `clips`.
 */
void HTMLTextPage::dump_text(ostream & out)
{
    if(param.optimize_text)
    {
        // optimizing a line may split it, so gather the results in a new list
        std::vector<HTMLTextLine*> optimized_lines;
        for(auto line : text_lines)
        {
            line->optimize(optimized_lines);
        }
        std::swap(text_lines, optimized_lines);
    }

    for(auto line : text_lines)
    {
        line->prepare();
    }

    if(param.optimize_text)
    {
        optimize();
    }

    // the box covering the whole page: lines under it need no clipping
    HTMLClipState page_box;
    page_box.xmin = page_box.ymin = 0;
    page_box.xmax = page_width;
    page_box.ymax = page_height;

    // trailing dummy clip starting at text_lines.size(), so that the loop
    // below also emits the lines following the last real clip
    clips.emplace_back(page_box, text_lines.size());

    Clip active_clip(page_box, 0);
    bool clipped = false;
    auto line_iter = text_lines.begin();
    for(const auto & upcoming_clip : clips)
    {
        // lines in [line_iter, run_end) are governed by active_clip
        const auto run_end = text_lines.begin() + upcoming_clip.start_idx;
        if(line_iter != run_end)
        {
            const auto & pending_box = active_clip.clip_state;
            if(clipped)
            {
                out << "<div class=\"" << CSS::CLIP_CN
                    << " " << CSS::LEFT_CN << all_manager.left.install(pending_box.xmin)
                    << " " << CSS::BOTTOM_CN << all_manager.bottom.install(pending_box.ymin)
                    << " " << CSS::WIDTH_CN << all_manager.width.install(pending_box.xmax - pending_box.xmin)
                    << " " << CSS::HEIGHT_CN << all_manager.height.install(pending_box.ymax - pending_box.ymin)
                    << "\">";
            }

            for(; line_iter != run_end; ++line_iter)
            {
                if(clipped)
                {
                    (*line_iter)->clip(pending_box);
                }
                (*line_iter)->dump_text(out);
            }

            if(clipped)
            {
                out << "</div>";
            }
        }

        // switch to the next clip; it counts as a real clip only if it
        // differs from the full page box
        active_clip = upcoming_clip;
        const auto & next_box = active_clip.clip_state;
        clipped = !equal(0, next_box.xmin)
               || !equal(0, next_box.ymin)
               || !equal(page_width, next_box.xmax)
               || !equal(page_height, next_box.ymax);
    }
}
/*
 * Intended to write page-specific CSS to `out`.
 * Not implemented yet: the function currently writes nothing.
 */
void HTMLTextPage::dump_css(ostream & out)
{
    // TODO: not implemented
}
void HTMLTextPage::clear(void)
{
text_lines.clear();
2013-05-04 13:17:35 +00:00
clips.clear();
cur_line = nullptr;
}
void HTMLTextPage::open_new_line(const HTMLLineState & line_state)
{
// do not reused the last text_line even if it's empty
// because the clip states may point to the next index
text_lines.emplace_back(new HTMLTextLine(line_state, param, all_manager));
2013-12-22 08:59:59 +00:00
cur_line = text_lines.back();
}
2013-05-04 11:26:26 +00:00
void HTMLTextPage::set_page_size(double width, double height)
{
page_width = width;
page_height = height;
}
2013-05-04 13:17:35 +00:00
void HTMLTextPage::clip(const HTMLClipState & clip_state)
2013-05-04 11:26:26 +00:00
{
2013-05-04 13:17:35 +00:00
if(!clips.empty())
2013-05-04 11:26:26 +00:00
{
2013-05-04 13:17:35 +00:00
auto & clip = clips.back();
if(clip.start_idx == text_lines.size())
2013-05-04 11:26:26 +00:00
{
/*
* Previous ClipBox is not used
*/
2013-05-04 13:17:35 +00:00
clip.clip_state = clip_state;
2013-05-04 11:26:26 +00:00
return;
}
}
2013-05-04 13:17:35 +00:00
clips.emplace_back(clip_state, text_lines.size());
2013-05-04 11:26:26 +00:00
}
/*
 * Page-level optimization hook, called by dump_text() when
 * param.optimize_text is set. Not implemented yet: it does nothing.
 */
void HTMLTextPage::optimize(void)
{
    // TODO:
    //  - group lines with the same x-axis
    //  - collect common states
}
} // namespace pdf2htmlEX