2012-12-11 12:48:10 +00:00
|
|
|
#ifndef TEXTLINEBUFFER_H__
|
|
|
|
#define TEXTLINEBUFFER_H__
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <vector>
|
|
|
|
|
2013-04-06 08:45:01 +00:00
|
|
|
#include <CharTypes.h>
|
|
|
|
|
2013-04-06 08:32:31 +00:00
|
|
|
#include "Param.h"
|
2013-04-06 08:45:01 +00:00
|
|
|
#include "StateManager.h"
|
|
|
|
#include "HTMLState.h"
|
2013-04-06 08:32:31 +00:00
|
|
|
|
2012-12-11 12:48:10 +00:00
|
|
|
namespace pdf2htmlEX {
|
|
|
|
|
2013-01-19 11:19:15 +00:00
|
|
|
/*
|
|
|
|
* Store a series of
|
|
|
|
* - Text
|
|
|
|
* - Shift
|
|
|
|
* - State change
|
|
|
|
* within a line
|
|
|
|
*/
|
2013-04-06 08:32:31 +00:00
|
|
|
class TextLineBuffer
|
2012-12-11 12:48:10 +00:00
|
|
|
{
|
|
|
|
public:
|
2013-04-06 09:01:05 +00:00
|
|
|
TextLineBuffer (const Param & param, AllStateManater & all_manager)
|
2013-04-06 08:32:31 +00:00
|
|
|
: param(param), all_manager(all_manager) { }
|
2012-12-11 12:48:10 +00:00
|
|
|
|
2013-04-04 14:57:50 +00:00
|
|
|
class State : public HTMLState {
|
2013-03-30 17:00:04 +00:00
|
|
|
public:
|
|
|
|
// before output
|
|
|
|
void begin(std::ostream & out, const State * prev_state);
|
|
|
|
// after output
|
|
|
|
void end(std::ostream & out) const;
|
|
|
|
// calculate the hash code
|
|
|
|
void hash(void);
|
|
|
|
// calculate the difference between another State
|
|
|
|
int diff(const State & s) const;
|
|
|
|
|
|
|
|
enum {
|
|
|
|
FONT_ID,
|
|
|
|
FONT_SIZE_ID,
|
|
|
|
FILL_COLOR_ID,
|
|
|
|
STROKE_COLOR_ID,
|
|
|
|
LETTER_SPACE_ID,
|
|
|
|
WORD_SPACE_ID,
|
2013-04-05 13:53:34 +00:00
|
|
|
HASH_ID_COUNT,
|
2013-03-30 17:00:04 +00:00
|
|
|
|
2013-04-05 13:53:34 +00:00
|
|
|
VERTICAL_ALIGN_ID = HASH_ID_COUNT,
|
2013-03-30 17:00:04 +00:00
|
|
|
ID_COUNT
|
|
|
|
};
|
|
|
|
|
|
|
|
static long long umask_by_id(int id);
|
|
|
|
|
|
|
|
long long ids[ID_COUNT];
|
|
|
|
|
|
|
|
size_t start_idx; // index of the first Text using this state
|
|
|
|
// for optimzation
|
|
|
|
long long hash_value;
|
|
|
|
long long hash_umask; // some states may not be actually used
|
|
|
|
bool need_close;
|
|
|
|
|
|
|
|
static const char * const css_class_names []; // class names for each id
|
2012-12-11 12:48:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
class Offset {
|
2013-03-30 17:00:04 +00:00
|
|
|
public:
|
2013-04-03 17:35:44 +00:00
|
|
|
Offset(size_t size_idx, double width)
|
|
|
|
:start_idx(size_idx),width(width)
|
|
|
|
{ }
|
2013-03-30 17:00:04 +00:00
|
|
|
size_t start_idx; // should put this Offset right before text[start_idx];
|
|
|
|
double width;
|
2012-12-11 12:48:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
void append_unicodes(const Unicode * u, int l);
|
|
|
|
void append_offset(double width);
|
2013-04-04 13:19:28 +00:00
|
|
|
void append_state(const HTMLState & html_state);
|
2013-04-06 08:32:31 +00:00
|
|
|
void flush(std::ostream & out);
|
2012-12-11 12:48:10 +00:00
|
|
|
|
|
|
|
private:
|
2013-03-20 15:46:58 +00:00
|
|
|
void optimize(void);
|
|
|
|
|
2013-04-06 09:01:05 +00:00
|
|
|
const Param & param;
|
2013-04-06 08:32:31 +00:00
|
|
|
AllStateManater & all_manager;
|
2012-12-11 12:48:10 +00:00
|
|
|
|
|
|
|
double x, y;
|
|
|
|
long long tm_id;
|
|
|
|
|
|
|
|
std::vector<State> states;
|
|
|
|
std::vector<Offset> offsets;
|
|
|
|
std::vector<Unicode> text;
|
|
|
|
|
|
|
|
// for flush
|
|
|
|
std::vector<State*> stack;
|
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace pdf2htmlEX
|
|
|
|
#endif //TEXTLINEBUFFER_H__
|