1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

working on line merging

This commit is contained in:
Lu Wang 2013-03-24 17:18:13 +08:00
parent b337163361
commit 90d99d5000
4 changed files with 36 additions and 25 deletions

View File

@ -262,7 +262,6 @@ protected:
double cur_tx, cur_ty; // real text position, in text coords double cur_tx, cur_ty; // real text position, in text coords
double cur_font_size; double cur_font_size;
// this is CTM * TextMAT in PDF // this is CTM * TextMAT in PDF
// [4] and [5] are ignored,
// as we'll calculate the position of the origin separately // as we'll calculate the position of the origin separately
double cur_text_tm[6]; // unscaled double cur_text_tm[6]; // unscaled

View File

@ -157,7 +157,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
{ {
double target = cur_offset_iter->width + dx; double target = cur_offset_iter->width + dx;
if(equal(target, stack.back()->single_space_offset())) if(equal(target, 0))
{
dx = 0;
}
else if(equal(target, stack.back()->single_space_offset()))
{ {
Unicode u = ' '; Unicode u = ' ';
outputUnicodes(out, &u, 1); outputUnicodes(out, &u, 1);

View File

@ -187,8 +187,8 @@ void HTMLRenderer::check_state_change(GfxState * state)
} }
// backup the current ctm for need_recheck_position // backup the current ctm for need_recheck_position
double old_ctm[6]; double old_tm[6];
memcpy(old_ctm, cur_text_tm, sizeof(old_ctm)); memcpy(old_tm, cur_text_tm, sizeof(old_tm));
// ctm & text ctm & hori scale // ctm & text ctm & hori scale
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed) if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
@ -269,51 +269,54 @@ void HTMLRenderer::check_state_change(GfxState * state)
} }
} }
// see if we can merge with the current line // see if the new line is compatible with the current line with proper position shift
// depends: rise & text position & transformation // depends: rise & text position & transformation
if(need_recheck_position) if(need_recheck_position)
{ {
// try to transform the old origin under the new TM // try to transform the old origin under the new TM
/* /*
* OldTM * (draw_tx, draw_ty, 1)^T = CurTM * (draw_tx + dx, draw_ty + dy, 1)^T * CurTM * (cur_tx, cur_ty, 1)^T = OldTM * (draw_tx + dx, draw_ty + dy, 1)^T
* *
* OldTM[4] = CurTM[0] * dx + CurTM[2] * dy + CurTM[4] * the first 4 elements of CurTM and OldTM should be the same
* OldTM[5] = CurTM[1] * dx + CurTM[3] * dy + CurTM[5] * otherwise the following text cannot be parallel
* *
* We just care if we can map the origin y to the same new y * CurTM[4] - OldTM[4] = OldTM[0] * (draw_tx + dx - cur_tx) + OldTM[2] * (draw_ty + dy - cur_ty)
* So just let dy = cur_y - old_y, and try to solve dx * CurTM[5] - OldTM[5] = OldTM[1] * (draw_tx + dx - cur_tx) + OldTM[3] * (draw_ty + dy - cur_ty)
*
* For horizontal text, set dy = 0, and try to solve dx
* If dx can be solved, we can simply append an x-offset without creating a new line
* *
* TODO, writing mode, set dx and solve dy * TODO, writing mode, set dx and solve dy
*/ */
bool merged = false; bool merged = false;
if(tm_equal(old_ctm, cur_text_tm, 4)) double dx = 0;
if(tm_equal(old_tm, cur_text_tm, 4))
{ {
double dy = cur_ty - draw_ty; double lhs1 = cur_text_tm[4] - old_tm[4] - old_tm[2] * (draw_ty - cur_ty) - old_tm[0] * (draw_tx - cur_tx);
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy; double lhs2 = cur_text_tm[5] - old_tm[5] - old_tm[3] * (draw_ty - cur_ty) - old_tm[0] * (draw_tx - cur_tx);
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
if(equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx)) if(equal(old_tm[0] * lhs2, old_tm[1] * lhs1))
{ {
if(is_positive(cur_text_tm[0])) if(!equal(old_tm[0], 0))
{ {
draw_tx += tdx / cur_text_tm[0]; dx = lhs1 / old_tm[0];
draw_ty += dy; draw_tx += dx;
merged = true; merged = true;
} }
else if (is_positive(cur_text_tm[1])) else if (!equal(old_tm[1], 0))
{ {
draw_tx += tdy / cur_text_tm[1]; dx = lhs2 / old_tm[1];
draw_ty += dy; draw_tx += dx;
merged = true; merged = true;
} }
else else
{ {
if((equal(tdx,0)) && (equal(tdy,0))) if((equal(lhs1,0)) && (equal(lhs2,0)))
{ {
// free // free
dx = 0;
draw_tx = cur_tx; draw_tx = cur_tx;
draw_ty += dy;
merged = true; merged = true;
} }
// else fail // else fail
@ -323,7 +326,12 @@ void HTMLRenderer::check_state_change(GfxState * state)
} }
// else force new line // else force new line
if(!merged) if(merged)
{
text_line_buf->append_offset(dx * draw_text_scale);
draw_ty = cur_ty;
}
else
{ {
new_line_state = max<NewLineState>(new_line_state, NLS_DIV); new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
} }

View File

@ -13,7 +13,7 @@ with open('out.html','w') as outf:
if not f.lower().endswith('.pdf'): if not f.lower().endswith('.pdf'):
continue continue
print f print f
if os.system('pdf2htmlEX -l 7 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0: if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
print "error on ", f print "error on ", f
sys.exit(-1) sys.exit(-1)