1
0
mirror of https://github.com/pdf2htmlEX/pdf2htmlEX.git synced 2024-12-22 13:00:08 +00:00

working on line merging

This commit is contained in:
Lu Wang 2013-03-24 17:18:13 +08:00
parent b337163361
commit 90d99d5000
4 changed files with 36 additions and 25 deletions

View File

@ -262,7 +262,6 @@ protected:
double cur_tx, cur_ty; // real text position, in text coords
double cur_font_size;
// this is CTM * TextMAT in PDF
// [4] and [5] are ignored,
// as we'll calculate the position of the origin separately
double cur_text_tm[6]; // unscaled

View File

@ -157,7 +157,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
{
double target = cur_offset_iter->width + dx;
if(equal(target, stack.back()->single_space_offset()))
if(equal(target, 0))
{
dx = 0;
}
else if(equal(target, stack.back()->single_space_offset()))
{
Unicode u = ' ';
outputUnicodes(out, &u, 1);

View File

@ -187,8 +187,8 @@ void HTMLRenderer::check_state_change(GfxState * state)
}
// backup the current ctm for need_recheck_position
double old_ctm[6];
memcpy(old_ctm, cur_text_tm, sizeof(old_ctm));
double old_tm[6];
memcpy(old_tm, cur_text_tm, sizeof(old_tm));
// ctm & text ctm & hori scale
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
@ -269,51 +269,54 @@ void HTMLRenderer::check_state_change(GfxState * state)
}
}
// see if we can merge with the current line
// see if the new line is compatible with the current line with proper position shift
// depends: rise & text position & transformation
if(need_recheck_position)
{
// try to transform the old origin under the new TM
/*
* OldTM * (draw_tx, draw_ty, 1)^T = CurTM * (draw_tx + dx, draw_ty + dy, 1)^T
* CurTM * (cur_tx, cur_ty, 1)^T = OldTM * (draw_tx + dx, draw_ty + dy, 1)^T
*
* OldTM[4] = CurTM[0] * dx + CurTM[2] * dy + CurTM[4]
* OldTM[5] = CurTM[1] * dx + CurTM[3] * dy + CurTM[5]
* the first 4 elements of CurTM and OldTM should be the same
* otherwise the following text cannot be parallel
*
* We just care if we can map the origin y to the same new y
* So just let dy = cur_y - old_y, and try to solve dx
* CurTM[4] - OldTM[4] = OldTM[0] * (draw_tx + dx - cur_tx) + OldTM[2] * (draw_ty + dy - cur_ty)
* CurTM[5] - OldTM[5] = OldTM[1] * (draw_tx + dx - cur_tx) + OldTM[3] * (draw_ty + dy - cur_ty)
*
* For horizontal text, set dy = 0, and try to solve dx
* If dx can be solved, we can simply append a x-offset without creating a new line
*
* TODO, writing mode, set dx and solve dy
*/
bool merged = false;
if(tm_equal(old_ctm, cur_text_tm, 4))
double dx = 0;
if(tm_equal(old_tm, cur_text_tm, 4))
{
double dy = cur_ty - draw_ty;
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
double lhs1 = cur_text_tm[4] - old_tm[4] - old_tm[2] * (draw_ty - cur_ty) - old_tm[0] * (draw_tx - cur_tx);
double lhs2 = cur_text_tm[5] - old_tm[5] - old_tm[3] * (draw_ty - cur_ty) - old_tm[0] * (draw_tx - cur_tx);
if(equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
if(equal(old_tm[0] * lhs2, old_tm[1] * lhs1))
{
if(is_positive(cur_text_tm[0]))
if(!equal(old_tm[0], 0))
{
draw_tx += tdx / cur_text_tm[0];
draw_ty += dy;
dx = lhs1 / old_tm[0];
draw_tx += dx;
merged = true;
}
else if (is_positive(cur_text_tm[1]))
else if (!equal(old_tm[1], 0))
{
draw_tx += tdy / cur_text_tm[1];
draw_ty += dy;
dx = lhs2 / old_tm[1];
draw_tx += dx;
merged = true;
}
else
{
if((equal(tdx,0)) && (equal(tdy,0)))
if((equal(lhs1,0)) && (equal(lhs2,0)))
{
// free
dx = 0;
draw_tx = cur_tx;
draw_ty += dy;
merged = true;
}
// else fail
@ -323,7 +326,12 @@ void HTMLRenderer::check_state_change(GfxState * state)
}
// else force new line
if(!merged)
if(merged)
{
text_line_buf->append_offset(dx * draw_text_scale);
draw_ty = cur_ty;
}
else
{
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
}

View File

@ -13,7 +13,7 @@ with open('out.html','w') as outf:
if not f.lower().endswith('.pdf'):
continue
print f
if os.system('pdf2htmlEX -l 7 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
print "error on ", f
sys.exit(-1)