mirror of
https://github.com/pdf2htmlEX/pdf2htmlEX.git
synced 2024-12-22 13:00:08 +00:00
working on line merging
This commit is contained in:
parent
b337163361
commit
90d99d5000
@ -262,7 +262,6 @@ protected:
|
|||||||
double cur_tx, cur_ty; // real text position, in text coords
|
double cur_tx, cur_ty; // real text position, in text coords
|
||||||
double cur_font_size;
|
double cur_font_size;
|
||||||
// this is CTM * TextMAT in PDF
|
// this is CTM * TextMAT in PDF
|
||||||
// [4] and [5] are ignored,
|
|
||||||
// as we'll calculate the position of the origin separately
|
// as we'll calculate the position of the origin separately
|
||||||
double cur_text_tm[6]; // unscaled
|
double cur_text_tm[6]; // unscaled
|
||||||
|
|
||||||
|
@ -157,7 +157,11 @@ void HTMLRenderer::TextLineBuffer::flush(void)
|
|||||||
{
|
{
|
||||||
double target = cur_offset_iter->width + dx;
|
double target = cur_offset_iter->width + dx;
|
||||||
|
|
||||||
if(equal(target, stack.back()->single_space_offset()))
|
if(equal(target, 0))
|
||||||
|
{
|
||||||
|
dx = 0;
|
||||||
|
}
|
||||||
|
else if(equal(target, stack.back()->single_space_offset()))
|
||||||
{
|
{
|
||||||
Unicode u = ' ';
|
Unicode u = ' ';
|
||||||
outputUnicodes(out, &u, 1);
|
outputUnicodes(out, &u, 1);
|
||||||
|
@ -187,8 +187,8 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// backup the current ctm for need_recheck_position
|
// backup the current text matrix (cur_text_tm) for need_recheck_position
|
||||||
double old_ctm[6];
|
double old_tm[6];
|
||||||
memcpy(old_ctm, cur_text_tm, sizeof(old_ctm));
|
memcpy(old_tm, cur_text_tm, sizeof(old_tm));
|
||||||
|
|
||||||
// ctm & text ctm & hori scale
|
// ctm & text ctm & hori scale
|
||||||
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
|
if(all_changed || ctm_changed || text_mat_changed || hori_scale_changed)
|
||||||
@ -269,51 +269,54 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// see if we can merge with the current line
|
// see if the new line is compatible with the current line with proper position shift
|
||||||
// depends: rise & text position & transformation
|
// depends: rise & text position & transformation
|
||||||
if(need_recheck_position)
|
if(need_recheck_position)
|
||||||
{
|
{
|
||||||
// try to transform the old origin under the new TM
|
// try to transform the old origin under the new TM
|
||||||
/*
|
/*
|
||||||
* OldTM * (draw_tx, draw_ty, 1)^T = CurTM * (draw_tx + dx, draw_ty + dy, 1)^T
|
* CurTM * (cur_tx, cur_ty, 1)^T = OldTM * (draw_tx + dx, draw_ty + dy, 1)^T
|
||||||
*
|
*
|
||||||
* OldTM[4] = CurTM[0] * dx + CurTM[2] * dy + CurTM[4]
|
* the first 4 elements of CurTM and OldTM should be the same
|
||||||
* OldTM[5] = CurTM[1] * dx + CurTM[3] * dy + CurTM[5]
|
* otherwise the following text cannot be parallel
|
||||||
*
|
*
|
||||||
* We just care if we can map the origin y to the same new y
|
* CurTM[4] - OldTM[4] = OldTM[0] * (draw_tx + dx - cur_tx) + OldTM[2] * (draw_ty + dy - cur_ty)
|
||||||
* So just let dy = cur_y - old_y, and try to solve dx
|
* CurTM[5] - OldTM[5] = OldTM[1] * (draw_tx + dx - cur_tx) + OldTM[3] * (draw_ty + dy - cur_ty)
|
||||||
|
*
|
||||||
|
* For horizontal text, set dy = 0, and try to solve dx
|
||||||
|
* If dx can be solved, we can simply append a x-offset without creating a new line
|
||||||
*
|
*
|
||||||
* TODO, writing mode, set dx and solve dy
|
* TODO, writing mode, set dx and solve dy
|
||||||
*/
|
*/
|
||||||
|
|
||||||
bool merged = false;
|
bool merged = false;
|
||||||
if(tm_equal(old_ctm, cur_text_tm, 4))
|
double dx = 0;
|
||||||
|
if(tm_equal(old_tm, cur_text_tm, 4))
|
||||||
{
|
{
|
||||||
double dy = cur_ty - draw_ty;
|
double lhs1 = cur_text_tm[4] - old_tm[4] - old_tm[2] * (draw_ty - cur_ty) - old_tm[0] * (draw_tx - cur_tx);
|
||||||
double tdx = old_ctm[4] - cur_text_tm[4] - cur_text_tm[2] * dy;
|
// y-component residual: the x delta must be scaled by old_tm[1] (second-row coefficient), not old_tm[0]
double lhs2 = cur_text_tm[5] - old_tm[5] - old_tm[3] * (draw_ty - cur_ty) - old_tm[1] * (draw_tx - cur_tx);
|
||||||
double tdy = old_ctm[5] - cur_text_tm[5] - cur_text_tm[3] * dy;
|
|
||||||
|
|
||||||
if(equal(cur_text_tm[0] * tdy, cur_text_tm[1] * tdx))
|
if(equal(old_tm[0] * lhs2, old_tm[1] * lhs1))
|
||||||
{
|
{
|
||||||
if(is_positive(cur_text_tm[0]))
|
if(!equal(old_tm[0], 0))
|
||||||
{
|
{
|
||||||
draw_tx += tdx / cur_text_tm[0];
|
dx = lhs1 / old_tm[0];
|
||||||
draw_ty += dy;
|
draw_tx += dx;
|
||||||
merged = true;
|
merged = true;
|
||||||
}
|
}
|
||||||
else if (is_positive(cur_text_tm[1]))
|
else if (!equal(old_tm[1], 0))
|
||||||
{
|
{
|
||||||
draw_tx += tdy / cur_text_tm[1];
|
dx = lhs2 / old_tm[1];
|
||||||
draw_ty += dy;
|
draw_tx += dx;
|
||||||
merged = true;
|
merged = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if((equal(tdx,0)) && (equal(tdy,0)))
|
if((equal(lhs1,0)) && (equal(lhs2,0)))
|
||||||
{
|
{
|
||||||
// free
|
// free
|
||||||
|
dx = 0;
|
||||||
draw_tx = cur_tx;
|
draw_tx = cur_tx;
|
||||||
draw_ty += dy;
|
|
||||||
merged = true;
|
merged = true;
|
||||||
}
|
}
|
||||||
// else fail
|
// else fail
|
||||||
@ -323,7 +326,12 @@ void HTMLRenderer::check_state_change(GfxState * state)
|
|||||||
}
|
}
|
||||||
// else force new line
|
// else force new line
|
||||||
|
|
||||||
if(!merged)
|
if(merged)
|
||||||
|
{
|
||||||
|
text_line_buf->append_offset(dx * draw_text_scale);
|
||||||
|
draw_ty = cur_ty;
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
|
new_line_state = max<NewLineState>(new_line_state, NLS_DIV);
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,7 @@ with open('out.html','w') as outf:
|
|||||||
if not f.lower().endswith('.pdf'):
|
if not f.lower().endswith('.pdf'):
|
||||||
continue
|
continue
|
||||||
print f
|
print f
|
||||||
if os.system('pdf2htmlEX -l 7 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
if os.system('pdf2htmlEX -l 10 --no-drm 1 --fit-width 1024 --dest-dir html --auto-hint=1 --external-hint-tool="ttfautohint" "%s/%s"' % (DIR,f)) != 0:
|
||||||
print "error on ", f
|
print "error on ", f
|
||||||
sys.exit(-1)
|
sys.exit(-1)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user