This commit is contained in:
Lu Wang 2012-08-28 01:57:39 +08:00
parent ec989f5b4f
commit f083f662ad
2 changed files with 6 additions and 4 deletions

2
debian/control vendored
View File

@@ -8,6 +8,6 @@ Homepage: http://github.com/coolwanglu/pdf2htmlEX
Package: pdf2htmlex
Architecture: any
-Depends: libpoppler (>= 0.20.3), fontforge, libboost-filesystem1.46.1, libboost-progam-options1.46.1,
+Depends: libpoppler (>= 0.20.3), fontforge, libboost-filesystem-dev, libboost-program-options-dev
Description: Converts PDF to HTML without losing format
pdf2htmlEX converts PDF to HTML while retaining text, format & style as much as possible

View File

@@ -1,16 +1,18 @@
#!/usr/bin/env python
+DIR = 'pdf'
import os
outf = open('out.html','w')
-outf.write('<html><body><div style="position:absolute;top:0;left:0;width:80%;height:100%;"><iframe width="100%" height="100%" name="pdf"></iframe></div><div style="position:absolute;top:0;right:0;width:20%;height:100%;text-align:right;">')
+outf.write('<html><body><div style="position:absolute;top:0;left:0;width:80%;height:100%;"><iframe width="100%" height="100%" name="pdf"></iframe></div><div style="position:absolute;top:0;right:0;width:20%;height:100%;overflow:auto;text-align:right;">')
-for f in os.listdir('pdf'):
+for f in os.listdir(DIR):
if not f.lower().endswith('.pdf'):
continue
print f
-os.system('pdf2htmlEX -l 3 --dest-dir html pdf/%s' % (f,))
+os.system('pdf2htmlEX -l 3 --dest-dir html %s/%s' % (DIR,f))
ff = f[:-3]+'html'
outf.write('<a href="html/%s" target="pdf">%s</a><br/>' % (ff,ff))
outf.flush();