mirror of
https://github.com/uvoteam/img-fetcher.git
synced 2024-12-22 02:30:08 +00:00
init
This commit is contained in:
commit
4688e55d73
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
output
|
7
README.md
Normal file
7
README.md
Normal file
@ -0,0 +1,7 @@
|
||||
Image Fetcher
|
||||
==========
|
||||
|
||||
The script parses stdin, finds all URLs that point to images, and then downloads those images into the "output" folder.
|
||||
|
||||
|
||||
$ echo '<html><body><img src="https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"></body></html>' | python3 ./img-fetcher.py
|
29
img-fetcher.py
Normal file
29
img-fetcher.py
Normal file
@ -0,0 +1,29 @@
|
||||
import sys
|
||||
import re
|
||||
import os
|
||||
import urllib.request
|
||||
import shutil
|
||||
|
||||
# File extensions (lower-case, without the dot) that are treated as images.
allowedExt = ["png", "jpg", "jpeg", "svg"]

# Absolute http(s) URLs enclosed in double quotes, e.g. src="https://...".
# Compiled once here instead of once per input line.
_URL_PATTERN = re.compile(r'"(https?://[^"]+)"')

# Name of the download directory, created inside the current working directory.
_OUTPUT_DIR = "output"


def image_name_from_url(url):
    """Return the file name to store *url* under, or None if it is not an image.

    The name is the last segment of the URL *path*, so a query string or a
    fragment (``.../logo.png?v=2#top``) neither hides the extension nor ends
    up in the file name. The extension is compared case-insensitively against
    ``allowedExt``.

    Returns None when the URL cannot be parsed, has no file name, has no
    extension, or has an extension that is not in ``allowedExt``.
    """
    from urllib.parse import urlsplit

    try:
        path = urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host).
        return None

    name = path.rsplit("/", 1)[-1]
    _stem, dot, ext = name.rpartition(".")
    if not dot or ext.lower() not in allowedExt:
        return None
    return name


def main(stream=None):
    """Download every image URL found in *stream* into ``./output``.

    *stream* is any iterable of text lines and defaults to ``sys.stdin``.
    The name of each saved image is printed to stdout. Images whose target
    file already exists are skipped. A download that fails is reported on
    stderr and does not stop the remaining downloads.
    """
    import http.client

    if stream is None:
        stream = sys.stdin
    output_dir = os.path.join(os.getcwd(), _OUTPUT_DIR)

    try:
        for line in stream:
            # As before, the directory appears as soon as any input is read;
            # exist_ok avoids the check-then-create race.
            os.makedirs(output_dir, exist_ok=True)

            for url in _URL_PATTERN.findall(line):
                name = image_name_from_url(url)
                if name is None:
                    continue

                target = os.path.join(output_dir, name)
                if os.path.exists(target):
                    continue

                try:
                    # Download to a temporary file first so that an
                    # interrupted transfer never leaves a partial file at
                    # the target path (it would be skipped on the next run).
                    tmp_path, _headers = urllib.request.urlretrieve(url)
                    shutil.move(tmp_path, target)
                except (OSError, ValueError, http.client.HTTPException) as err:
                    # URLError/HTTPError are OSError subclasses; one dead
                    # link must not abort the whole run.
                    print("failed to fetch {}: {}".format(url, err),
                          file=sys.stderr)
                    continue

                print(name)
    finally:
        # Remove temporary files left behind by failed urlretrieve() calls.
        urllib.request.urlcleanup()


if __name__ == "__main__":
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user