This commit is contained in:
Denys Medvid 2019-03-14 17:19:05 +02:00
commit 4688e55d73
WARNING! Although there is a key with this ID in the database it does not verify this commit! This commit is SUSPICIOUS.
GPG Key ID: C83C12F037EE26DF
3 changed files with 37 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
output

7
README.md Normal file
View File

@ -0,0 +1,7 @@
Image Fetcher
==========
The script parses stdin, finds all image URLs, and downloads the images into the "output" folder.
$ echo '<html><body><img src="https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"></body></html>' | python3 ./img-fetcher.py

29
img-fetcher.py Normal file
View File

@ -0,0 +1,29 @@
import http.client
import os
import re
import shutil
import sys
import urllib.parse
import urllib.request
# File extensions (lower-case, without the dot) that are treated as images.
ALLOWED_EXTENSIONS = frozenset({"png", "jpg", "jpeg", "svg"})

# Directory, relative to the current working directory, that receives images.
OUTPUT_DIR = "output"

# Matches double-quoted absolute http(s) URLs, e.g. the value of src="...".
URL_PATTERN = re.compile(r'"(https?://[^"]+)"')


def image_name(url):
    """Return the file name of the image that *url* points to, or None.

    Only the path component of the URL is inspected, so a query string or
    fragment (``logo.png?size=2x``, ``logo.png#top``) neither hides the
    extension nor leaks into the file name. The extension is compared
    case-insensitively against ALLOWED_EXTENSIONS.

    Args:
        url: An absolute URL string.

    Returns:
        The last path segment of the URL when it has an allowed image
        extension; None when the URL cannot be parsed, has no file name,
        or has a different (or no) extension.
    """
    try:
        path = urllib.parse.urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host).
        return None
    name = path.rsplit("/", 1)[-1]
    _stem, dot, ext = name.rpartition(".")
    if not dot or ext.lower() not in ALLOWED_EXTENSIONS:
        return None
    return name


def fetch_image(url, output_dir=OUTPUT_DIR):
    """Download the image at *url* into *output_dir*.

    Args:
        url: An absolute http(s) URL.
        output_dir: Existing directory in which the image is stored.

    Returns:
        The name of the saved file, or None when nothing was downloaded
        because the URL is not an allowed image or the file already exists.

    Raises:
        OSError: On network or filesystem failure (includes
            urllib.error.URLError and HTTPError).
        ValueError: When urllib rejects the URL.
        http.client.HTTPException: On a protocol-level failure such as an
            invalid URL or a truncated response.
    """
    name = image_name(url)
    if name is None:
        return None
    destination = os.path.join(output_dir, name)
    if os.path.exists(destination):
        return None
    try:
        # Download to a temporary file first so that an interrupted transfer
        # never leaves a partial image under its final name.
        temp_path, _headers = urllib.request.urlretrieve(url)
        shutil.move(temp_path, destination)
    finally:
        # Removes any temporary file urlretrieve left behind on failure.
        urllib.request.urlcleanup()
    return name


def main():
    """Read text from stdin and download every quoted image URL found in it.

    The name of each saved image is printed to stdout. A URL whose download
    fails is reported on stderr and does not stop the remaining downloads.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    for line in sys.stdin:
        for url in URL_PATTERN.findall(line):
            try:
                saved = fetch_image(url)
            except (OSError, ValueError, http.client.HTTPException) as err:
                print(url, err, sep=": ", file=sys.stderr)
                continue
            if saved is not None:
                print(saved)


if __name__ == "__main__":
    main()