"""Image Fetcher.

Parses stdin, finds every double-quoted http(s) URL that points to an image
(by file extension), and downloads each image into an ``output`` folder
under the current working directory. The name of every saved file is
printed to stdout; download failures are reported on stderr.

Usage:
    $ python3 ./img-fetcher.py < page.html
"""

import sys
import re
import os
import urllib.request
import shutil
import http.client
import posixpath
import urllib.parse

# Extensions (lower-case, without the dot) that count as downloadable images.
ALLOWED_EXTENSIONS = frozenset({"png", "jpg", "jpeg", "svg"})

# Folder, relative to the current working directory, that receives the images.
OUTPUT_DIR = "output"

# A double-quoted http:// or https:// URL; the quotes are not captured.
URL_PATTERN = re.compile(r'"(https?://[^"]+)"')


def extract_urls(text):
    """Return every double-quoted http(s) URL found in *text*, in order."""
    return URL_PATTERN.findall(text)


def image_filename(url):
    """Return the file name to store *url* under, or None if it is not an image.

    The name is the last segment of the URL *path*, so a query string or
    fragment (``pic.png?w=100#top``) neither hides the extension nor leaks
    into the file name. The extension check is case-insensitive.
    """
    try:
        path = urllib.parse.urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed URLs (e.g. a broken IPv6 host).
        return None

    name = posixpath.basename(path)
    _, dot, ext = name.rpartition(".")
    if not dot or ext.lower() not in ALLOWED_EXTENSIONS:
        return None

    # Refuse names the local OS would interpret as containing a directory
    # (e.g. backslashes on Windows) so nothing is written outside OUTPUT_DIR.
    if os.path.basename(name) != name:
        return None
    return name


def fetch_image(url, destination):
    """Download *url* to a temporary file, then move it to *destination*.

    Going through a temporary file means an interrupted download never
    leaves a partial file at *destination*, which later runs would
    otherwise mistake for an already-fetched image.
    """
    temp_path, _headers = urllib.request.urlretrieve(url)
    shutil.move(temp_path, destination)


def main(stream=None):
    """Download all images referenced in *stream* (default: stdin).

    Images whose target file already exists are skipped. A failed download
    is reported on stderr and does not stop the remaining URLs from being
    processed.
    """
    if stream is None:
        stream = sys.stdin

    output_dir = os.path.join(os.getcwd(), OUTPUT_DIR)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(output_dir, exist_ok=True)

    try:
        for line in stream:
            for url in extract_urls(line):
                name = image_filename(url)
                if name is None:
                    continue

                destination = os.path.join(output_dir, name)
                if os.path.exists(destination):
                    continue

                try:
                    fetch_image(url, destination)
                except (OSError, http.client.HTTPException) as err:
                    # URLError/HTTPError are OSError subclasses; keep going
                    # so one dead link does not abort the whole run.
                    print("failed to download", url, "-", err, file=sys.stderr)
                    continue

                print(name)
    finally:
        # Remove temporary files left behind by downloads that failed midway.
        urllib.request.urlcleanup()


if __name__ == "__main__":
    main()