init

2024-12-22 02:30:08 +00:00 · 2019-03-14 17:19:05 +02:00 · 2019-03-14 17:19:05 +02:00 · 4688e55d73
commit 4688e55d73
3 changed files with 37 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1 @@
+output
--- a/README.md
+++ b/README.md
@ -0,0 +1,7 @@
+Image Fetcher
+==========
+
+The script parses stdin, finds all URLs that point to images, and then downloads those images into the "output" folder.
+
+
+    $ echo '<html><body><img src="https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"></body></html>' | python3 ./img-fetcher.py
--- a/img-fetcher.py
+++ b/img-fetcher.py
@ -0,0 +1,29 @@
+import sys
+import re
+import os
+import urllib.request
+import shutil
+
+# Extensions (lower-case, no dot) of the files that will be downloaded.
+allowedExt = ["png", "jpg", "jpeg", "svg"]
+
+# Scan stdin line by line for double-quoted http(s) URLs and save every one
+# that names an allowed image type into ./output, printing its file name.
+for data in sys.stdin:
+    os.makedirs('output', exist_ok=True)  # no check-then-create race
+    for url in re.findall(r'\"(https?://[^"]+)"', data):
+        # Ignore ?query/#fragment so "logo.PNG?v=2" is saved as "logo.PNG".
+        path = re.split(r'[?#]', url, maxsplit=1)[0]
+        imageName = path.rsplit('/', 1)[-1]
+        ext = os.path.splitext(imageName)[1][1:].lower()
+        endpoint = os.path.join(os.getcwd(), 'output', imageName)
+        if ext not in allowedExt or os.path.exists(endpoint):
+            continue
+        try:
+            filename, _ = urllib.request.urlretrieve(url)
+            shutil.move(filename, endpoint)
+            print(imageName)
+        except (OSError, ValueError):
+            # URLError/HTTPError are OSError subclasses: report the URL and
+            # keep going instead of aborting the whole run on one bad link.
+            print(url)