2020-06-04 03:10:25 +12:00
|
|
|
import urllib
|
|
|
|
import json
|
2020-05-29 06:42:11 +12:00
|
|
|
import os
|
2020-06-04 03:10:25 +12:00
|
|
|
import time
|
|
|
|
import requests
|
2020-05-29 06:42:11 +12:00
|
|
|
|
|
|
|
from src.utils import GLOBAL, nameCorrector
|
|
|
|
from src.utils import printToFile as print
|
2020-06-04 03:10:25 +12:00
|
|
|
from src.downloaders.Direct import Direct
|
|
|
|
from src.downloaders.downloaderUtils import getFile
|
|
|
|
from src.errors import FileNotFoundError, FileAlreadyExistsError, AlbumNotDownloadedCompletely, ImageNotFound, ExtensionError, NotADownloadableLinkError, TypeInSkip
|
2020-05-29 06:42:11 +12:00
|
|
|
|
|
|
|
class Imgur:
    """Downloader for the media behind an Imgur link of a single post.

    Constructing an instance performs the whole download as a side effect:
    the page at ``post['CONTENTURL']`` is fetched, the JSON blob embedded in
    it is parsed, and either a single image or every image of an album is
    saved under ``directory`` via ``getFile``.
    """

    IMGUR_IMAGE_DOMAIN = "https://i.imgur.com/"

    def __init__(self, directory, post):
        """Download the content referenced by ``post['CONTENTURL']``.

        Args:
            directory: Destination folder; combined with names using ``/``.
            post: Mapping describing the post. ``'CONTENTURL'`` is read here,
                ``'POSTID'`` and the keys used by the configured filename
                template are read by the download methods.
        """
        link = post['CONTENTURL']

        # .gifv pages are handed to the Direct downloader as plain .mp4
        # links; nothing else is initialised on this instance in that case.
        if link.endswith(".gifv"):
            link = link.replace(".gifv", ".mp4")
            Direct(directory, {**post, 'CONTENTURL': link})
            return

        self.rawData = self.getData(link)

        self.directory = directory
        self.post = post

        if not self.isAlbum:
            self.download(self.rawData)
            return

        album = self.rawData["album_images"]
        if album["count"] != 1:
            self.downloadAlbum(album)
        else:
            # A one-image album is stored like a standalone image.
            self.download(album["images"][0])

    def downloadAlbum(self, images):
        """Download every image of an album into its own folder.

        Args:
            images: The ``"album_images"`` mapping of the page data; its
                ``"count"`` and ``"images"`` entries are used.

        Raises:
            FileAlreadyExistsError: Every image was reported as a duplicate.
            AlbumNotDownloadedCompletely: Fewer images than ``count`` were
                downloaded, skipped or found as duplicates.
        """
        # The module imports a FileNotFoundError from src.errors, which
        # shadows the builtin raised by os.makedirs. Both are caught below so
        # the POSTID fallback can actually trigger.
        import builtins

        folderName = GLOBAL.config['filename'].format(**self.post)
        folderDir = self.directory / folderName

        imageCount = images["count"]
        downloaded = 0
        duplicates = 0

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except (FileNotFoundError, builtins.FileNotFoundError):
            # Fall back to a folder named after the post id.
            folderDir = self.directory / self.post['POSTID']
            os.makedirs(folderDir, exist_ok=True)

        print(folderName)

        # Slicing keeps the original "first `count` images" semantics while
        # avoiding an IndexError when fewer images than `count` are present;
        # that shortfall is reported by the completeness check below.
        for position, image in enumerate(images["images"][:imageCount], start=1):
            extension = self.validateExtension(image["ext"])
            imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension

            filename = "_".join([
                str(position), nameCorrector(image['title']), image['hash']
            ]) + extension
            # NOTE(review): unlike download(), the short name carries no
            # extension - confirm this is what getFile expects.
            shortFilename = str(position) + "_" + image['hash']

            print("\n ({}/{})".format(position, imageCount))

            try:
                getFile(filename, shortFilename, folderDir, imageURL, indent=2)
                downloaded += 1
                print()

            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10, end="\n\n")
                duplicates += 1

            except TypeInSkip:
                print(" Skipping...")
                # Skipped types count as handled so the album is not
                # reported as incomplete.
                downloaded += 1

            except Exception as exception:
                # Best effort: report the failure and continue with the
                # remaining images.
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                print(GLOBAL.log_stream.getvalue(), noPrint=True)

        if duplicates == imageCount:
            raise FileAlreadyExistsError
        elif downloaded + duplicates < imageCount:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    def download(self, image):
        """Download a single image described by ``image``.

        Args:
            image: Mapping with the ``"ext"`` and ``"hash"`` entries of the
                image.
        """
        extension = self.validateExtension(image["ext"])
        imageURL = self.IMGUR_IMAGE_DOMAIN + image["hash"] + extension

        filename = GLOBAL.config['filename'].format(**self.post) + extension
        shortFilename = self.post['POSTID'] + extension

        getFile(filename, shortFilename, self.directory, imageURL)

    @property
    def isAlbum(self):
        """Whether the parsed page data contains an ``"album_images"`` entry."""
        return "album_images" in self.rawData

    @staticmethod
    def getData(link):
        """Fetch ``link`` and return the JSON object embedded in the page.

        Args:
            link: URL of the Imgur page.

        Returns:
            The decoded JSON found between the start and end markers.

        Raises:
            ImageNotFound: The server did not answer with status 200.
            NotADownloadableLinkError: The markers are missing from the page.
        """
        cookies = {"over18": "1"}
        res = requests.get(link, cookies=cookies)
        if res.status_code != 200:
            raise ImageNotFound(f"Server responded with {res.status_code} to {link}")
        # Reuse the response that was just validated instead of requesting
        # the page a second time.
        pageSource = res.text

        # NOTE(review): marker strings are copied verbatim from this copy of
        # the file; confirm their inner whitespace against the repository.
        STARTING_STRING = "image : "
        ENDING_STRING = "group :"

        try:
            startIndex = pageSource.index(STARTING_STRING) + len(STARTING_STRING)
            # Search only after the start marker so an earlier occurrence of
            # the end marker cannot produce an empty or negative slice.
            endIndex = pageSource.index(ENDING_STRING, startIndex)
        except ValueError:
            raise NotADownloadableLinkError(f"Could not read the page source on {link}")

        # Drop the last character of the stripped slice (presumably the
        # separator before the end marker - TODO confirm) before decoding.
        data = pageSource[startIndex:endIndex].strip()[:-1]

        return json.loads(data)

    @staticmethod
    def validateExtension(string):
        """Return the first supported extension contained in ``string``.

        Candidates are tried in the order ``.jpg``, ``.png``, ``.mp4``,
        ``.gif`` using a substring test.

        Raises:
            ExtensionError: None of the supported extensions occurs in
                ``string``.
        """
        POSSIBLE_EXTENSIONS = [".jpg", ".png", ".mp4", ".gif"]

        for extension in POSSIBLE_EXTENSIONS:
            if extension in string:
                return extension

        raise ExtensionError(f"\"{string}\" is not recognized as a valid extension.")
|