import sys
import os
import time
from urllib.error import HTTPError
import urllib.request
from pathlib import Path
import hashlib

from src.utils import nameCorrector, GLOBAL
from src.utils import printToFile as print
from src.errors import FileAlreadyExistsError, FileNameTooLong, FailedToDownload, DomainInSkip

|
|
def dlProgress(count, blockSize, totalSize):
    """urllib reporthook: write cumulative download progress to the console.

    count     -- number of blocks transferred so far
    blockSize -- size of one transfer block in bytes
    totalSize -- total size of the download in bytes
    """
    # Same 1e-06 scaling as bytes->Mb conversion; trailing \r rewrites the
    # same console line on every call instead of scrolling.
    transferred = int(count * blockSize * 1e-06)
    total = int(totalSize * 1e-06)
    sys.stdout.write("{}Mb/{}Mb\r".format(transferred, total))
    sys.stdout.flush()
def getExtension(link):
    """Guess the file extension for a direct media link.

    If any known media type appears between the dots of *link*, the text
    after the final dot is returned as the extension. Otherwise fall back
    to '.jpg', or '.mp4' for v.redd.it hosted links.
    """
    knownTypes = ('jpg', 'png', 'mp4', 'webm', 'gif')
    segments = link.split('.')

    if any(mediaType in segments for mediaType in knownTypes):
        # A recognized type is present somewhere in the link, so trust
        # whatever follows the last dot as the real extension.
        return "." + segments[-1]

    # No recognizable type: v.redd.it serves video, everything else is
    # assumed to be a plain image.
    return '.mp4' if "v.redd.it" in link else '.jpg'
def getFile(filename,shortFilename,folderDir,imageURL,indent=0, silent=False):
    """Download imageURL into folderDir under filename.

    Makes up to three attempts. The payload is written to a ".tmp" file
    first and renamed into place only after a successful transfer, so a
    partial download never masquerades as a finished file.

    filename      -- preferred (long) file name; sanitized via nameCorrector
    shortFilename -- fallback name used if the long one triggers
                     FileNotFoundError (presumably an over-long or invalid
                     path -- TODO confirm against nameCorrector/caller)
    folderDir     -- destination directory
    imageURL      -- direct URL of the file to fetch
    indent        -- number of spaces prefixed to console output
    silent        -- suppress console output when True

    Raises DomainInSkip when the URL matches a user-skipped domain,
    FileAlreadyExistsError when the target file (or, with --no-dupes, an
    identical hash) already exists, and FailedToDownload after three
    failed attempts.
    """

    # Honor the user's skip list before doing any network work.
    if any(domain in imageURL for domain in GLOBAL.arguments.skip):
        raise DomainInSkip

    # Browser-like request headers; some hosts refuse urllib's default agent.
    headers = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " \
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "\
            "Safari/537.36 OPR/54.0.2952.64"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;" \
            "q=0.9,image/webp,image/apng,*/*;q=0.8"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # NOTE(review): the opener is installed process-wide either way; only the
    # custom headers are withheld for imgur links -- presumably imgur rejects
    # them, confirm before changing.
    opener = urllib.request.build_opener()
    if not "imgur" in imageURL:
        opener.addheaders = headers
    urllib.request.install_opener(opener)

    # Strip characters that are not legal in file names.
    filename = nameCorrector(filename)

    if not silent: print(" "*indent + str(folderDir),
                         " "*indent + str(filename),
                         sep="\n")

    # Up to three attempts at the transfer.
    for i in range(3):
        fileDir = Path(folderDir) / filename
        tempDir = Path(folderDir) / (filename+".tmp")

        if not (os.path.isfile(fileDir)):
            try:
                # Download into the ".tmp" name; promoted only on success.
                urllib.request.urlretrieve(imageURL,
                                           tempDir,
                                           reporthook=dlProgress)

                # --no-dupes: discard the file if its MD5 was already seen
                # this session, otherwise record it.
                if GLOBAL.arguments.no_dupes:
                    fileHash = createHash(tempDir)
                    if fileHash in GLOBAL.hashList:
                        os.remove(tempDir)
                        raise FileAlreadyExistsError
                    GLOBAL.hashList.add(fileHash)

                # Success: move the temp file to its final name.
                os.rename(tempDir,fileDir)
                if not silent: print(" "*indent+"Downloaded"+" "*10)
                return None

            except ConnectionResetError as exception:
                # Transient network failure: report and let the loop retry.
                if not silent: print(" "*indent + str(exception))
                if not silent: print(" "*indent + "Trying again\n")

            except FileNotFoundError:
                # Presumably the long name is invalid on this filesystem;
                # retry with the shorter fallback name -- TODO confirm.
                filename = shortFilename

        else:
            raise FileAlreadyExistsError

    raise FailedToDownload
def createHash(filename):
    """Return the hexadecimal MD5 digest of the file at *filename*.

    Reads in 4 KB chunks so arbitrarily large downloads can be hashed
    without loading them fully into memory.
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|