2018-07-10 10:30:50 +12:00
|
|
|
import io
|
2018-07-10 07:58:11 +12:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import urllib.request
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
|
|
|
|
FileNameTooLong, ImgurLoginError,
|
|
|
|
NotADownloadableLinkError)
|
|
|
|
from src.tools import GLOBAL, nameCorrector, printToFile
|
|
|
|
|
|
|
|
try:
|
|
|
|
from imgurpython import *
|
|
|
|
except ModuleNotFoundError:
|
|
|
|
print("\nimgurpython not found on your computer, installing...\n")
|
|
|
|
from src.tools import install
|
|
|
|
install("imgurpython")
|
|
|
|
from imgurpython import *
|
|
|
|
|
2018-07-10 10:30:50 +12:00
|
|
|
# Keep a handle on the built-in print before the name is rebound below;
# Self.writeToFile uses it to write into an open file via the file= argument.
VanillaPrint = print
# From here on, every print(...) call in this module is routed to
# src.tools.printToFile instead of the built-in.
print = printToFile
|
|
|
|
|
|
|
|
def dlProgress(count, blockSize, totalSize):
    """Write the current download progress to the console.

    Used as the reporthook of urllib.request.urlretrieve.

    count     -- Number of blocks transferred so far
    blockSize -- Size of one block in bytes
    totalSize -- Total size of the file in bytes
    """

    toMegabytes = 10**(-6)
    received = int(count*blockSize*toMegabytes)
    expected = int(totalSize*toMegabytes)

    status = "\r{}Mb/{}Mb".format(received, expected)
    sys.stdout.write(status)
    # Step the cursor back over what was just written so that the next
    # status (or message) overwrites it.
    sys.stdout.write("\b"*len(status))
    sys.stdout.flush()
|
|
|
|
|
|
|
|
def getExtension(link):
    """Extract file extension from image link.

    Only the last path segment of the link is inspected; a query string
    ('?...') or fragment ('#...') is ignored and the comparison is
    case-insensitive.

    link -- URL of the media file

    Returns the extension with a leading dot ('.jpg', '.png', '.mp4',
    '.webm' or '.gif'). If the link does not end in one of these,
    returns '.jpg'.
    """

    imageTypes = ('jpg','png','mp4','webm','gif')

    # Drop anything after the path so "abc.mp4?x=1" is still seen as mp4.
    path = link.split('?', 1)[0].split('#', 1)[0]
    lastSegment = path.rstrip('/').rsplit('/', 1)[-1]

    if '.' in lastSegment:
        extension = lastSegment.rsplit('.', 1)[-1].lower()
        if extension in imageTypes:
            return "." + extension

    return '.jpg'
|
|
|
|
|
|
|
|
def getFile(fileDir,tempDir,imageURL,indent=0):
    """Downloads given file to given directory.

    The file is first written to tempDir and renamed to fileDir once the
    transfer has finished. When the connection is reset the download is
    attempted again, three attempts in total.

    fileDir  -- Full path of the final file
    tempDir  -- Full path of the temporary file (same name, '.tmp' extension)
    imageURL -- URL to the file to be downloaded
    indent   -- Number of spaces printed in front of every status line

    Raises:
    FileAlreadyExistsError -- fileDir already exists; nothing is downloaded
    FileNameTooLong        -- downloading or renaming raised
                              FileNotFoundError; callers react by retrying
                              with a shorter file name
    ConnectionResetError   -- the connection was reset on every attempt
    """

    if os.path.isfile(fileDir):
        raise FileAlreadyExistsError

    attempts = 3
    for attempt in range(1, attempts + 1):
        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)
            os.rename(tempDir,fileDir)
        except ConnectionResetError as exception:
            print(" "*indent + str(exception))
            if attempt == attempts:
                # Out of attempts: let the caller know instead of returning
                # as if the file had been downloaded.
                raise
            print(" "*indent + "Trying again\n")
        except FileNotFoundError as exception:
            raise FileNameTooLong from exception
        else:
            # Trailing spaces blank out what is left of the progress text.
            print(" "*indent+"Downloaded"+" "*10)
            return
|
|
|
|
|
|
|
|
class Imgur:
    """Download the image or the album behind an imgur post.

    Creating an instance performs the whole download.

    directory -- Folder to save into; has to support the "/" operator
                 (e.g. a pathlib.Path)
    post      -- Dictionary describing the post. 'postURL', 'postTitle' and
                 'postId' are read; for single images 'mediaURL' and
                 'postExt' are written back into it.

    Raises:
    FileAlreadyExistsError       -- the image, or every image of the album,
                                    is already on disk
    AlbumNotDownloadedCompletely -- at least one image of the album could
                                    not be downloaded
    """

    def __init__(self,directory,post):
        self.imgurClient = self.initImgur()

        imgurID = self.getId(post['postURL'])
        content = self.getLink(imgurID)

        os.makedirs(directory, exist_ok=True)

        if content['type'] == 'image':
            self._downloadImage(directory, post, content['object'])
        elif content['type'] == 'album':
            self._downloadAlbum(directory, post, content['object'])

    @staticmethod
    def _buildPaths(folder, stem, extension):
        """Return the (final, temporary) paths of a file called stem."""
        # The parentheses matter: "/" binds tighter than "+", so
        # `folder / stem + extension` would try to add a str to a path.
        return folder / (stem + extension), folder / (stem + ".tmp")

    def _downloadImage(self, directory, post, image):
        """Download a single image, preferring its mp4 version if any."""
        try:
            post['mediaURL'] = image.mp4
        except AttributeError:
            post['mediaURL'] = image.link

        post['postExt'] = getExtension(post['mediaURL'])

        title = nameCorrector(post['postTitle'])
        print(title+"_" +post['postId']+post['postExt'])

        fileDir, tempDir = self._buildPaths(
            directory, title + "_" + post['postId'], post['postExt']
        )
        try:
            getFile(fileDir,tempDir,post['mediaURL'])
        except FileNameTooLong:
            # Retry with the post id alone as the file name.
            fileDir, tempDir = self._buildPaths(
                directory, post['postId'], post['postExt']
            )
            getFile(fileDir,tempDir,post['mediaURL'])

    def _downloadAlbum(self, directory, post, album):
        """Download every image of an album into its own sub folder."""
        images = album.images
        imageCount = len(images)
        duplicates = 0
        failures = 0

        title = nameCorrector(post['postTitle'])
        print(title+"_"+post['postId'],end="\n\n")

        folderDir = directory / (title+"_"+post['postId'])
        try:
            os.makedirs(folderDir, exist_ok=True)
        except FileNotFoundError:
            # Presumably the title made the path too long; fall back to a
            # folder named after the post id (which may exist already).
            folderDir = directory / post['postId']
            os.makedirs(folderDir, exist_ok=True)

        for number, image in enumerate(images, start=1):
            try:
                imageURL = image['mp4']
            except KeyError:
                imageURL = image['link']

            image['Ext'] = getExtension(imageURL)

            # Candidate file names, from most to least descriptive.
            fileNames = (
                str(number)
                + "_"
                + nameCorrector(str(image['title']))
                + "_"
                + image['id'],
                str(number) + "_" + image['id'],
                str(number),
            )

            print(" ({}/{})".format(number,imageCount))
            print(" {}".format(fileNames[0]+image['Ext']))

            try:
                self._getWithShortestNeededName(
                    folderDir, fileNames, image['Ext'], imageURL
                )
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10,end="\n\n")
                duplicates += 1
            except Exception as exception:
                # One broken image must not stop the rest of the album.
                print("\n Could not get the file")
                print(" " + str(exception) + "\n")
                failures += 1

        # NOTE(review): an album without images also ends up here, because
        # 0 duplicates == 0 images -- confirm that this is intended.
        if duplicates == imageCount:
            raise FileAlreadyExistsError
        elif failures:
            raise AlbumNotDownloadedCompletely

    def _getWithShortestNeededName(self, folder, fileNames, extension,
                                   imageURL):
        """Download imageURL under the first name that is not too long.

        Keeping the retries in a plain loop means every attempt is covered
        by the caller's exception handlers; FileNameTooLong is re-raised
        only when even the last name is rejected.
        """
        lastIndex = len(fileNames) - 1
        for index, fileName in enumerate(fileNames):
            fileDir, tempDir = self._buildPaths(folder, fileName, extension)
            try:
                getFile(fileDir,tempDir,imageURL,indent=2)
                return
            except FileNameTooLong:
                if index == lastIndex:
                    raise

    @staticmethod
    def initImgur():
        """Initialize imgur api"""

        config = GLOBAL.config
        return ImgurClient(
            config['imgur_client_id'],
            config['imgur_client_secret']
        )

    def getId(self,submissionURL):
        """Extract imgur post id
        and determine if its a single image or album

        Returns a dictionary with the keys 'id' and 'type', where 'type' is
        either 'album' or 'image'.
        """

        if submissionURL.endswith("/"):
            submissionURL = submissionURL[:-1]

        if "a/" in submissionURL or "gallery/" in submissionURL:
            albumId = submissionURL.split("/")[-1]
            return {'id':albumId, 'type':'album'}

        # Treat "." like "/" so that the id is simply the piece that
        # follows "com", with or without a file extension behind it.
        url = submissionURL.replace('.','/').split('/')
        imageId = url[url.index('com')+1]
        return {'id':imageId, 'type':'image'}

    def getLink(self,identity):
        """Request imgur object from imgur api

        identity -- Dictionary as returned by getId

        Returns a dictionary with the api object under 'object' and the
        kind of post under 'type'.
        """

        if identity['type'] == 'image':
            return {'object':self.imgurClient.get_image(identity['id']),
                    'type':'image'}
        elif identity['type'] == 'album':
            return {'object':self.imgurClient.get_album(identity['id']),
                    'type':'album'}

    @staticmethod
    def get_credits():
        """Return what get_credits() of a freshly created client reports."""
        return Imgur.initImgur().get_credits()
|
|
|
|
|
|
|
|
class Gfycat:
    """Download the video behind a gfycat post.

    Creating an instance performs the whole download.

    directory -- Folder to save into; has to support the "/" operator
                 (e.g. a pathlib.Path)
    POST      -- Dictionary describing the post. 'postURL', 'postTitle' and
                 'postId' are read; 'mediaURL' and 'postExt' are written
                 back into it.

    Raises:
    NotADownloadableLinkError -- no direct link could be obtained, whatever
                                 the reason; the original exception is kept
                                 as the cause
    """

    def __init__(self,directory,POST):
        try:
            POST['mediaURL'] = self.getLink(POST['postURL'])
        except Exception as exception:
            raise NotADownloadableLinkError from exception

        POST['postExt'] = getExtension(POST['mediaURL'])

        if not os.path.exists(directory): os.makedirs(directory)
        title = nameCorrector(POST['postTitle'])
        print(title+"_"+POST['postId']+POST['postExt'])

        fileDir = directory / (title+"_"+POST['postId']+POST['postExt'])
        tempDir = directory / (title+"_"+POST['postId']+".tmp")

        try:
            getFile(fileDir,tempDir,POST['mediaURL'])
        except FileNameTooLong:
            # Retry with the post id alone as the file name.
            fileDir = directory / (POST['postId']+POST['postExt'])
            tempDir = directory / (POST['postId']+".tmp")

            getFile(fileDir,tempDir,POST['mediaURL'])

    @staticmethod
    def _extractLink(pageSource, query, lineNumber):
        """Return the quoted value that follows query, or "" if none.

        The line at lineNumber is tried first; if it is missing or yields
        nothing, all lines of pageSource are searched in order.
        """
        try:
            candidates = [pageSource[lineNumber]]
        except IndexError:
            candidates = []
        candidates.extend(pageSource)

        for line in candidates:
            start = line.find(query)
            if start == -1:
                continue
            # +1 skips the quote that opens the attribute value.
            start += len(query) + 1
            end = line.find('"', start)
            if end > start:
                return line[start:end]
        return ""

    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
        """Extract direct link to the video from page's source
        and return it

        url        -- Link to the gfycat page; returned unchanged when it
                      contains '.webm', '.mp4' or '.gif'
        query      -- Text that directly precedes the quoted link in the
                      page's source
        lineNumber -- Line of the page's source that is searched first

        Raises NotADownloadableLinkError if the page holds no such link.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        if 'gifs' in url:
            url = "https://gfycat.com/" + url.split('/')[-1]

        # Close the connection as soon as the page has been read.
        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        link = self._extractLink(pageSource, query, lineNumber)
        if link == "":
            raise NotADownloadableLinkError

        return link
|
|
|
|
|
|
|
|
class Direct:
    """Download a post whose URL is used as the download link itself.

    Creating an instance performs the download.

    directory -- Folder to save into; has to support the "/" operator
    POST      -- Dictionary describing the post. 'postURL', 'postTitle' and
                 'postId' are read; 'postExt' is written back into it.
    """

    def __init__(self,directory,POST):
        mediaURL = POST['postURL']
        POST['postExt'] = getExtension(mediaURL)

        if not os.path.exists(directory):
            os.makedirs(directory)

        baseName = nameCorrector(POST['postTitle']) + "_" + POST['postId']
        print(baseName + POST['postExt'])

        target = directory / (baseName + POST['postExt'])
        partial = directory / (baseName + ".tmp")

        try:
            getFile(target, partial, mediaURL)
        except FileNameTooLong:
            # Second try: name the file after the post id only.
            shortName = POST['postId']
            target = directory / (shortName + POST['postExt'])
            partial = directory / (shortName + ".tmp")
            getFile(target, partial, mediaURL)
|
|
|
|
|
|
|
|
class Self:
    """Save a text post as a markdown file.

    Creating an instance writes the file.

    directory -- Folder to save into; has to support the "/" operator
    post      -- Dictionary describing the post. 'postTitle', 'postId',
                 'postURL', 'postContent' and 'postSubmitter' are read.

    Raises FileAlreadyExistsError if the markdown file is already there.
    """

    def __init__(self,directory,post):
        if not os.path.exists(directory):
            os.makedirs(directory)

        markdownName = (nameCorrector(post['postTitle'])
                        + "_"
                        + post['postId']
                        + ".md")
        print(markdownName)

        target = directory / markdownName
        if target.is_file():
            raise FileAlreadyExistsError

        try:
            self.writeToFile(target, post)
        except FileNotFoundError:
            # Second try: name the file after the post id only
            # (presumably the title made the name too long).
            target = directory / (post['postId'] + ".md")
            self.writeToFile(target, post)

    @staticmethod
    def writeToFile(directory,post):
        """Write the post as markdown into the file at directory.

        The file consists of the title as a heading linking to the post,
        the post's content and a line crediting the submitter.
        """

        pieces = [
            "## [",
            post["postTitle"],
            "](",
            post["postURL"],
            ")\n",
            post["postContent"],
            "\n\n---\n\n",
            "submitted by [u/",
            post["postSubmitter"],
            "](https://www.reddit.com/user/",
            post["postSubmitter"],
            ")",
        ]
        content = "".join(pieces)

        with io.open(directory,"w",encoding="utf-8") as FILE:
            # The module-level print is rebound, hence the saved built-in.
            VanillaPrint(content,file=FILE)

        print("Downloaded")
|