2018-07-10 10:30:50 +12:00
|
|
|
import io
|
2018-07-10 07:58:11 +12:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import urllib.request
|
2018-07-24 08:16:56 +12:00
|
|
|
from html.parser import HTMLParser
|
2018-07-10 07:58:11 +12:00
|
|
|
from pathlib import Path
|
2018-07-24 08:33:11 +12:00
|
|
|
from urllib.error import HTTPError
|
2018-07-10 07:58:11 +12:00
|
|
|
|
2018-07-12 08:59:14 +12:00
|
|
|
import imgurpython
|
2018-07-12 09:09:20 +12:00
|
|
|
from multiprocessing import Queue
|
2018-07-12 08:59:14 +12:00
|
|
|
|
2018-07-10 07:58:11 +12:00
|
|
|
from src.errors import (AlbumNotDownloadedCompletely, FileAlreadyExistsError,
|
|
|
|
FileNameTooLong, ImgurLoginError,
|
|
|
|
NotADownloadableLinkError)
|
|
|
|
from src.tools import GLOBAL, nameCorrector, printToFile
|
|
|
|
|
2018-07-10 10:30:50 +12:00
|
|
|
# Keep a reference to the built-in print before the name is rebound below.
VanillaPrint = print
|
2018-07-10 07:58:11 +12:00
|
|
|
# Rebind print in this module so that print() calls go to printToFile.
print = printToFile
|
|
|
|
|
|
|
|
def dlProgress(count, blockSize, totalSize):
    """Report download progress on the console.

    Takes the three arguments of a urlretrieve reporthook (number of
    blocks so far, block size, total size) and writes
    '<downloaded>Mb/<total>Mb' followed by a carriage return, so that
    the next report overwrites the current one.
    """

    toMb = 10**(-6)
    progress = "{}Mb/{}Mb".format(
        int(count*blockSize*toMb),
        int(totalSize*toMb)
    )

    sys.stdout.write(progress + "\r")
    sys.stdout.flush()
|
|
|
|
|
|
|
|
def getExtension(link):
    """Extract file extension from image link.

    Only the part of the link before any '?' or '#' is looked at, and
    the extension is whatever follows its last dot. It is returned
    (with the leading dot) only if it is one of the known types.

    If didn't find any, return '.mp4' for v.redd.it links and '.jpg'
    for everything else.
    """

    imageTypes = ('jpg','png','mp4','webm','gif')

    # A query string or fragment after the extension must not hide it
    path = link.split('?',1)[0].split('#',1)[0]

    # Only the text after the last dot can be the extension; a known type
    # somewhere earlier in the link (e.g. in the host name) is not one
    if '.' in path:
        lastPart = path.rsplit('.',1)[-1]
        if lastPart in imageTypes:
            return "."+lastPart

    if "v.redd.it" in link:
        return '.mp4'
    return '.jpg'
|
2018-07-10 07:58:11 +12:00
|
|
|
|
|
|
|
def getFile(fileDir,tempDir,imageURL,indent=0):
    """Downloads given file to given directory.

    fileDir -- Full file directory
    tempDir -- Full file directory with the extension of '.tmp'
    imageURL -- URL to the file to be downloaded
    indent -- Number of spaces put in front of the printed messages

    The file is downloaded to tempDir and renamed to fileDir afterwards.
    The download is tried up to three times when the connection is reset.

    Raises FileAlreadyExistsError if fileDir already exists,
    FileNameTooLong if the download or the rename fails with
    FileNotFoundError (too long file names seem not working) and
    ConnectionResetError if the connection was reset on every try.
    """

    headers = [
        ("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
         "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 "
         "Safari/537.36 OPR/54.0.2952.64"),
        ("Accept", "text/html,application/xhtml+xml,application/xml;"
         "q=0.9,image/webp,image/apng,*/*;q=0.8"),
        ("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3"),
        ("Accept-Encoding", "none"),
        ("Accept-Language", "en-US,en;q=0.8"),
        ("Connection", "keep-alive")
    ]

    # urlretrieve takes no headers, so they are set on the global opener
    opener = urllib.request.build_opener()
    opener.addheaders = headers
    urllib.request.install_opener(opener)

    if os.path.isfile(fileDir):
        raise FileAlreadyExistsError

    lastReset = None
    for attempt in range(3):
        try:
            urllib.request.urlretrieve(imageURL,
                                       tempDir,
                                       reporthook=dlProgress)
            os.rename(tempDir,fileDir)
        except ConnectionResetError as exception:
            lastReset = exception
            print(" "*indent + str(exception))
            print(" "*indent + "Trying again\n")
        except FileNotFoundError:
            raise FileNameTooLong
        else:
            print(" "*indent+"Downloaded"+" "*10)
            return

    # Every try ended with a reset connection. Returning normally here
    # would make the caller believe that the file has been downloaded.
    raise lastReset
|
|
|
|
|
2018-07-24 08:16:56 +12:00
|
|
|
class Erome:
    """Downloader for Erome posts.

    Creating an instance downloads the post. A single file is saved
    directly in the given directory, several files are saved in a folder
    of their own.
    """

    def __init__(self,directory,post):
        """Download every image and video found on the post's page.

        directory -- Directory to download into. Has to support the '/'
                     operator (e.g. pathlib.Path)
        post -- dict with the keys 'postURL', 'postTitle',
                'postSubmitter' and 'postId'

        Raises NotADownloadableLinkError if the page can not be fetched
        or has no links in it, FileAlreadyExistsError if every file
        already exists and AlbumNotDownloadedCompletely if at least one
        file of an album could not be downloaded.
        """

        try:
            IMAGES = self.getLinks(post['postURL'])
        except urllib.error.HTTPError:
            raise NotADownloadableLinkError("Not a downloadable link")

        imagesLenght = len(IMAGES)

        # Without this check a page with no links would create an empty
        # folder and be reported as already existing (0 duplicates of 0)
        if imagesLenght == 0:
            raise NotADownloadableLinkError("Not a downloadable link")

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']

        if imagesLenght == 1:
            self._downloadSingle(directory,post,baseName,IMAGES[0])
        else:
            self._downloadAlbum(directory,post,baseName,IMAGES)

    @staticmethod
    def _downloadSingle(directory,post,baseName,link):
        """Download the only file of a post straight into directory."""

        extension = getExtension(link)
        print(baseName+extension)

        # The other downloaders create the directory too; without it the
        # download fails and is mistaken for a too long file name
        os.makedirs(directory,exist_ok=True)

        fileDir = directory / (baseName+extension)
        tempDir = directory / (baseName+".tmp")

        # The links on the page come without the scheme
        imageURL = "https:" + link

        try:
            getFile(fileDir,tempDir,imageURL)
        except FileNameTooLong:
            fileDir = directory / (post['postId'] + extension)
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir,tempDir,imageURL)

    @staticmethod
    def _downloadAlbum(directory,post,baseName,links):
        """Download several files into a folder named after the post."""

        imagesLenght = len(links)
        howManyDownloaded = imagesLenght
        duplicates = 0

        print(baseName,end="\n\n")

        folderDir = directory / baseName

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # Folder name did not work, use the post id only. The folder
            # may be there already from an earlier run.
            folderDir = directory / post['postId']
            os.makedirs(folderDir,exist_ok=True)

        for i, link in enumerate(links):

            extension = getExtension(link)

            fileName = str(i+1)
            imageURL = "https:" + link

            fileDir = folderDir / (fileName + extension)
            tempDir = folderDir / (fileName + ".tmp")

            print(" ({}/{})".format(i+1,imagesLenght))
            print(" {}".format(fileName+extension))

            try:
                getFile(fileDir,tempDir,imageURL,indent=2)
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10,end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1

            except Exception as exception:
                # One failed file must not stop the rest of the album
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    def getLinks(self,url,lineNumber=129):
        """Collect the media links of the album on the given page.

        url -- URL of the page to read
        lineNumber -- Not used. The album is located by looking for the
                      <div id="album"> tag. Kept so that existing calls
                      keep working.

        The page is read line by line and only the last opening tag of
        a line is looked at. After the line the album starts at, the
        'src' of every <img class="img-front"> and of every <source> is
        collected. Links ending with '.mp4' are returned only if they
        end with '_480p.mp4'.
        """

        class EromeParser(HTMLParser):
            tag = None
            def handle_starttag(self, tag, attrs):
                self.tag = {tag:{attr[0]: attr[1] for attr in attrs}}

        def lastStartTag(line):
            parser = EromeParser()
            parser.feed(line)
            return parser.tag or {}

        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        # FIND WHERE ALBUM STARTS IN ORDER NOT TO GET WRONG LINKS
        # (stays None if there is no such tag: the whole page is used)
        albumStart = None
        for i, line in enumerate(pageSource):
            if lastStartTag(line).get("div",{}).get("id") == "album":
                albumStart = i
                break

        content = []
        for line in pageSource[albumStart:]:
            tag = lastStartTag(line)
            link = None
            if "img" in tag:
                if tag["img"].get("class") == "img-front":
                    link = tag["img"].get("src")
            elif "source" in tag:
                link = tag["source"].get("src")

            # Tags without a src have nothing to download
            if link:
                content.append(link)

        return [
            link for link in content
            if link.endswith("_480p.mp4") or not link.endswith(".mp4")
        ]
|
|
|
|
|
2018-07-10 07:58:11 +12:00
|
|
|
class Imgur:
    """Downloader for imgur posts.

    Creating an instance asks the imgur api for the image or album the
    post links to and downloads it. A single image is saved directly in
    the given directory, an album is saved in a folder of its own.
    """

    def __init__(self,directory,post):
        """Download the image or the album the post links to.

        directory -- Directory to download into. Has to support the '/'
                     operator (e.g. pathlib.Path)
        post -- dict with the keys 'postURL', 'postTitle',
                'postSubmitter' and 'postId'. For a single image
                'mediaURL' and 'postExt' are added to it.

        Raises NotADownloadableLinkError if an album has no images,
        FileAlreadyExistsError if every file of an album already exists
        and AlbumNotDownloadedCompletely if at least one file of an
        album could not be downloaded.
        """

        self.imgurClient = self.initImgur()

        imgurID = self.getId(post['postURL'])
        content = self.getLink(imgurID)

        if not os.path.exists(directory): os.makedirs(directory)

        if content['type'] == 'image':
            self._downloadImage(directory,post,content['object'])
        elif content['type'] == 'album':
            self._downloadAlbum(directory,post,content['object'].images)

    @staticmethod
    def _downloadImage(directory,post,image):
        """Download a single imgur image straight into directory."""

        # Prefer the video version of the image if there is one
        try:
            post['mediaURL'] = image.mp4
        except AttributeError:
            post['mediaURL'] = image.link

        post['postExt'] = getExtension(post['mediaURL'])

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']

        print(baseName+post['postExt'])

        fileDir = directory / (baseName + post['postExt'])
        tempDir = directory / (baseName + ".tmp")

        try:
            getFile(fileDir,tempDir,post['mediaURL'])
        except FileNameTooLong:
            # The parentheses matter: '/' binds tighter than '+', and
            # adding a string to a path object raises TypeError
            fileDir = directory / (post['postId'] + post['postExt'])
            tempDir = directory / (post['postId'] + '.tmp')
            getFile(fileDir,tempDir,post['mediaURL'])

    @staticmethod
    def _downloadAlbum(directory,post,images):
        """Download the images of an album into a folder of their own."""

        imagesLenght = len(images)

        # Without this check an empty album would create an empty folder
        # and be reported as already existing (0 duplicates of 0)
        if imagesLenght == 0:
            raise NotADownloadableLinkError("Not a downloadable link")

        howManyDownloaded = imagesLenght
        duplicates = 0

        title = nameCorrector(post['postTitle'])
        baseName = post["postSubmitter"]+"_"+title+"_"+post['postId']
        print(baseName,end="\n\n")

        folderDir = directory / baseName

        try:
            if not os.path.exists(folderDir):
                os.makedirs(folderDir)
        except FileNotFoundError:
            # Folder name did not work, use the post id only. The folder
            # may be there already from an earlier run.
            folderDir = directory / post['postId']
            os.makedirs(folderDir,exist_ok=True)

        for i, image in enumerate(images):
            try:
                imageURL = image['mp4']
            except KeyError:
                imageURL = image['link']

            image['Ext'] = getExtension(imageURL)

            # Filenames are declared here, from the longest one to the
            # shortest one. The next one is used if one is too long.
            fileNames = [
                (str(i+1)
                 + "_"
                 + nameCorrector(str(image['title']))
                 + "_"
                 + image['id']),
                str(i+1) + "_" + image['id'],
                str(i+1)
            ]

            print(" ({}/{})".format(i+1,imagesLenght))
            print(" {}".format(fileNames[0]+image['Ext']))

            try:
                Imgur._getFileWithShorterNames(
                    folderDir,fileNames,image['Ext'],imageURL
                )
                print()
            except FileAlreadyExistsError:
                print(" The file already exists" + " "*10,end="\n\n")
                duplicates += 1
                howManyDownloaded -= 1

            except Exception as exception:
                # One failed file must not stop the rest of the album
                print("\n Could not get the file")
                print(
                    " "
                    + "{class_name}: {info}".format(
                        class_name=exception.__class__.__name__,
                        info=str(exception)
                    )
                    + "\n"
                )
                howManyDownloaded -= 1

        if duplicates == imagesLenght:
            raise FileAlreadyExistsError
        elif howManyDownloaded + duplicates < imagesLenght:
            raise AlbumNotDownloadedCompletely(
                "Album Not Downloaded Completely"
            )

    @staticmethod
    def _getFileWithShorterNames(folderDir,fileNames,extension,imageURL):
        """Download the file with the first name that is not too long.

        Every exception other than FileNameTooLong is passed on to the
        caller no matter which name was being tried. FileNameTooLong is
        passed on only if the last name is too long as well.
        """

        lastIndex = len(fileNames) - 1
        for index, fileName in enumerate(fileNames):
            fileDir = folderDir / (fileName + extension)
            tempDir = folderDir / (fileName + ".tmp")
            try:
                getFile(fileDir,tempDir,imageURL,indent=2)
                return
            except FileNameTooLong:
                if index == lastIndex:
                    raise

    @staticmethod
    def initImgur():
        """Initialize imgur api"""

        config = GLOBAL.config
        return imgurpython.ImgurClient(
            config['imgur_client_id'],
            config['imgur_client_secret']
        )

    def getId(self,submissionURL):
        """Extract imgur post id
        and determine if its a single image or album

        Returns a dict with the keys 'id' and 'type', 'type' being
        either 'album' or 'image'.
        """

        if submissionURL.endswith("/"):
            submissionURL = submissionURL[:-1]

        if "a/" in submissionURL or "gallery/" in submissionURL:
            albumId = submissionURL.split("/")[-1]
            return {'id':albumId, 'type':'album'}

        else:
            # The id is the part right after 'com', without the extension
            url = submissionURL.replace('.','/').split('/')
            imageId = url[url.index('com')+1]
            return {'id':imageId, 'type':'image'}

    def getLink(self,identity):
        """Request imgur object from imgur api

        identity -- dict with the keys 'id' and 'type' as returned by
                    getId

        Returns a dict with the keys 'object' (what the api returned)
        and 'type'. Returns None if the type is neither 'image' nor
        'album'.
        """

        if identity['type'] == 'image':
            return {'object':self.imgurClient.get_image(identity['id']),
                    'type':'image'}
        elif identity['type'] == 'album':
            return {'object':self.imgurClient.get_album(identity['id']),
                    'type':'album'}

    @staticmethod
    def get_credits():
        """Return what get_credits() of a newly created client returns"""
        return Imgur.initImgur().get_credits()
|
2018-07-10 07:58:11 +12:00
|
|
|
|
|
|
|
class Gfycat:
    """Downloader for gfycat posts.

    Creating an instance finds the direct link to the video and
    downloads it into the given directory.
    """

    def __init__(self,directory,POST):
        """Download the video the post links to.

        directory -- Directory to download into. Has to support the '/'
                     operator (e.g. pathlib.Path)
        POST -- dict with the keys 'postURL', 'postTitle',
                'postSubmitter' and 'postId'. 'mediaURL' and 'postExt'
                are added to it.

        Raises NotADownloadableLinkError if the direct link can not be
        found. Any other exception of getLink is passed on unchanged.
        """

        try:
            POST['mediaURL'] = self.getLink(POST['postURL'])
        except IndexError:
            raise NotADownloadableLinkError("Could not read the page source")

        POST['postExt'] = getExtension(POST['mediaURL'])

        if not os.path.exists(directory): os.makedirs(directory)
        title = nameCorrector(POST['postTitle'])

        # Filenames are declared here
        baseName = POST["postSubmitter"]+"_"+title+"_"+POST['postId']

        print(baseName+POST['postExt'])

        fileDir = directory / (baseName+POST['postExt'])
        tempDir = directory / (baseName+".tmp")

        try:
            getFile(fileDir,tempDir,POST['mediaURL'])
        except FileNameTooLong:
            fileDir = directory / (POST['postId']+POST['postExt'])
            tempDir = directory / (POST['postId']+".tmp")

            getFile(fileDir,tempDir,POST['mediaURL'])

    def getLink(self, url, query='<source id="mp4Source" src=', lineNumber=105):
        """Extract direct link to the video from page's source
        and return it

        url -- Link to the gfycat page. Returned as it is if it has
               '.webm', '.mp4' or '.gif' in it.
        query -- Text right in front of the quoted link
        lineNumber -- Index of the line the link is expected on. This
                      line is looked at first, then every line of the
                      page, so the link is found even when it is on
                      another line.

        Raises NotADownloadableLinkError if the link is not found.
        """

        if '.webm' in url or '.mp4' in url or '.gif' in url:
            return url

        if url[-1:] == '/':
            url = url[:-1]

        url = "https://gfycat.com/" + url.split('/')[-1]

        with urllib.request.urlopen(url) as response:
            pageSource = response.read().decode().split('\n')

        # Slicing gives an empty list instead of an IndexError when the
        # page has fewer lines than expected
        candidates = pageSource[lineNumber:lineNumber+1] + pageSource

        for line in candidates:
            position = line.find(query)
            if position == -1:
                continue

            # +1 to skip the quote the link starts with
            start = position + len(query) + 1
            end = line.find('"',start)

            # Go on with the next line if the link is empty or unfinished
            if end > start:
                return line[start:end]

        raise NotADownloadableLinkError("Could not read the page source")
|
|
|
|
|
|
|
|
class Direct:
    """Downloader for posts whose URL is the file to be downloaded."""

    def __init__(self,directory,POST):
        """Download POST['postURL'] into directory.

        The file is named <submitter>_<title>_<id><extension>. If
        getFile raises FileNameTooLong for it, <id><extension> is used
        instead. POST['postExt'] is set to what getExtension returns
        for the URL.
        """

        mediaURL = POST['postURL']
        POST['postExt'] = getExtension(mediaURL)

        if not os.path.exists(directory):
            os.makedirs(directory)

        title = nameCorrector(POST['postTitle'])

        # Filenames are declared here
        longName = POST["postSubmitter"]+"_"+title+"_"+POST['postId']

        print(longName+POST['postExt'])

        fileDir = directory / (longName+POST['postExt'])
        tempDir = directory / (longName+".tmp")

        try:
            getFile(fileDir,tempDir,mediaURL)
        except FileNameTooLong:
            shortName = POST['postId']
            getFile(
                directory / (shortName+POST['postExt']),
                directory / (shortName+".tmp"),
                mediaURL
            )
|
|
|
|
|
|
|
|
class Self:
    """Downloader for self (text) posts.

    Creating an instance writes the post into a markdown file in the
    given directory.
    """

    def __init__(self,directory,post):
        """Write the post to <submitter>_<title>_<id>.md in directory.

        directory -- Directory to write into. Has to support the '/'
                     operator (e.g. pathlib.Path)
        post -- dict with the keys 'postTitle', 'postSubmitter',
                'postId' and the keys writeToFile reads

        If writing fails with FileNotFoundError, the file is named
        <id>.md instead.

        Raises FileAlreadyExistsError if the file to be written already
        exists.
        """

        if not os.path.exists(directory): os.makedirs(directory)

        title = nameCorrector(post['postTitle'])

        # Filenames are declared here
        fileName = post["postSubmitter"]+"_"+title+"_"+post['postId']+".md"

        print(fileName)

        fileDir = directory / fileName

        if Path.is_file(fileDir):
            raise FileAlreadyExistsError

        try:
            self.writeToFile(fileDir,post)
        except FileNotFoundError:
            fileDir = directory / (post['postId']+".md")

            # A post that needed the short name on an earlier run would
            # be overwritten on every run without this check
            if Path.is_file(fileDir):
                raise FileAlreadyExistsError from None

            self.writeToFile(fileDir,post)

    @staticmethod
    def writeToFile(directory,post):
        """Self posts are formatted here

        directory -- Full path of the file to be written
        post -- dict with the keys 'postTitle', 'postURL',
                'postContent', 'postSubreddit' and 'postSubmitter'
        """

        content = ("## ["
                   + post["postTitle"]
                   + "]("
                   + post["postURL"]
                   + ")\n"
                   + post["postContent"]
                   + "\n\n---\n\n"
                   + "submitted to [r/"
                   + post["postSubreddit"]
                   + "](https://www.reddit.com/r/"
                   + post["postSubreddit"]
                   + ") by [u/"
                   + post["postSubmitter"]
                   + "](https://www.reddit.com/user/"
                   + post["postSubmitter"]
                   + ")")

        # print is rebound in this module, VanillaPrint is the built-in
        # one and can write to a file
        with io.open(directory,"w",encoding="utf-8") as FILE:
            VanillaPrint(content,file=FILE)

        print("Downloaded")
|