1
0
Fork 0
mirror of synced 2024-05-02 11:22:33 +12:00
bulk-downloader-for-reddit/script.py
2018-07-09 22:58:11 +03:00

534 lines
16 KiB
Python

#!/usr/bin/env python
"""
This program downloads imgur, gfycat and direct image and video links of
saved posts from a reddit account. It is written in Python 3.
"""
import argparse
import os
import sys
import time
from pathlib import Path, PurePath
from src.downloader import Direct, Gfycat, Imgur
from src.parser import LinkDesigner
from src.searcher import getPosts
from src.tools import (GLOBAL, createLogFile, jsonFile, nameCorrector,
printToFile)
from src.errors import *
__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"
def debug(*post):
GLOBAL.config = getConfig('config.json')
GLOBAL.directory = Path(".\\debug\\")
download([*post])
quit()
def getConfig(configFileName):
"""Read credentials from config.json file"""
keys = ['imgur_client_id',
'imgur_client_secret']
if os.path.exists(configFileName):
FILE = jsonFile(configFileName)
content = FILE.read()
if "reddit_refresh_token" in content:
if content["reddit_refresh_token"] == "":
FILE.delete("reddit_refresh_token")
for key in keys:
try:
if content[key] == "":
raise KeyError
except KeyError:
print(key,": ")
FILE.add({key:input()})
return jsonFile(configFileName).read()
else:
FILE = jsonFile(configFileName)
configDictionary = {}
for key in keys:
configDictionary[key] = input(key + ": ")
FILE.add(configDictionary)
return FILE.read()
def parseArguments(arguments=[]):
"""Initialize argparse and add arguments"""
parser = argparse.ArgumentParser(allow_abbrev=False,
description="This program downloads " \
"media from reddit " \
"posts")
parser.add_argument("directory",
help="Specifies the directory where posts will be " \
"downloaded to",
metavar="DIRECTORY")
parser.add_argument("--link","-l",
help="Get posts from link",
metavar="link")
parser.add_argument("--saved",
action="store_true",
help="Triggers saved mode")
parser.add_argument("--submitted",
action="store_true",
help="Gets posts of --user")
parser.add_argument("--upvoted",
action="store_true",
help="Gets upvoted posts of --user")
parser.add_argument("--log",
help="Triggers log read mode and takes a log file",
# type=argparse.FileType('r'),
metavar="LOG FILE")
parser.add_argument("--subreddit",
nargs="+",
help="Triggers subreddit mode and takes subreddit's " \
"name without r/. use \"frontpage\" for frontpage",
metavar="SUBREDDIT",
type=str)
parser.add_argument("--multireddit",
help="Triggers multireddit mode and takes "\
"multireddit's name without m/",
metavar="MULTIREDDIT",
type=str)
parser.add_argument("--user",
help="reddit username if needed. use \"me\" for " \
"current user",
required="--multireddit" in sys.argv or \
"--submitted" in sys.argv,
metavar="redditor",
type=str)
parser.add_argument("--search",
help="Searches for given query in given subreddits",
metavar="query",
type=str)
parser.add_argument("--sort",
help="Either hot, top, new, controversial, rising " \
"or relevance default: hot",
choices=[
"hot","top","new","controversial","rising",
"relevance"
],
metavar="SORT TYPE",
type=str)
parser.add_argument("--limit",
help="default: unlimited",
metavar="Limit",
default=None,
type=int)
parser.add_argument("--time",
help="Either hour, day, week, month, year or all." \
" default: all",
choices=["all","hour","day","week","month","year"],
metavar="TIME_LIMIT",
type=str)
parser.add_argument("--NoDownload",
help="Just gets the posts and store them in a file" \
" for downloading later",
action="store_true",
default=False)
if arguments == []:
return parser.parse_args()
else:
return parser.parse_args(arguments)
def checkConflicts():
"""Check if command-line arguments are given correcly,
if not, raise errors
"""
if GLOBAL.arguments.saved is False:
saved = 0
else:
saved = 1
if GLOBAL.arguments.subreddit is None:
subreddit = 0
else:
subreddit = 1
if GLOBAL.arguments.submitted is False:
submitted = 0
else:
submitted = 1
if GLOBAL.arguments.search is None:
search = 0
else:
search = 1
if GLOBAL.arguments.log is None:
log = 0
else:
log = 1
if GLOBAL.arguments.link is None:
link = 0
else:
link = 1
if GLOBAL.arguments.user is None:
user = 0
else:
user = 1
if GLOBAL.arguments.upvoted is False:
upvoted = 0
else:
upvoted = 1
if not saved+subreddit+log+link+submitted+upvoted == 1:
print("Program mode is invalid")
quit()
if search+subreddit == 2:
print("You cannot search in your saved posts")
quit()
if search+submitted == 2:
print("You cannot search in submitted posts")
quit()
if search+upvoted == 2:
print("You cannot search in upvoted posts")
quit()
if upvoted+subreddit == 1 and user == 0:
print("No redditor name given")
quit()
def postFromLog(fileName):
"""Analyze a log file and return a list of dictionaries containing
submissions
"""
if Path.is_file(Path(fileName)):
content = jsonFile(fileName).read()
else:
print("File not found")
quit()
try:
del content["HEADER"]
except KeyError:
pass
posts = []
for post in content:
if not content[post][-1]['postType'] == None:
posts.append(content[post][-1])
return posts
def prepareAttributes():
ATTRIBUTES = {}
if GLOBAL.arguments.user is not None:
ATTRIBUTES["user"] = GLOBAL.arguments.user
if GLOBAL.arguments.search is not None:
ATTRIBUTES["search"] = GLOBAL.arguments.search
if GLOBAL.arguments.sort == "hot" or \
GLOBAL.arguments.sort == "controversial" or \
GLOBAL.arguments.sort == "rising":
GLOBAL.arguments.sort = "relevance"
if GLOBAL.arguments.sort is not None:
ATTRIBUTES["sort"] = GLOBAL.arguments.sort
else:
if GLOBAL.arguments.submitted:
ATTRIBUTES["sort"] = "new"
else:
ATTRIBUTES["sort"] = "hot"
if GLOBAL.arguments.time is not None:
ATTRIBUTES["time"] = GLOBAL.arguments.time
else:
ATTRIBUTES["time"] = "all"
if GLOBAL.arguments.link is not None:
GLOBAL.arguments.link = GLOBAL.arguments.link.strip("\"")
try:
ATTRIBUTES = LinkDesigner(GLOBAL.arguments.link)
except InvalidRedditLink:
raise InvalidRedditLink
if GLOBAL.arguments.search is not None:
ATTRIBUTES["search"] = GLOBAL.arguments.search
if GLOBAL.arguments.sort is not None:
ATTRIBUTES["sort"] = GLOBAL.arguments.sort
if GLOBAL.arguments.time is not None:
ATTRIBUTES["time"] = GLOBAL.arguments.time
elif GLOBAL.arguments.subreddit is not None:
GLOBAL.arguments.subreddit = "+".join(GLOBAL.arguments.subreddit)
ATTRIBUTES["subreddit"] = GLOBAL.arguments.subreddit
elif GLOBAL.arguments.saved is True:
ATTRIBUTES["saved"] = True
elif GLOBAL.arguments.upvoted is True:
ATTRIBUTES["upvoted"] = True
elif GLOBAL.arguments.submitted is not None:
ATTRIBUTES["submitted"] = True
if GLOBAL.arguments.sort == "rising":
raise InvalidSortingType
ATTRIBUTES["limit"] = GLOBAL.arguments.limit
return ATTRIBUTES
def postExists(POST):
"""Figure out a file's name and checks if the file already exists"""
title = nameCorrector(POST['postTitle'])
FILENAME = title + "_" + POST['postId']
PATH = GLOBAL.directory / POST["postSubreddit"]
possibleExtensions = [".jpg",".png",".mp4",".gif",".webm"]
for i in range(2):
for extension in possibleExtensions:
FILE_PATH = PATH / (FILENAME+extension)
if FILE_PATH.exists():
return True
else:
FILENAME = POST['postId']
else:
return False
def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
subsLenght = len(submissions)
lastRequestTime = 0
downloadedCount = subsLenght
duplicates = 0
BACKUP = {}
FAILED_FILE = createLogFile("FAILED")
for i in range(subsLenght):
print("\n({}/{})".format(i+1,subsLenght))
print(
"https://reddit.com/r/{subreddit}/comments/{id}".format(
subreddit=submissions[i]['postSubreddit'],
id=submissions[i]['postId']
)
)
if postExists(submissions[i]):
result = False
print("It already exists")
duplicates += 1
downloadedCount -= 1
continue
directory = GLOBAL.directory / submissions[i]['postSubreddit']
if submissions[i]['postType'] == 'imgur':
print("IMGUR",end="")
while int(time.time() - lastRequestTime) <= 2:
pass
credit = Imgur.get_credits()
IMGUR_RESET_TIME = credit['UserReset']-time.time()
USER_RESET = ("after " \
+ str(int(IMGUR_RESET_TIME/60)) \
+ " Minutes " \
+ str(int(IMGUR_RESET_TIME%60)) \
+ " Seconds")
print(
" => Client: {} - User: {} - Reset {}".format(
credit['ClientRemaining'],
credit['UserRemaining'],
USER_RESET
)
)
if not (credit['UserRemaining'] == 0 or \
credit['ClientRemaining'] == 0):
"""This block of code is needed
"""
while int(time.time() - lastRequestTime) <= 2:
pass
lastRequestTime = time.time()
try:
Imgur(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except ImgurLoginError:
print(
"Imgur login failed. Quitting the program "\
"as unexpected errors might occur."
)
quit()
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
if credit['UserRemaining'] == 0:
KEYWORD = "user"
elif credit['ClientRemaining'] == 0:
KEYWORD = "client"
print('{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()))
FAILED_FILE.add(
{int(i+1):['{} LIMIT EXCEEDED\n'.format(KEYWORD.upper()),
submissions[i]]}
)
downloadedCount -= 1
elif submissions[i]['postType'] == 'gfycat':
print("GFYCAT")
try:
Gfycat(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
duplicates += 1
downloadedCount -= 1
except NotADownloadableLinkError as exception:
print("Could not read the page source")
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
elif submissions[i]['postType'] == 'direct':
print("DIRECT")
try:
Direct(directory,submissions[i])
except FileAlreadyExistsError:
print("It already exists")
downloadedCount -= 1
duplicates += 1
except Exception as exception:
print(exception)
FAILED_FILE.add({int(i+1):[str(exception),submissions[i]]})
downloadedCount -= 1
else:
print("No match found, skipping...")
downloadedCount -= 1
if duplicates:
print("\n There was {} duplicates".format(duplicates))
if downloadedCount == 0:
print(" Nothing downloaded :(")
else:
print(" Total of {} links downloaded!".format(downloadedCount))
def main():
if sys.argv[-1].endswith(__file__):
GLOBAL.arguments = parseArguments(input("> ").split())
else:
GLOBAL.arguments = parseArguments()
if GLOBAL.arguments.directory is not None:
GLOBAL.directory = Path(GLOBAL.arguments.directory)
else:
print("Invalid directory")
quit()
GLOBAL.config = getConfig(Path(PurePath(__file__).parent / 'config.json'))
checkConflicts()
mode = prepareAttributes()
print(sys.argv)
if GLOBAL.arguments.log is not None:
logDir = Path(GLOBAL.arguments.log)
download(postFromLog(logDir))
quit()
try:
POSTS = getPosts(prepareAttributes())
except InsufficientPermission:
print("You do not have permission to do that")
quit()
except NoMatchingSubmissionFound:
print("No matching submission was found")
quit()
except NoRedditSupoort:
print("Reddit does not support that")
quit()
except NoPrawSupport:
print("PRAW does not support that")
quit()
except MultiredditNotFound:
print("Multireddit not found")
quit()
except InvalidSortingType:
print("Invalid sorting type has given")
quit()
except InvalidRedditLink:
print("Invalid reddit link")
quit()
if POSTS is None:
print("I could not find any posts in that URL")
quit()
if GLOBAL.arguments.NoDownload:
quit()
else:
download(POSTS)
if __name__ == "__main__":
try:
VanillaPrint = print
print = printToFile
GLOBAL.RUN_TIME = time.time()
main()
except KeyboardInterrupt:
print("\nQUITTING...")
quit()