#!/usr/bin/env python3
"""
This program downloads imgur, gfycat and direct image and video links of
saved posts from a reddit account. It is written in Python 3.
"""
import logging
import os
import sys
import time
from io import StringIO
from pathlib import Path
from prawcore.exceptions import InsufficientScope
from bulkredditdownloader.arguments import Arguments
from bulkredditdownloader.config import Config
from bulkredditdownloader.site_downloaders.direct import Direct
from bulkredditdownloader.site_downloaders.erome import Erome
from bulkredditdownloader.site_downloaders.gallery import Gallery
from bulkredditdownloader.site_downloaders.gfycat import Gfycat
from bulkredditdownloader.site_downloaders.gif_delivery_network import GifDeliveryNetwork
from bulkredditdownloader.site_downloaders.imgur import Imgur
from bulkredditdownloader.site_downloaders.redgifs import Redgifs
from bulkredditdownloader.site_downloaders.self_post import SelfPost
from bulkredditdownloader.site_downloaders.vreddit import VReddit
from bulkredditdownloader.site_downloaders.youtube import Youtube
from bulkredditdownloader.errors import (AlbumNotDownloadedCompletely, DomainInSkip, FailedToDownload, FileAlreadyExistsError,
ImgurLimitError, ImgurLoginError, InvalidJSONFile, NoSuitablePost, NotADownloadableLinkError,
TypeInSkip, full_exc_info)
from bulkredditdownloader.json_helper import JsonFile
from bulkredditdownloader.program_mode import ProgramMode
from bulkredditdownloader.reddit import Reddit
from bulkredditdownloader.searcher import getPosts
from bulkredditdownloader.store import Store
from bulkredditdownloader.utils import GLOBAL, createLogFile, nameCorrector, printToFile

__author__ = "Ali Parlakci"
__license__ = "GPL"
__version__ = "1.10.0"
__maintainer__ = "Ali Parlakci"
__email__ = "parlakciali@gmail.com"


def postFromLog(filename):
"""Analyze a log file and return a list of dictionaries containing
submissions
"""
    if Path(filename).is_file():
content = JsonFile(filename).read()
else:
print("File not found")
sys.exit()
try:
del content["HEADER"]
except KeyError:
pass
posts = []
for post in content:
        if content[post][-1]['TYPE'] is not None:
posts.append(content[post][-1])
return posts


def isPostExists(post, directory):
    """Figure out a file's name and check whether the file already exists."""
filename = GLOBAL.config['filename'].format(**post)
possible_extensions = [".jpg", ".png", ".mp4", ".gif", ".webm", ".md", ".mkv", ".flv"]
for extension in possible_extensions:
path = directory / Path(filename + extension)
if path.exists():
return True
return False


def downloadPost(submission, directory):
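    # Dispatch table mapping a submission's TYPE to its downloader class;
    # each class is expected to perform the download as a side effect of
    # being instantiated (see the call below).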
downloaders = {
"imgur": Imgur, "gfycat": Gfycat, "erome": Erome, "direct": Direct, "self": SelfPost,
"redgifs": Redgifs, "gifdeliverynetwork": GifDeliveryNetwork,
"v.redd.it": VReddit, "youtube": Youtube, "gallery": Gallery
}
print()
if submission['TYPE'] in downloaders:
downloaders[submission['TYPE']](directory, submission)
else:
raise NoSuitablePost


def download(submissions):
"""Analyze list of submissions and call the right function
to download each one, catch errors, update the log files
"""
downloaded_count = 0
duplicates = 0
failed_file = createLogFile("FAILED")
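    # An authenticated Reddit session is only needed when --unsave is given.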
if GLOBAL.arguments.unsave:
reddit = Reddit(GLOBAL.config['credentials']['reddit']).begin()
subs_length = len(submissions)
for i in range(len(submissions)):
print(f"\n({i+1}/{subs_length})", end="")
print(submissions[i]['POSTID'],
f"r/{submissions[i]['SUBREDDIT']}",
f"u/{submissions[i]['REDDITOR']}",
submissions[i]['FLAIR'] if submissions[i]['FLAIR'] else "",
sep="",
end="")
print(f" {submissions[i]['TYPE'].upper()}", end="", no_print=True)
directory = GLOBAL.directory / \
GLOBAL.config["folderpath"].format(**submissions[i])
        details = {
            **submissions[i],
            "TITLE": nameCorrector(
                submissions[i]['TITLE'],
                reference=str(directory)
                + GLOBAL.config['filename'].format(**submissions[i])
                + ".ext"
            )
        }
filename = GLOBAL.config['filename'].format(**details)
if isPostExists(details, directory):
print()
print(directory)
print(filename)
print("It already exists")
duplicates += 1
continue
if any(domain in submissions[i]['CONTENTURL'] for domain in GLOBAL.arguments.skip):
print()
print(submissions[i]['CONTENTURL'])
print("Domain found in skip domains, skipping post...")
continue
try:
downloadPost(details, directory)
GLOBAL.downloadedPosts.add(details['POSTID'])
try:
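                # Unsaving needs an OAuth scope the cached token may lack;
                # on InsufficientScope, re-authenticate once and retry.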
if GLOBAL.arguments.unsave:
reddit.submission(id=details['POSTID']).unsave()
except InsufficientScope:
reddit = Reddit().begin()
reddit.submission(id=details['POSTID']).unsave()
downloaded_count += 1
except FileAlreadyExistsError:
print("It already exists")
GLOBAL.downloadedPosts.add(details['POSTID'])
duplicates += 1
except ImgurLoginError:
print("Imgur login failed. \nQuitting the program as unexpected errors might occur.")
sys.exit()
except ImgurLimitError as exception:
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)), details
]})
except NotADownloadableLinkError as exception:
print("{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)))
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exception.__class__.__name__, info=str(exception)),
submissions[i]
]})
except TypeInSkip:
print()
print(submissions[i]['CONTENTURL'])
print("Skipping post...")
except DomainInSkip:
print()
print(submissions[i]['CONTENTURL'])
print("Skipping post...")
except NoSuitablePost:
print("No match found, skipping...")
except FailedToDownload:
print("Failed to download the posts, skipping...")
        except AlbumNotDownloadedCompletely as exc:
            print("Album was not downloaded completely.")
            failed_file.add({int(i + 1): [
                "{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
                submissions[i]
            ]})
except Exception as exc:
print("{class_name}: {info}\nSee CONSOLE_LOG.txt for more information".format(
class_name=exc.__class__.__name__, info=str(exc))
)
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(), no_print=True)
failed_file.add({int(i + 1): [
"{class_name}: {info}".format(class_name=exc.__class__.__name__, info=str(exc)),
submissions[i]
]})
if duplicates:
print(f"\nThere {'were' if duplicates > 1 else 'was'} {duplicates} duplicate{'s' if duplicates > 1 else ''}")
if downloaded_count == 0:
print("Nothing is downloaded :(")
else:
print(f"Total of {downloaded_count} link{'s' if downloaded_count > 1 else ''} downloaded!")


def printLogo():
VanillaPrint(f"\nBulk Downloader for Reddit v{__version__}\n"
f"Written by Ali PARLAKCI parlakciali@gmail.com\n\n"
f"https://github.com/aliparlakci/bulk-downloader-for-reddit/\n"
)


def main():
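    # A config.json in the working directory takes precedence over the one
    # in the default config directory.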
if Path("config.json").exists():
GLOBAL.configDirectory = Path("config.json")
else:
if not Path(GLOBAL.defaultConfigDirectory).is_dir():
os.makedirs(GLOBAL.defaultConfigDirectory)
GLOBAL.configDirectory = GLOBAL.defaultConfigDirectory / "config.json"
try:
GLOBAL.config = Config(GLOBAL.configDirectory).generate()
except InvalidJSONFile as exception:
VanillaPrint(str(exception.__class__.__name__), ">>", str(exception))
VanillaPrint("Resolve it or remove it to proceed")
sys.exit()
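    # Default options stored in the config are appended to argv so argparse
    # treats them exactly like flags typed on the command line.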
sys.argv = sys.argv + GLOBAL.config["options"].split()
arguments = Arguments.parse()
GLOBAL.arguments = arguments
if arguments.set_filename:
Config(GLOBAL.configDirectory).setCustomFileName()
sys.exit()
if arguments.set_folderpath:
Config(GLOBAL.configDirectory).setCustomFolderPath()
sys.exit()
if arguments.set_default_directory:
Config(GLOBAL.configDirectory).setDefaultDirectory()
sys.exit()
if arguments.set_default_options:
Config(GLOBAL.configDirectory).setDefaultOptions()
sys.exit()
if arguments.use_local_config:
JsonFile("config.json").add(GLOBAL.config)
sys.exit()
if arguments.directory:
GLOBAL.directory = Path(arguments.directory.strip())
elif "default_directory" in GLOBAL.config and GLOBAL.config["default_directory"] != "":
GLOBAL.directory = Path(
GLOBAL.config["default_directory"].format(time=GLOBAL.RUN_TIME))
else:
GLOBAL.directory = Path(input("\ndownload directory: ").strip())
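    # Store tracks IDs of already-downloaded posts; given a file path it
    # presumably persists them so later runs can skip those posts.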
if arguments.downloaded_posts:
GLOBAL.downloadedPosts = Store(arguments.downloaded_posts)
else:
GLOBAL.downloadedPosts = Store()
printLogo()
print("\n", " ".join(sys.argv), "\n", no_print=True)
    if arguments.log is not None:
        log_file = Path(arguments.log)
        download(postFromLog(log_file))
        sys.exit()
program_mode = ProgramMode(arguments).generate()
try:
posts = getPosts(program_mode)
except Exception as exc:
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue(), no_print=True)
print(exc)
sys.exit()
if posts is None:
print("I could not find any posts in that URL")
sys.exit()
    if not GLOBAL.arguments.no_download:
        download(posts)


if __name__ == "__main__":
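    # Log records go to an in-memory buffer so the exception handlers can
    # dump the full traceback into the console log only when needed.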
GLOBAL.log_stream = StringIO()
logging.basicConfig(stream=GLOBAL.log_stream, level=logging.INFO)
try:
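        # Shadow the built-in print with printToFile, which is assumed to
        # mirror console output into a log file and accept a no_print flag;
        # VanillaPrint keeps the original for console-only output.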
VanillaPrint = print
print = printToFile
GLOBAL.RUN_TIME = str(time.strftime("%d-%m-%Y_%H-%M-%S", time.localtime(time.time())))
main()
except KeyboardInterrupt:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
except Exception as exception:
if GLOBAL.directory is None:
GLOBAL.directory = Path("../..\\")
logging.error(sys.exc_info()[0].__name__, exc_info=full_exc_info(sys.exc_info()))
print(GLOBAL.log_stream.getvalue())
if not GLOBAL.arguments.quit:
input("\nPress enter to quit\n")