1
0
Fork 0
mirror of synced 2024-05-19 11:42:40 +12:00
bulk-downloader-for-reddit/bdfr/__main__.py

223 lines
7.7 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
import sys
2021-03-11 14:20:59 +13:00
import click
import requests
2021-03-11 14:20:59 +13:00
from bdfr import __version__
2021-04-12 19:58:32 +12:00
from bdfr.archiver import Archiver
2021-07-05 18:58:33 +12:00
from bdfr.cloner import RedditCloner
2022-12-17 08:56:44 +13:00
from bdfr.completion import Completion
2021-04-12 19:58:32 +12:00
from bdfr.configuration import Configuration
from bdfr.downloader import RedditDownloader
2021-02-11 12:10:40 +13:00
# Root logger (getLogger() is called without a name). The command functions in
# this module log their completion/failure messages through it, and
# make_console_logging_handler() adjusts its level.
logger = logging.getLogger()
2021-03-14 14:11:37 +13:00
# Click decorators shared by the download, archive and clone sub-commands.
# They are attached to a command function with _add_options(). Every option
# defaults to None (presumably so that unset options can be told apart from
# explicit values when the Configuration is filled in -- confirm there).
_common_options = [
    # Positional argument.
    click.argument("directory", type=str),
    # Long-form options.
    click.option("--authenticate", is_flag=True, default=None),
    click.option("--config", type=str, default=None),
    click.option("--disable-module", type=str, multiple=True, default=None),
    click.option("--exclude-id", multiple=True, default=None),
    click.option("--exclude-id-file", multiple=True, default=None),
    click.option("--file-scheme", type=str, default=None),
    click.option(
        "--filename-restriction-scheme",
        type=click.Choice(("linux", "windows")),
        default=None,
    ),
    click.option("--folder-scheme", type=str, default=None),
    click.option("--ignore-user", type=str, multiple=True, default=None),
    click.option("--include-id-file", multiple=True, default=None),
    click.option("--log", type=str, default=None),
    click.option("--opts", type=str, default=None),
    click.option("--saved", is_flag=True, default=None),
    click.option("--search", type=str, default=None),
    click.option("--submitted", is_flag=True, default=None),
    click.option("--subscribed", is_flag=True, default=None),
    click.option("--time-format", type=str, default=None),
    click.option("--upvoted", is_flag=True, default=None),
    # Options that also have a short form.
    click.option("-L", "--limit", type=int, default=None),
    click.option("-l", "--link", type=str, multiple=True, default=None),
    click.option("-m", "--multireddit", type=str, multiple=True, default=None),
    click.option(
        "-S",
        "--sort",
        type=click.Choice(("hot", "top", "new", "controversial", "rising", "relevance")),
        default=None,
    ),
    click.option("-s", "--subreddit", type=str, multiple=True, default=None),
    click.option(
        "-t",
        "--time",
        type=click.Choice(("all", "hour", "day", "week", "month", "year")),
        default=None,
    ),
    click.option("-u", "--user", type=str, multiple=True, default=None),
    # Counted flag: repeating -v raises the value.
    click.option("-v", "--verbose", count=True, default=None),
]
# Click decorators used only by the sub-commands that download files
# (download and clone). Attached with _add_options(); all default to None.
_downloader_options = [
    # Boolean switches.
    click.option("--make-hard-links", is_flag=True, default=None),
    click.option("--max-wait-time", type=int, default=None),
    click.option("--no-dupes", is_flag=True, default=None),
    click.option("--search-existing", is_flag=True, default=None),
    # Repeatable skip filters.
    click.option("--skip", multiple=True, default=None),
    click.option("--skip-domain", multiple=True, default=None),
    click.option("--skip-subreddit", multiple=True, default=None),
    # Score thresholds.
    click.option("--min-score", type=int, default=None),
    click.option("--max-score", type=int, default=None),
    click.option("--min-score-ratio", type=float, default=None),
    click.option("--max-score-ratio", type=float, default=None),
]
# Click decorators used only by the sub-commands that archive post data
# (archive and clone). Attached with _add_options(); all default to None.
_archiver_options = [
    click.option("--all-comments", is_flag=True, default=None),
    click.option("--comment-context", is_flag=True, default=None),
    click.option(
        "-f",
        "--format",
        type=click.Choice(("xml", "json", "yaml")),
        default=None,
    ),
]
2021-03-14 14:11:37 +13:00
def _add_options(opts: list):
    """Build a decorator that applies every decorator in *opts* to a function.

    The decorators are applied one after another in list order, so the first
    entry wraps the bare function and the last entry ends up outermost.

    Args:
        opts: Decorators (callables taking and returning a function).

    Returns:
        A decorator that returns the fully wrapped function.
    """

    def wrap(func):
        decorated = func
        for decorate in opts:
            decorated = decorate(decorated)
        return decorated

    return wrap
2021-03-14 14:11:37 +13:00
2021-02-11 12:10:40 +13:00
def _check_version(context, param, value):
    """Click callback for ``--version``: print installed and latest versions, then exit.

    The latest version is read from the PyPI JSON API for the ``bdfr`` package.

    Args:
        context: The click context; used for ``resilient_parsing`` and ``exit``.
        param: The click parameter that triggered the callback (unused).
        value: Truthy when ``--version`` was supplied.

    Returns:
        None. When the flag is absent, or click is parsing resiliently, the
        function returns without doing anything; otherwise it leaves through
        ``context.exit``.
    """
    if not value or context.resilient_parsing:
        return
    current = __version__
    try:
        # Without a timeout requests waits indefinitely, so an unreachable
        # PyPI would hang the command instead of failing.
        response = requests.get("https://pypi.org/pypi/bdfr/json", timeout=10)
        response.raise_for_status()
        latest = response.json()["info"]["version"]
    except (requests.RequestException, KeyError, ValueError) as error:
        # Network/HTTP failure or an unexpected payload: still report the
        # installed version, then exit with a failure status.
        print(f"You are currently using v{current}; the latest version could not be determined ({error})")
        context.exit(1)
    else:
        print(f"You are currently using v{current} the latest is v{latest}")
        context.exit()
2021-03-11 14:20:59 +13:00
# Top-level command group; the sub-commands below register themselves on it.
# --version is eager and not passed to the function: _check_version handles it.
@click.group()
@click.help_option("-h", "--help")
@click.option(
    "--version",
    callback=_check_version,
    expose_value=False,
    is_eager=True,
    is_flag=True,
    help="Check version and exit.",
)
def cli():
    """BDFR is used to download and archive content from Reddit."""
2022-12-03 18:11:17 +13:00
@cli.command("download")
@_add_options(_common_options)
@_add_options(_downloader_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_download(context: click.Context, **_):
    """Used to download content posted to Reddit."""
    # The parsed option values arrive as keyword arguments but are ignored
    # here; the Configuration is filled in from the click context instead.
    settings = Configuration()
    settings.process_click_arguments(context)
    silence_module_loggers()
    console_handler = make_console_logging_handler(settings.verbose)
    try:
        RedditDownloader(settings, [console_handler]).download()
    except Exception:
        # Record the traceback through the logger, then let the error propagate.
        logger.exception("Downloader exited unexpectedly")
        raise
    # Only reached on success, because the handler above always re-raises.
    logger.info("Program complete")
2021-02-11 12:10:40 +13:00
2022-12-03 18:11:17 +13:00
@cli.command("archive")
@_add_options(_common_options)
@_add_options(_archiver_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_archive(context: click.Context, **_):
    """Used to archive post data from Reddit."""
    # The parsed option values arrive as keyword arguments but are ignored
    # here; the Configuration is filled in from the click context instead.
    settings = Configuration()
    settings.process_click_arguments(context)
    silence_module_loggers()
    console_handler = make_console_logging_handler(settings.verbose)
    try:
        Archiver(settings, [console_handler]).download()
    except Exception:
        # Record the traceback through the logger, then let the error propagate.
        logger.exception("Archiver exited unexpectedly")
        raise
    # Only reached on success, because the handler above always re-raises.
    logger.info("Program complete")
2022-12-03 18:11:17 +13:00
@cli.command("clone")
@_add_options(_common_options)
@_add_options(_archiver_options)
@_add_options(_downloader_options)
@click.help_option("-h", "--help")
@click.pass_context
def cli_clone(context: click.Context, **_):
    """Combines archive and download commands."""
    # The parsed option values arrive as keyword arguments but are ignored
    # here; the Configuration is filled in from the click context instead.
    settings = Configuration()
    settings.process_click_arguments(context)
    silence_module_loggers()
    console_handler = make_console_logging_handler(settings.verbose)
    try:
        RedditCloner(settings, [console_handler]).download()
    except Exception:
        # Record the traceback through the logger, then let the error propagate.
        logger.exception("Scraper exited unexpectedly")
        raise
    # Only reached on success, because the handler above always re-raises.
    logger.info("Program complete")
2021-03-14 14:11:37 +13:00
2022-12-17 08:56:44 +13:00
@cli.command("completion")
@click.argument("shell", type=click.Choice(("all", "bash", "fish", "zsh"), case_sensitive=False), default="all")
@click.help_option("-h", "--help")
@click.option("-u", "--uninstall", is_flag=True, default=False, help="Uninstall completion")
def cli_completion(shell: str, uninstall: bool):
    """\b
    Installs shell completions for BDFR.
    Options: all, bash, fish, zsh
    Default: all"""
    # The docstring above doubles as the command's help text, so it is kept
    # short; implementation notes live in comments instead.
    #
    # shell:     which shell(s) to act on; one of all/bash/fish/zsh.
    # uninstall: remove existing completions instead of installing them.
    shell = shell.lower()
    if sys.platform == "win32":
        print("Completions are not currently supported on Windows.")
        return
    if uninstall:
        # Always stop after the uninstall prompt. Previously, answering "no"
        # fell through to the install prompt below, so a declined uninstall
        # could end up offering to install completions instead.
        if click.confirm(f"Would you like to uninstall {shell} completions for BDFR"):
            Completion(shell).uninstall()
        return
    # Defensive check; the click.Choice on the argument already restricts the
    # value when the command is invoked from the command line.
    if shell not in ("all", "bash", "fish", "zsh"):
        print(f"{shell} is not a valid option.")
        print("Options: all, bash, fish, zsh")
        return
    if click.confirm(f"Would you like to install {shell} completions for BDFR"):
        Completion(shell).install()
def make_console_logging_handler(verbosity: int) -> logging.StreamHandler:
    """Create the stdout log handler and set the module logger's level.

    The module-level ``logger`` is set to level 1, leaving the filtering to
    the handler. The handler drops ERROR records that carry exception
    information and uses a threshold chosen from *verbosity*:

    * ``<= 0`` -> ``logging.INFO``
    * ``1``    -> ``logging.DEBUG``
    * other    -> ``9``

    Args:
        verbosity: Verbosity count.

    Returns:
        The configured ``logging.StreamHandler`` writing to ``sys.stdout``.
    """

    class StreamExceptionFilter(logging.Filter):
        """Reject ERROR-level records that have exception info attached."""

        def filter(self, record: logging.LogRecord) -> bool:
            return record.levelno != logging.ERROR or not record.exc_info

    logger.setLevel(1)

    console = logging.StreamHandler(sys.stdout)
    console.addFilter(StreamExceptionFilter())
    console.setFormatter(logging.Formatter("[%(asctime)s - %(name)s - %(levelname)s] - %(message)s"))

    if verbosity <= 0:
        threshold = logging.INFO
    elif verbosity == 1:
        threshold = logging.DEBUG
    else:
        threshold = 9
    console.setLevel(threshold)
    return console
def silence_module_loggers():
    """Raise the praw, prawcore and urllib3 loggers to CRITICAL."""
    for module_name in ("praw", "prawcore", "urllib3"):
        logging.getLogger(module_name).setLevel(logging.CRITICAL)
2022-12-03 18:11:17 +13:00
if __name__ == "__main__":
    # Executed as a script: hand control to the click command group.
    cli()