1
0
Fork 0
mirror of synced 2024-05-17 10:42:39 +12:00
bulk-downloader-for-reddit/bdfr/__main__.py

161 lines
5.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
import logging
import sys
2021-03-11 14:20:59 +13:00
import click
2021-04-12 19:58:32 +12:00
from bdfr.archiver import Archiver
2021-07-05 18:58:33 +12:00
from bdfr.cloner import RedditCloner
2021-04-12 19:58:32 +12:00
from bdfr.configuration import Configuration
from bdfr.downloader import RedditDownloader
2021-02-11 12:10:40 +13:00
# Root logger (getLogger() is called without a name). setup_logging() below
# attaches the stdout handler to this logger.
logger = logging.getLogger()
2021-03-14 14:11:37 +13:00
# Click parameters applied to every subcommand (download, archive and clone)
# through _add_options(). Every option defaults to None.
_common_options = [
    # Positional argument.
    click.argument('directory', type=str),
    # Long-form options.
    click.option('--authenticate', is_flag=True, default=None),
    click.option('--config', type=str, default=None),
    click.option('--opts', type=str, default=None),
    click.option('--disable-module', type=str, multiple=True, default=None),
    click.option('--exclude-id', multiple=True, default=None),
    click.option('--exclude-id-file', multiple=True, default=None),
    click.option('--file-scheme', type=str, default=None),
    click.option('--folder-scheme', type=str, default=None),
    click.option('--ignore-user', type=str, multiple=True, default=None),
    click.option('--include-id-file', multiple=True, default=None),
    click.option('--log', type=str, default=None),
    click.option('--saved', is_flag=True, default=None),
    click.option('--search', type=str, default=None),
    click.option('--submitted', is_flag=True, default=None),
    click.option('--subscribed', is_flag=True, default=None),
    click.option('--time-format', type=str, default=None),
    click.option('--upvoted', is_flag=True, default=None),
    # Options that also have a short alias.
    click.option('-L', '--limit', type=int, default=None),
    click.option('-l', '--link', type=str, multiple=True, default=None),
    click.option('-m', '--multireddit', type=str, multiple=True, default=None),
    click.option(
        '-S',
        '--sort',
        type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
        default=None,
    ),
    click.option('-s', '--subreddit', type=str, multiple=True, default=None),
    click.option(
        '-t',
        '--time',
        type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')),
        default=None,
    ),
    click.option('-u', '--user', type=str, multiple=True, default=None),
    # Counted flag: each repetition of -v increments the value.
    click.option('-v', '--verbose', count=True, default=None),
]
# Click options applied to the download and clone subcommands through
# _add_options(). Every option defaults to None.
_downloader_options = [
    click.option('--make-hard-links', is_flag=True, default=None),
    click.option('--max-wait-time', type=int, default=None),
    click.option('--no-dupes', is_flag=True, default=None),
    click.option('--search-existing', is_flag=True, default=None),
    # Repeatable skip filters.
    click.option('--skip', multiple=True, default=None),
    click.option('--skip-domain', multiple=True, default=None),
    click.option('--skip-subreddit', multiple=True, default=None),
    # Score bounds (integers) and score-ratio bounds (floats).
    click.option('--min-score', type=int, default=None),
    click.option('--max-score', type=int, default=None),
    click.option('--min-score-ratio', type=float, default=None),
    click.option('--max-score-ratio', type=float, default=None),
]
# Click options applied to the archive and clone subcommands through
# _add_options(). Every option defaults to None.
_archiver_options = [
    click.option('--all-comments', is_flag=True, default=None),
    click.option('--comment-context', is_flag=True, default=None),
    click.option(
        '-f',
        '--format',
        type=click.Choice(('xml', 'json', 'yaml')),
        default=None,
    ),
]
2021-03-14 14:11:37 +13:00
def _add_options(opts: list):
def wrap(func):
for opt in opts:
func = opt(func)
return func
return wrap
2021-03-14 14:11:37 +13:00
2021-02-11 12:10:40 +13:00
2021-03-11 14:20:59 +13:00
# Root click command group; the 'download', 'archive' and 'clone' subcommands
# register themselves on it via @cli.command(...). Documented with a comment
# rather than a docstring because click shows a docstring as help text.
@click.group()
def cli():
    pass
@cli.command('download')
@_add_options(_common_options)
@_add_options(_downloader_options)
@click.pass_context
def cli_download(context: click.Context, **_):
    # 'download' subcommand: build a Configuration from the click context,
    # configure logging from its verbosity, then run RedditDownloader.
    # (A comment, not a docstring: click would show a docstring as help text.)
    config = Configuration()
    config.process_click_arguments(context)
    setup_logging(config.verbose)
    try:
        RedditDownloader(config).download()
    except Exception:
        # Log the traceback, then let the exception propagate to the caller.
        logger.exception('Downloader exited unexpectedly')
        raise
    # Reached only when no exception was raised above.
    logger.info('Program complete')
2021-02-11 12:10:40 +13:00
2021-03-14 14:11:37 +13:00
@cli.command('archive')
@_add_options(_common_options)
@_add_options(_archiver_options)
@click.pass_context
def cli_archive(context: click.Context, **_):
    # 'archive' subcommand: build a Configuration from the click context,
    # configure logging from its verbosity, then run Archiver.
    # (A comment, not a docstring: click would show a docstring as help text.)
    config = Configuration()
    config.process_click_arguments(context)
    setup_logging(config.verbose)
    try:
        Archiver(config).download()
    except Exception:
        # Log the traceback, then let the exception propagate to the caller.
        logger.exception('Archiver exited unexpectedly')
        raise
    # Reached only when no exception was raised above.
    logger.info('Program complete')
@cli.command('clone')
@_add_options(_common_options)
@_add_options(_archiver_options)
@_add_options(_downloader_options)
@click.pass_context
def cli_clone(context: click.Context, **_):
    # 'clone' subcommand: accepts both the archiver and downloader options,
    # builds a Configuration from the click context, configures logging from
    # its verbosity, then runs RedditCloner.
    # (A comment, not a docstring: click would show a docstring as help text.)
    config = Configuration()
    config.process_click_arguments(context)
    setup_logging(config.verbose)
    try:
        RedditCloner(config).download()
    except Exception:
        # Log the traceback, then let the exception propagate to the caller.
        logger.exception('Scraper exited unexpectedly')
        raise
    # Reached only when no exception was raised above.
    logger.info('Program complete')
2021-03-14 14:11:37 +13:00
2021-03-15 15:37:37 +13:00
def setup_logging(verbosity: int):
    """Attach a filtered, formatted stdout handler to the module logger.

    The module-level ``logger`` is set to level 1 so that filtering by level
    is left to the handlers. The stdout handler's level depends on
    *verbosity*: ``<= 0`` gives INFO, ``1`` gives DEBUG, and anything higher
    gives the numeric level 9. The 'praw', 'prawcore' and 'urllib3' loggers
    are raised to CRITICAL.

    Args:
        verbosity: Verbosity count (the CLI commands pass ``config.verbose``).
    """
    class StreamExceptionFilter(logging.Filter):
        # Drops ERROR records that carry exception info from the stream;
        # presumably so tracebacks from logger.exception() stay off the
        # console - TODO confirm where those records are meant to end up.
        def filter(self, record: logging.LogRecord) -> bool:
            return record.levelno != logging.ERROR or not record.exc_info

    logger.setLevel(1)

    console = logging.StreamHandler(sys.stdout)
    console.addFilter(StreamExceptionFilter())
    console.setFormatter(
        logging.Formatter('[%(asctime)s - %(name)s - %(levelname)s] - %(message)s')
    )
    logger.addHandler(console)

    if verbosity <= 0:
        console_level = logging.INFO
    elif verbosity == 1:
        console_level = logging.DEBUG
    else:
        # Numeric level just below DEBUG (10).
        console_level = 9
    console.setLevel(console_level)

    for library in ('praw', 'prawcore', 'urllib3'):
        logging.getLogger(library).setLevel(logging.CRITICAL)
2021-02-11 12:10:40 +13:00
# Run the click command group when this module is executed as a script.
if __name__ == '__main__':
    cli()