1
0
Fork 0
mirror of synced 2024-05-29 16:40:06 +12:00

Add integration tests for archiver

This commit is contained in:
Serene-Arc 2021-03-14 11:11:37 +10:00 committed by Ali Parlakci
parent c2d3cfd50f
commit b08c31a1db
3 changed files with 83 additions and 21 deletions

View file

@ -5,12 +5,37 @@ import sys
import click
from bulkredditdownloader.archiver import Archiver
from bulkredditdownloader.configuration import Configuration
from bulkredditdownloader.downloader import RedditDownloader
from bulkredditdownloader.exceptions import BulkDownloaderException
logger = logging.getLogger()
# Click parameter decorators shared by the `download` and `archive` commands.
# They are attached to a command function by _add_common_options, which walks
# this list front to back, so the order here is significant to click.
_common_options = [
    # The single positional argument.
    click.argument('directory', type=str),

    click.option('--config', type=str, default=None),
    # count=True: each repetition of -v increments the value.
    click.option('-v', '--verbose', default=None, count=True),

    # multiple=True: these may be given more than once on the command line.
    click.option('-l', '--link', multiple=True, default=None, type=str),
    click.option('-s', '--subreddit', multiple=True, default=None, type=str),
    click.option('-m', '--multireddit', multiple=True, default=None, type=str),

    click.option('-L', '--limit', default=None, type=int),

    # Boolean flags.
    click.option('--authenticate', is_flag=True, default=None),
    click.option('--submitted', is_flag=True, default=None),
    click.option('--upvoted', is_flag=True, default=None),
    click.option('--saved', is_flag=True, default=None),

    click.option('--search', default=None, type=str),
    click.option('-u', '--user', type=str, default=None),

    # Values restricted to a fixed set of choices.
    click.option(
        '-t', '--time',
        type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')),
        default=None,
    ),
    click.option(
        '-S', '--sort',
        type=click.Choice(('hot', 'top', 'new', 'controversial', 'rising', 'relevance')),
        default=None,
    ),
]
def _add_common_options(func):
    """Decorate ``func`` with every entry of ``_common_options``.

    The decorators are applied one after another in list order, each wrapping
    the result of the previous one, and the final wrapped callable is returned.
    """
    decorated = func
    for decorator in _common_options:
        decorated = decorator(decorated)
    return decorated
@click.group()
def cli():
@ -18,28 +43,13 @@ def cli():
@cli.command('download')
@click.argument('directory', type=str)
@click.option('-v', '--verbose', default=None, count=True)
@click.option('-l', '--link', multiple=True, default=None, type=str)
@click.option('-s', '--subreddit', multiple=True, default=None, type=str)
@click.option('-m', '--multireddit', multiple=True, default=None, type=str)
@click.option('-L', '--limit', default=None, type=int)
@click.option('--authenticate', is_flag=True, default=None)
@click.option('--submitted', is_flag=True, default=None)
@click.option('--upvoted', is_flag=True, default=None)
@click.option('--saved', is_flag=True, default=None)
@click.option('--search', default=None, type=str)
@click.option('-u', '--user', type=str, default=None)
@click.option('-t', '--time', type=click.Choice(('all', 'hour', 'day', 'week', 'month', 'year')), default=None)
@click.option('-S', '--sort', type=click.Choice(('hot', 'top', 'new',
'controversial', 'rising', 'relevance')), default=None)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@click.option('--no-dupes', is_flag=True, default=None)
@click.option('--search-existing', is_flag=True, default=None)
@click.option('--set-file-scheme', default=None, type=str)
@click.option('--set-folder-scheme', default=None, type=str)
@click.option('--no-dupes', is_flag=True, default=None)
@click.option('--config', type=str, default=None)
@click.option('--search-existing', is_flag=True, default=None)
@click.option('--skip', default=None, multiple=True)
@click.option('--skip-domain', default=None, multiple=True)
@_add_common_options
@click.pass_context
def cli_download(context: click.Context, **_):
config = Configuration()
@ -50,6 +60,19 @@ def cli_download(context: click.Context, **_):
logger.info('Program complete')
@cli.command('archive')
@_add_common_options
# The short form must be exactly '-f'; a stray trailing comma here would
# register the literal string '-f,' as the option name instead.
@click.option('-f', '--format', type=click.Choice(('xml', 'json', 'yaml')), default=None)
@click.pass_context
def cli_archive(context: click.Context, **_):
    """Archive Reddit submissions into DIRECTORY as XML, JSON or YAML files."""
    # The parsed options arrive as keyword arguments but are deliberately
    # ignored (**_); the configuration reads them from the click context.
    config = Configuration()
    config.process_click_arguments(context)
    _setup_logging(config.verbose)

    reddit_archiver = Archiver(config)
    reddit_archiver.download()
    logger.info('Program complete')
def _setup_logging(verbosity: int):
logger.setLevel(1)
stream = logging.StreamHandler(sys.stdout)

View file

@ -42,6 +42,7 @@ class Archiver(RedditDownloader):
def _write_submission_json(self, entry: ArchiveEntry):
    """Write the compiled form of ``entry`` to a ``.json`` file.

    The destination is produced by ``self.file_name_formatter`` relative to
    ``self.download_directory``; missing parent folders are created first.
    """
    json_resource = Resource(entry.submission, '', '.json')
    file_path = self.file_name_formatter.format_path(json_resource, self.download_directory)
    # The formatted path may point into folders that do not exist yet.
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w') as output_file:
        logger.debug(f'Writing submission {entry.submission.id} to file in JSON format at {file_path}')
        compiled_entry = entry.compile()
        json.dump(compiled_entry, output_file)
@ -49,6 +50,7 @@ class Archiver(RedditDownloader):
def _write_submission_xml(self, entry: ArchiveEntry):
resource = Resource(entry.submission, '', '.xml')
file_path = self.file_name_formatter.format_path(resource, self.download_directory)
file_path.parent.mkdir(exist_ok=True, parents=True)
with open(file_path, 'w') as file:
logger.debug(f'Writing submission {entry.submission.id} to file in XML format at {file_path}')
xml_entry = dict2xml.dict2xml(entry.compile(), wrap='root')
@ -57,6 +59,7 @@ class Archiver(RedditDownloader):
def _write_submission_yaml(self, entry: ArchiveEntry):
    """Write the compiled form of ``entry`` to a ``.yaml`` file.

    The destination is produced by ``self.file_name_formatter`` relative to
    ``self.download_directory``; missing parent folders are created first.
    """
    yaml_resource = Resource(entry.submission, '', '.yaml')
    file_path = self.file_name_formatter.format_path(yaml_resource, self.download_directory)
    # The formatted path may point into folders that do not exist yet.
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w') as output_file:
        logger.debug(f'Writing submission {entry.submission.id} to file in YAML format at {file_path}')
        compiled_entry = entry.compile()
        yaml.dump(compiled_entry, output_file)

View file

@ -1,6 +1,7 @@
#!/usr/bin/env python3
# coding=utf-8
import re
from pathlib import Path
import pytest
@ -163,3 +164,38 @@ def test_cli_download_long(test_args: list[str], tmp_path: Path):
test_args = ['download', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
result = runner.invoke(cli, test_args)
assert result.exit_code == 0
@pytest.mark.online
@pytest.mark.reddit
# `Path(...) is False` can never be true (a Path object is not the False
# singleton), so the guard must test for the file's existence explicitly.
@pytest.mark.skipif(
    not Path('test_config.cfg').exists(),
    reason='A test config file is required for integration tests',
)
# Command-line arguments are strings; click converts '-L' to int itself.
@pytest.mark.parametrize('test_args', (
    ['--subreddit', 'Mindustry', '-L', '25'],
    ['--subreddit', 'Mindustry', '-L', '25', '--format', 'xml'],
    ['--subreddit', 'Mindustry', '-L', '25', '--format', 'yaml'],
    ['--subreddit', 'Mindustry', '-L', '25', '--sort', 'new'],
    ['--subreddit', 'Mindustry', '-L', '25', '--time', 'day'],
    ['--subreddit', 'Mindustry', '-L', '25', '--time', 'day', '--sort', 'new'],
))
def test_cli_archive_subreddit(test_args: list[str], tmp_path: Path):
    """Run the `archive` command against a subreddit and check it writes output.

    The command is invoked in-process with ``tmp_path`` as the target directory.
    The test passes when the exit code is 0 and the captured output contains a
    'Writing submission ... to file in ... format' log line.
    """
    runner = CliRunner()
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing submission .*? to file in .*? format', result.output)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.slow
# `Path(...) is False` can never be true (a Path object is not the False
# singleton), so the guard must test for the file's existence explicitly.
@pytest.mark.skipif(
    not Path('test_config.cfg').exists(),
    reason='A test config file is required for integration tests',
)
# Command-line arguments are strings; click converts '-L' to int itself.
@pytest.mark.parametrize('test_args', (
    ['--subreddit', 'all', '-L', '100'],
    ['--subreddit', 'all', '-L', '100', '--sort', 'new'],
))
def test_cli_archive_long(test_args: list[str], tmp_path: Path):
    """Run the `archive` command with a larger limit and check it writes output.

    The command is invoked in-process with ``tmp_path`` as the target directory.
    The test passes when the exit code is 0 and the captured output contains a
    'Writing submission ... to file in ... format' log line.
    """
    runner = CliRunner()
    test_args = ['archive', str(tmp_path), '-v', '--config', 'test_config.cfg'] + test_args
    result = runner.invoke(cli, test_args)
    assert result.exit_code == 0
    assert re.search(r'Writing submission .*? to file in .*? format', result.output)