1
0
Fork 0
mirror of synced 2024-05-29 16:40:06 +12:00
bulk-downloader-for-reddit/bulkredditdownloader/tests/test_downloader.py

423 lines
17 KiB
Python
Raw Normal View History

2021-03-03 15:53:53 +13:00
#!/usr/bin/env python3
# coding=utf-8
import argparse
import re
2021-03-03 15:53:53 +13:00
from pathlib import Path
from typing import Iterator
2021-03-03 15:53:53 +13:00
from unittest.mock import MagicMock
import praw
import praw.models
import pytest
2021-03-15 15:37:37 +13:00
from bulkredditdownloader.__main__ import setup_logging
2021-03-11 00:47:57 +13:00
from bulkredditdownloader.configuration import Configuration
2021-03-03 15:53:53 +13:00
from bulkredditdownloader.download_filter import DownloadFilter
from bulkredditdownloader.downloader import RedditDownloader, RedditTypes
2021-03-05 16:32:24 +13:00
from bulkredditdownloader.exceptions import BulkDownloaderException, RedditAuthenticationError, RedditUserError
2021-03-03 15:53:53 +13:00
from bulkredditdownloader.file_name_formatter import FileNameFormatter
from bulkredditdownloader.site_authenticator import SiteAuthenticator
@pytest.fixture()
2021-03-11 00:47:57 +13:00
def args() -> Configuration:
args = Configuration()
2021-03-03 15:53:53 +13:00
return args
@pytest.fixture()
def downloader_mock(args: Configuration):
downloader_mock = MagicMock()
downloader_mock.args = args
downloader_mock._sanitise_subreddit_name = RedditDownloader._sanitise_subreddit_name
downloader_mock._split_args_input = RedditDownloader._split_args_input
downloader_mock.master_hash_list = {}
return downloader_mock
2021-03-03 15:53:53 +13:00
def assert_all_results_are_submissions(result_limit: int, results: list[Iterator]):
results = [sub for res in results for sub in res]
assert all([isinstance(res, praw.models.Submission) for res in results])
if result_limit is not None:
assert len(results) == result_limit
return results
2021-03-03 15:53:53 +13:00
def test_determine_directories(tmp_path: Path, downloader_mock: MagicMock):
downloader_mock.args.directory = tmp_path / 'test'
2021-03-27 00:42:47 +13:00
downloader_mock.config_directories.user_config_dir = tmp_path
2021-03-03 15:53:53 +13:00
RedditDownloader._determine_directories(downloader_mock)
assert Path(tmp_path / 'test').exists()
@pytest.mark.parametrize(('skip_extensions', 'skip_domains'), (
([], []),
(['.test'], ['test.com']),
))
def test_create_download_filter(skip_extensions: list[str], skip_domains: list[str], downloader_mock: MagicMock):
downloader_mock.args.skip = skip_extensions
downloader_mock.args.skip_domain = skip_domains
result = RedditDownloader._create_download_filter(downloader_mock)
assert isinstance(result, DownloadFilter)
assert result.excluded_domains == skip_domains
assert result.excluded_extensions == skip_extensions
@pytest.mark.parametrize(('test_time', 'expected'), (
('all', 'all'),
('hour', 'hour'),
('day', 'day'),
('week', 'week'),
('random', 'all'),
('', 'all'),
))
def test_create_time_filter(test_time: str, expected: str, downloader_mock: MagicMock):
downloader_mock.args.time = test_time
result = RedditDownloader._create_time_filter(downloader_mock)
assert isinstance(result, RedditTypes.TimeType)
assert result.name.lower() == expected
@pytest.mark.parametrize(('test_sort', 'expected'), (
('', 'hot'),
('hot', 'hot'),
('controversial', 'controversial'),
('new', 'new'),
))
def test_create_sort_filter(test_sort: str, expected: str, downloader_mock: MagicMock):
downloader_mock.args.sort = test_sort
result = RedditDownloader._create_sort_filter(downloader_mock)
assert isinstance(result, RedditTypes.SortType)
assert result.name.lower() == expected
@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme'), (
('{POSTID}', '{SUBREDDIT}'),
('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}'),
('{POSTID}', 'test'),
('{POSTID}', ''),
2021-03-03 15:53:53 +13:00
))
def test_create_file_name_formatter(test_file_scheme: str, test_folder_scheme: str, downloader_mock: MagicMock):
2021-03-20 17:05:07 +13:00
downloader_mock.args.file_scheme = test_file_scheme
downloader_mock.args.folder_scheme = test_folder_scheme
2021-03-03 15:53:53 +13:00
result = RedditDownloader._create_file_name_formatter(downloader_mock)
assert isinstance(result, FileNameFormatter)
assert result.file_format_string == test_file_scheme
assert result.directory_format_string == test_folder_scheme
@pytest.mark.parametrize(('test_file_scheme', 'test_folder_scheme'), (
('', ''),
('', '{SUBREDDIT}'),
('test', '{SUBREDDIT}'),
))
def test_create_file_name_formatter_bad(test_file_scheme: str, test_folder_scheme: str, downloader_mock: MagicMock):
2021-03-20 17:05:07 +13:00
downloader_mock.args.file_scheme = test_file_scheme
downloader_mock.args.folder_scheme = test_folder_scheme
2021-03-03 15:53:53 +13:00
with pytest.raises(BulkDownloaderException):
RedditDownloader._create_file_name_formatter(downloader_mock)
def test_create_authenticator(downloader_mock: MagicMock):
result = RedditDownloader._create_authenticator(downloader_mock)
assert isinstance(result, SiteAuthenticator)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize('test_submission_ids', (
('lvpf4l',),
('lvpf4l', 'lvqnsn'),
('lvpf4l', 'lvqnsn', 'lvl9kd'),
))
def test_get_submissions_from_link(
test_submission_ids: list[str],
reddit_instance: praw.Reddit,
downloader_mock: MagicMock):
downloader_mock.args.link = test_submission_ids
downloader_mock.reddit_instance = reddit_instance
results = RedditDownloader._get_submissions_from_link(downloader_mock)
assert all([isinstance(sub, praw.models.Submission) for res in results for sub in res])
assert len(results[0]) == len(test_submission_ids)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_subreddits', 'limit'), (
(('Futurology',), 10),
2021-03-15 16:22:41 +13:00
(('Futurology', 'Mindustry, Python'), 10),
2021-03-03 15:53:53 +13:00
(('Futurology',), 20),
(('Futurology', 'Python'), 10),
(('Futurology',), 100),
(('Futurology',), 0),
))
def test_get_subreddit_normal(
test_subreddits: list[str],
limit: int,
downloader_mock: MagicMock,
reddit_instance: praw.Reddit):
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.args.limit = limit
downloader_mock.args.subreddit = test_subreddits
downloader_mock.reddit_instance = reddit_instance
2021-03-03 15:53:53 +13:00
downloader_mock.sort_filter = RedditTypes.SortType.HOT
results = RedditDownloader._get_subreddits(downloader_mock)
2021-03-15 16:22:41 +13:00
test_subreddits = downloader_mock._split_args_input(test_subreddits)
results = assert_all_results_are_submissions(
(limit * len(test_subreddits)) if limit else None, results)
assert all([res.subreddit.display_name in test_subreddits for res in results])
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_subreddits', 'search_term', 'limit'), (
(('Python',), 'scraper', 10),
(('Python',), '', 10),
(('Python',), 'djsdsgewef', 0),
2021-03-03 15:53:53 +13:00
))
def test_get_subreddit_search(
test_subreddits: list[str],
search_term: str,
limit: int,
downloader_mock: MagicMock,
reddit_instance: praw.Reddit):
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.args.limit = limit
downloader_mock.args.search = search_term
2021-03-03 15:53:53 +13:00
downloader_mock.args.subreddit = test_subreddits
downloader_mock.reddit_instance = reddit_instance
downloader_mock.sort_filter = RedditTypes.SortType.HOT
results = RedditDownloader._get_subreddits(downloader_mock)
results = assert_all_results_are_submissions(
(limit * len(test_subreddits)) if limit else None, results)
assert all([res.subreddit.display_name in test_subreddits for res in results])
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_user', 'test_multireddits', 'limit'), (
('helen_darten', ('cuteanimalpics',), 10),
('korfor', ('chess',), 100),
))
# Good sources at https://www.reddit.com/r/multihub/
def test_get_multireddits_public(
test_user: str,
test_multireddits: list[str],
limit: int,
reddit_instance: praw.Reddit,
downloader_mock: MagicMock):
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.sort_filter = RedditTypes.SortType.HOT
downloader_mock.args.limit = limit
downloader_mock.args.multireddit = test_multireddits
downloader_mock.args.user = test_user
downloader_mock.reddit_instance = reddit_instance
results = RedditDownloader._get_multireddits(downloader_mock)
assert_all_results_are_submissions((limit * len(test_multireddits)) if limit else None, results)
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(('test_user', 'limit'), (
('danigirl3694', 10),
('danigirl3694', 50),
('CapitanHam', None),
))
def test_get_user_submissions(test_user: str, limit: int, downloader_mock: MagicMock, reddit_instance: praw.Reddit):
2021-03-03 15:53:53 +13:00
downloader_mock.args.limit = limit
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.sort_filter = RedditTypes.SortType.HOT
downloader_mock.args.submitted = True
downloader_mock.args.user = test_user
downloader_mock.authenticated = False
downloader_mock.reddit_instance = reddit_instance
results = RedditDownloader._get_user_data(downloader_mock)
results = assert_all_results_are_submissions(limit, results)
assert all([res.author.name == test_user for res in results])
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
2021-03-10 14:06:50 +13:00
@pytest.mark.authenticated
def test_get_user_upvoted(downloader_mock: MagicMock, authenticated_reddit_instance: praw.Reddit):
downloader_mock.reddit_instance = authenticated_reddit_instance
downloader_mock.args.user = 'me'
downloader_mock.args.upvoted = True
downloader_mock.args.limit = 10
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.sort_filter = RedditTypes.SortType.HOT
RedditDownloader._resolve_user_name(downloader_mock)
results = RedditDownloader._get_user_data(downloader_mock)
assert_all_results_are_submissions(10, results)
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
2021-03-10 14:06:50 +13:00
@pytest.mark.authenticated
def test_get_user_saved(downloader_mock: MagicMock, authenticated_reddit_instance: praw.Reddit):
downloader_mock.reddit_instance = authenticated_reddit_instance
downloader_mock.args.user = 'me'
downloader_mock.args.saved = True
downloader_mock.args.limit = 10
downloader_mock._determine_sort_function.return_value = praw.models.Subreddit.hot
downloader_mock.sort_filter = RedditTypes.SortType.HOT
RedditDownloader._resolve_user_name(downloader_mock)
results = RedditDownloader._get_user_data(downloader_mock)
assert_all_results_are_submissions(10, results)
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
2021-03-18 01:28:31 +13:00
@pytest.mark.parametrize(('test_submission_id', 'expected_files_len'), (
('ljyy27', 4),
))
def test_download_submission(
test_submission_id: str,
expected_files_len: int,
downloader_mock: MagicMock,
reddit_instance: praw.Reddit,
tmp_path: Path):
downloader_mock.reddit_instance = reddit_instance
downloader_mock.download_filter.check_url.return_value = True
2021-03-20 17:05:07 +13:00
downloader_mock.args.folder_scheme = ''
downloader_mock.file_name_formatter = RedditDownloader._create_file_name_formatter(downloader_mock)
downloader_mock.download_directory = tmp_path
2021-03-18 01:28:31 +13:00
submission = downloader_mock.reddit_instance.submission(id=test_submission_id)
RedditDownloader._download_submission(downloader_mock, submission)
folder_contents = list(tmp_path.iterdir())
2021-03-18 01:28:31 +13:00
assert len(folder_contents) == expected_files_len
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
def test_download_submission_file_exists(
downloader_mock: MagicMock,
reddit_instance: praw.Reddit,
tmp_path: Path,
capsys: pytest.CaptureFixture):
2021-03-15 15:37:37 +13:00
setup_logging(3)
downloader_mock.reddit_instance = reddit_instance
downloader_mock.download_filter.check_url.return_value = True
2021-03-20 17:05:07 +13:00
downloader_mock.args.folder_scheme = ''
downloader_mock.file_name_formatter = RedditDownloader._create_file_name_formatter(downloader_mock)
downloader_mock.download_directory = tmp_path
submission = downloader_mock.reddit_instance.submission(id='m1hqw6')
Path(tmp_path, 'Arneeman_Metagaming isn\'t always a bad thing_m1hqw6.png').touch()
RedditDownloader._download_submission(downloader_mock, submission)
folder_contents = list(tmp_path.iterdir())
output = capsys.readouterr()
assert len(folder_contents) == 1
2021-03-30 12:05:09 +13:00
assert 'Arneeman_Metagaming isn\'t always a bad thing_m1hqw6.png already exists' in output.out
2021-03-03 15:53:53 +13:00
@pytest.mark.online
@pytest.mark.reddit
def test_download_submission_hash_exists(
downloader_mock: MagicMock,
reddit_instance: praw.Reddit,
tmp_path: Path,
capsys: pytest.CaptureFixture):
2021-03-15 15:37:37 +13:00
setup_logging(3)
downloader_mock.reddit_instance = reddit_instance
downloader_mock.download_filter.check_url.return_value = True
2021-03-20 17:05:07 +13:00
downloader_mock.args.folder_scheme = ''
downloader_mock.args.no_dupes = True
downloader_mock.file_name_formatter = RedditDownloader._create_file_name_formatter(downloader_mock)
downloader_mock.download_directory = tmp_path
downloader_mock.master_hash_list = {'a912af8905ae468e0121e9940f797ad7': None}
submission = downloader_mock.reddit_instance.submission(id='m1hqw6')
RedditDownloader._download_submission(downloader_mock, submission)
folder_contents = list(tmp_path.iterdir())
output = capsys.readouterr()
assert len(folder_contents) == 0
2021-03-30 12:05:09 +13:00
assert re.search(r'Resource hash .*? downloaded elsewhere', output.out)
@pytest.mark.parametrize(('test_name', 'expected'), (
('Mindustry', 'Mindustry'),
('Futurology', 'Futurology'),
('r/Mindustry', 'Mindustry'),
('TrollXChromosomes', 'TrollXChromosomes'),
('r/TrollXChromosomes', 'TrollXChromosomes'),
('https://www.reddit.com/r/TrollXChromosomes/', 'TrollXChromosomes'),
('https://www.reddit.com/r/TrollXChromosomes', 'TrollXChromosomes'),
('https://www.reddit.com/r/Futurology/', 'Futurology'),
('https://www.reddit.com/r/Futurology', 'Futurology'),
))
def test_sanitise_subreddit_name(test_name: str, expected: str):
result = RedditDownloader._sanitise_subreddit_name(test_name)
assert result == expected
def test_search_existing_files():
results = RedditDownloader.scan_existing_files(Path('.'))
assert len(results.keys()) >= 40
2021-03-15 16:22:41 +13:00
@pytest.mark.parametrize(('test_subreddit_entries', 'expected'), (
(['test1', 'test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1,test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1, test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1; test2', 'test3'], {'test1', 'test2', 'test3'}),
(['test1, test2', 'test1,test2,test3', 'test4'], {'test1', 'test2', 'test3', 'test4'})
))
def test_split_subreddit_entries(test_subreddit_entries: list[str], expected: set[str]):
results = RedditDownloader._split_args_input(test_subreddit_entries)
assert results == expected
@pytest.mark.online
@pytest.mark.reddit
def test_mark_hard_link(downloader_mock: MagicMock, tmp_path: Path, reddit_instance: praw.Reddit):
downloader_mock.reddit_instance = reddit_instance
downloader_mock.args.make_hard_links = True
downloader_mock.download_directory = tmp_path
2021-03-20 17:05:07 +13:00
downloader_mock.args.folder_scheme = ''
downloader_mock.args.file_scheme = '{POSTID}'
downloader_mock.file_name_formatter = RedditDownloader._create_file_name_formatter(downloader_mock)
submission = downloader_mock.reddit_instance.submission(id='m1hqw6')
original = Path(tmp_path, 'm1hqw6.png')
RedditDownloader._download_submission(downloader_mock, submission)
assert original.exists()
2021-03-20 17:05:07 +13:00
downloader_mock.args.file_scheme = 'test2_{POSTID}'
downloader_mock.file_name_formatter = RedditDownloader._create_file_name_formatter(downloader_mock)
RedditDownloader._download_submission(downloader_mock, submission)
test_file_1_stats = original.stat()
test_file_2_inode = Path(tmp_path, 'test2_m1hqw6.png').stat().st_ino
assert test_file_1_stats.st_nlink == 2
assert test_file_1_stats.st_ino == test_file_2_inode
@pytest.mark.parametrize(('test_ids', 'test_excluded', 'expected_len'), (
(('aaaaaa',), (), 1),
(('aaaaaa',), ('aaaaaa',), 0),
((), ('aaaaaa',), 0),
(('aaaaaa', 'bbbbbb'), ('aaaaaa',), 1),
))
def test_excluded_ids(test_ids: tuple[str], test_excluded: tuple[str], expected_len: int, downloader_mock: MagicMock):
downloader_mock.excluded_submission_ids = test_excluded
test_submissions = []
for test_id in test_ids:
m = MagicMock()
m.id = test_id
test_submissions.append(m)
downloader_mock.reddit_lists = [test_submissions]
RedditDownloader.download(downloader_mock)
assert downloader_mock._download_submission.call_count == expected_len
def test_read_excluded_submission_ids_from_file(downloader_mock: MagicMock, tmp_path: Path):
test_file = tmp_path / 'test.txt'
test_file.write_text('aaaaaa\nbbbbbb')
downloader_mock.args.exclude_id_file = [test_file]
results = RedditDownloader._read_excluded_ids(downloader_mock)
assert results == {'aaaaaa', 'bbbbbb'}