1
0
Fork 0
mirror of synced 2024-06-29 03:21:19 +12:00
bulk-downloader-for-reddit/tests/test_file_name_formatter.py

383 lines
14 KiB
Python
Raw Normal View History

2021-02-11 12:08:47 +13:00
#!/usr/bin/env python3
# coding=utf-8
2021-05-18 14:39:08 +12:00
import platform
import unittest.mock
2021-04-22 12:38:32 +12:00
from datetime import datetime
2021-02-11 12:08:47 +13:00
from pathlib import Path
from typing import Optional, Union
from unittest.mock import MagicMock
2021-02-11 12:08:47 +13:00
import praw.models
import pytest
2021-04-12 19:58:32 +12:00
from bdfr.file_name_formatter import FileNameFormatter
from bdfr.resource import Resource
2021-02-11 12:08:47 +13:00
@pytest.fixture()
def submission() -> MagicMock:
    """Build a mocked submission populated with fixed attribute values.

    The mock's ``__class__`` is overridden with ``praw.models.Submission`` so
    that it reports itself as a PRAW submission to code that checks its type.
    """
    mock_submission = MagicMock()
    # Identity and content fields.
    mock_submission.id = '12345'
    mock_submission.title = 'name'
    mock_submission.author.name = 'person'
    mock_submission.subreddit.display_name = 'randomreddit'
    # Metadata fields.
    mock_submission.link_flair_text = 'test_flair'
    mock_submission.score = 1000
    mock_submission.created_utc = datetime(2021, 4, 21, 9, 30, 0).timestamp()
    mock_submission.__class__ = praw.models.Submission
    return mock_submission
2021-05-18 14:39:08 +12:00
def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
    """Return whether the string form of ``result`` ends with ``expected``.

    Args:
        result: The value produced by the code under test; it is converted
            with ``str()`` before comparison.
        expected: The suffix that ``result`` should end with.

    Returns:
        ``True`` if ``str(result)`` ends with the (possibly adjusted)
        ``expected`` string, ``False`` otherwise.
    """
    if platform.system() == 'Windows':
        # On Windows the expected text goes through the formatter's own
        # Windows conversion before it is compared.
        expected = FileNameFormatter._format_for_windows(expected)
    return str(result).endswith(expected)
2021-05-04 17:45:16 +12:00
def do_test_path_equality(result: Path, expected: str) -> bool:
    """Return whether ``result`` ends with the path described by ``expected``.

    ``expected`` is a ``/``-separated path string. On Windows each component
    is passed through ``FileNameFormatter._format_for_windows`` before the
    components are joined back into a ``Path``; elsewhere the string is
    wrapped in a ``Path`` unchanged. The comparison is a string suffix check.
    """
    if platform.system() != 'Windows':
        expected_path = Path(expected)
    else:
        converted_parts = map(FileNameFormatter._format_for_windows, expected.split('/'))
        expected_path = Path(*converted_parts)
    return str(result).endswith(str(expected_path))
2021-05-04 17:45:16 +12:00
@pytest.fixture(scope='session')
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
    """Provide the submission with ID ``lgilgt``, created once per test session."""
    submission_id = 'lgilgt'
    return reddit_instance.submission(id=submission_id)
2021-02-11 12:08:47 +13:00
2021-05-02 15:56:39 +12:00
@pytest.mark.parametrize(
    ('test_format_string', 'expected'),
    (
        ('{SUBREDDIT}', 'randomreddit'),
        ('{REDDITOR}', 'person'),
        ('{POSTID}', '12345'),
        ('{UPVOTES}', '1000'),
        ('{FLAIR}', 'test_flair'),
        ('{DATE}', '2021-04-21T09:30:00'),
        ('{REDDITOR}_{TITLE}_{POSTID}', 'person_name_12345'),
    ),
)
def test_format_name_mock(test_format_string: str, expected: str, submission: MagicMock):
    """Format a name from the mocked submission and compare it with the expected text."""
    formatter = FileNameFormatter(test_format_string, '', 'ISO')
    formatted_name = formatter._format_name(submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
2021-02-11 12:08:47 +13:00
@pytest.mark.parametrize(
    ('test_string', 'expected'),
    (
        ('', False),
        ('test', False),
        ('{POSTID}', True),
        ('POSTID', False),
        ('{POSTID}_test', True),
        ('test_{TITLE}', True),
        ('TITLE_POSTID', False),
    ),
)
def test_check_format_string_validity(test_string: str, expected: bool):
    """``validate_string`` gives the expected verdict for each sample format string."""
    is_valid = FileNameFormatter.validate_string(test_string)
    assert is_valid == expected
2021-02-26 22:09:25 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('test_format_string', 'expected'),
    (
        ('{SUBREDDIT}', 'Mindustry'),
        ('{REDDITOR}', 'Gamer_player_boi'),
        ('{POSTID}', 'lgilgt'),
        ('{FLAIR}', 'Art'),
        ('{SUBREDDIT}_{TITLE}', 'Mindustry_Toxopid that is NOT humane >:('),
        ('{REDDITOR}_{TITLE}_{POSTID}', 'Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt'),
    ),
)
def test_format_name_real(test_format_string: str, expected: str, reddit_submission: praw.models.Submission):
    """Format a name from the ``reddit_submission`` fixture and compare it with the expected text."""
    formatter = FileNameFormatter(test_format_string, '', '')
    formatted_name = formatter._format_name(reddit_submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
2021-02-11 12:08:47 +13:00
2021-02-26 22:09:25 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('format_string_directory', 'format_string_file', 'expected'),
    (
        ('{SUBREDDIT}', '{POSTID}', 'test/Mindustry/lgilgt.png'),
        ('{SUBREDDIT}', '{TITLE}_{POSTID}', 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt.png'),
        (
            '{SUBREDDIT}',
            '{REDDITOR}_{TITLE}_{POSTID}',
            'test/Mindustry/Gamer_player_boi_Toxopid that is NOT humane >:(_lgilgt.png',
        ),
    ),
)
def test_format_full(
        format_string_directory: str,
        format_string_file: str,
        expected: str,
        reddit_submission: praw.models.Submission,
):
    """Build a full path (directory scheme plus file scheme) under ``test`` and check its tail."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatted_path = formatter.format_path(resource, Path('test'))
    assert do_test_path_equality(formatted_path, expected)
2021-03-14 14:10:26 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('format_string_directory', 'format_string_file'),
    (
        ('{SUBREDDIT}', '{POSTID}'),
        ('{SUBREDDIT}', '{UPVOTES}'),
        ('{SUBREDDIT}', '{UPVOTES}{POSTID}'),
    ),
)
def test_format_full_conform(
        format_string_directory: str,
        format_string_file: str,
        reddit_submission: praw.models.Submission,
):
    """Call ``format_path`` for each scheme; the test passes if no exception is raised."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    # The returned path is intentionally discarded; nothing is asserted about it.
    formatter.format_path(resource, Path('test'))
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('format_string_directory', 'format_string_file', 'index', 'expected'),
    (
        ('{SUBREDDIT}', '{POSTID}', None, 'test/Mindustry/lgilgt.png'),
        ('{SUBREDDIT}', '{POSTID}', 1, 'test/Mindustry/lgilgt_1.png'),
        ('{SUBREDDIT}', '{POSTID}', 2, 'test/Mindustry/lgilgt_2.png'),
        ('{SUBREDDIT}', '{TITLE}_{POSTID}', 2, 'test/Mindustry/Toxopid that is NOT humane >:(_lgilgt_2.png'),
    ),
)
def test_format_full_with_index_suffix(
        format_string_directory: str,
        format_string_file: str,
        index: Optional[int],
        expected: str,
        reddit_submission: praw.models.Submission,
):
    """Pass an optional index to ``format_path`` and check the resulting path's tail."""
    resource = Resource(reddit_submission, 'i.reddit.com/blabla.png')
    formatter = FileNameFormatter(format_string_file, format_string_directory, 'ISO')
    formatted_path = formatter.format_path(resource, Path('test'), index)
    assert do_test_path_equality(formatted_path, expected)
def test_format_multiple_resources():
    """Format four mocked resources with the same title and check the set of file names."""

    def make_resource() -> MagicMock:
        # Every mocked resource carries the same URL, extension and title.
        resource = MagicMock()
        resource.url = 'https://example.com/test.png'
        resource.extension = '.png'
        resource.source_submission.title = 'test'
        resource.source_submission.__class__ = praw.models.Submission
        return resource

    mocks = [make_resource() for _ in range(1, 5)]
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    formatted = formatter.format_resource_paths(mocks, Path('.'))
    # Only the file name of the first element of each result entry is compared.
    file_names = {str(entry[0].name) for entry in formatted}
    assert file_names == {'test_1.png', 'test_2.png', 'test_3.png', 'test_4.png'}
2021-03-13 14:13:36 +13:00
@pytest.mark.parametrize(
    ('test_filename', 'test_ending'),
    (
        ('A' * 300, '.png'),
        ('A' * 300, '_1.png'),
        ('a' * 300, '_1000.jpeg'),
        ('😍💕✨' * 100, '_1.png'),
    ),
)
def test_limit_filename_length(test_filename: str, test_ending: str):
    """Over-long names must come back as a ``Path`` within the character, byte and path limits."""
    limited = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
    file_name = limited.name
    # The name is checked both as characters and as UTF-8 encoded bytes.
    assert len(file_name) <= 255
    assert len(file_name.encode('utf-8')) <= 255
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
    assert isinstance(limited, Path)
2021-03-13 14:13:36 +13:00
@pytest.mark.parametrize(
    ('test_filename', 'test_ending', 'expected_end'),
    (
        ('test_aaaaaa', '_1.png', 'test_aaaaaa_1.png'),
        ('test_aataaa', '_1.png', 'test_aataaa_1.png'),
        ('test_abcdef', '_1.png', 'test_abcdef_1.png'),
        ('test_aaaaaa', '.png', 'test_aaaaaa.png'),
        ('test', '_1.png', 'test_1.png'),
        ('test_m1hqw6', '_1.png', 'test_m1hqw6_1.png'),
        ('A' * 300 + '_bbbccc', '.png', '_bbbccc.png'),
        ('A' * 300 + '_bbbccc', '_1000.jpeg', '_bbbccc_1000.jpeg'),
        ('😍💕✨' * 100 + '_aaa1aa', '_1.png', '_aaa1aa_1.png'),
    ),
)
def test_preserve_id_append_when_shortening(test_filename: str, test_ending: str, expected_end: str):
    """The limited name must stay within the limits and still end with ``expected_end``."""
    limited = FileNameFormatter._limit_file_name_length(test_filename, test_ending, Path('.'))
    file_name = limited.name
    assert len(file_name) <= 255
    assert len(file_name.encode('utf-8')) <= 255
    assert file_name.endswith(expected_end)
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
def test_shorten_filenames(submission: MagicMock, tmp_path: Path):
    """Format a path for a 300-character title and actually create it under ``tmp_path``.

    Nothing is asserted; the test fails only if formatting, ``mkdir`` or
    ``touch`` raises.
    """
    submission.id = 'BBBBBB'
    submission.title = 'A' * 300
    submission.author.name = 'test'
    submission.subreddit.display_name = 'test'
    resource = Resource(submission, 'www.example.com/empty', '.jpeg')
    formatter = FileNameFormatter('{REDDITOR}_{TITLE}_{POSTID}', '{SUBREDDIT}', 'ISO')
    target = formatter.format_path(resource, tmp_path)
    # Create the parent directories first, then the file itself.
    target.parent.mkdir(parents=True)
    target.touch()
@pytest.mark.parametrize(
    ('test_string', 'expected'),
    (
        ('test', 'test'),
        ('test😍', 'test'),
        ('test.png', 'test.png'),
        ('test*', 'test'),
        ('test**', 'test'),
        ('test?*', 'test'),
        ('test_???.png', 'test_.png'),
        ('test_???😍.png', 'test_.png'),
    ),
)
def test_format_file_name_for_windows(test_string: str, expected: str):
    """``_format_for_windows`` maps each sample string to its expected form."""
    converted = FileNameFormatter._format_for_windows(test_string)
    assert converted == expected
@pytest.mark.parametrize(
    ('test_string', 'expected'),
    (
        ('test', 'test'),
        ('test😍', 'test'),
        ('😍', ''),
    ),
)
def test_strip_emojies(test_string: str, expected: str):
    """``_strip_emojis`` maps each sample string to its expected emoji-free form."""
    stripped = FileNameFormatter._strip_emojis(test_string)
    assert stripped == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('test_submission_id', 'expected'),
    (
        (
            'mfuteh',
            {
                'title': 'Why Do Interviewers Ask Linked List Questions?',
                'redditor': 'mjgardner',
            },
        ),
    ),
)
def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Every expected key must appear with the same value in the submission's name dict."""
    fetched_submission = reddit_instance.submission(id=test_submission_id)
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    name_dict = formatter._generate_name_dict_from_submission(fetched_submission)
    # Only the keys listed in ``expected`` are checked; extra keys are ignored.
    assert all(name_dict.get(key) == value for key, value in expected.items())
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('test_comment_id', 'expected'),
    (
        (
            'gsq0yuw',
            {
                'title': 'Why Do Interviewers Ask Linked List Questions?',
                'redditor': 'Doctor-Dapper',
                'postid': 'gsq0yuw',
                'flair': '',
            },
        ),
    ),
)
def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Every expected key must appear with the same value in the comment's name dict."""
    fetched_comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    name_dict = formatter._generate_name_dict_from_comment(fetched_comment)
    # Only the keys listed in ``expected`` are checked; extra keys are ignored.
    assert all(name_dict.get(key) == value for key, value in expected.items())
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ('test_file_scheme', 'test_folder_scheme', 'test_comment_id', 'expected_name'),
    (
        ('{POSTID}', '', 'gsoubde', 'gsoubde.json'),
        ('{REDDITOR}_{POSTID}', '', 'gsoubde', 'DELETED_gsoubde.json'),
    ),
)
def test_format_archive_entry_comment(
        test_file_scheme: str,
        test_folder_scheme: str,
        test_comment_id: str,
        expected_name: str,
        tmp_path: Path,
        reddit_instance: praw.Reddit,
):
    """Format a ``.json`` path for a comment-backed resource and check how it ends."""
    fetched_comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, 'ISO')
    archive_entry = Resource(fetched_comment, '', '.json')
    formatted_path = formatter.format_path(archive_entry, tmp_path)
    assert do_test_string_equality(formatted_path, expected_name)
@pytest.mark.parametrize(
    ('test_folder_scheme', 'expected'),
    (
        ('{REDDITOR}/{SUBREDDIT}', 'person/randomreddit'),
        ('{POSTID}/{SUBREDDIT}/{REDDITOR}', '12345/randomreddit/person'),
    ),
)
def test_multilevel_folder_scheme(
        test_folder_scheme: str,
        expected: str,
        tmp_path: Path,
        submission: MagicMock,
):
    """A folder scheme containing ``/`` must produce one directory level per component."""
    formatter = FileNameFormatter('{POSTID}', test_folder_scheme, 'ISO')
    mock_resource = MagicMock(source_submission=submission, extension='.png')
    relative_path = formatter.format_path(mock_resource, tmp_path).relative_to(tmp_path)
    assert do_test_path_equality(relative_path.parent, expected)
    # ``parents`` of a relative path also contains '.', hence one more than the folder levels.
    folder_levels = len(expected.split('/'))
    assert len(relative_path.parents) == (folder_levels + 1)
# NOTE(review): in the case whose title contains the literal text ``\u2019`` the
# expected value shows no U+2019 character; if the formatter decodes the escape
# rather than dropping it, that character may have been lost from this file —
# confirm against the implementation.
@pytest.mark.parametrize(
    ('test_name_string', 'expected'),
    (
        ('test', 'test'),
        ('😍', '😍'),
        ('test😍', 'test😍'),
        ('test😍 ', 'test😍 '),
        ('test😍 \\u2019', 'test😍 '),
        ('Using that real good [1\\4]', 'Using that real good [1\\4]'),
    ),
)
def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock):
    """Use each sample as the submission title and check the formatted ``{TITLE}`` name."""
    submission.title = test_name_string
    formatter = FileNameFormatter('{TITLE}', '', 'ISO')
    formatted_name = formatter._format_name(submission, '{TITLE}')
    assert do_test_string_equality(formatted_name, expected)
# NOTE(review): the expected values below show no U+2019 character where the
# input has the literal text ``\u2019``; if ``_convert_unicode_escapes`` decodes
# the escape rather than removing it, that character may have been lost from
# this file — confirm against the implementation.
@pytest.mark.parametrize(
    ('test_string', 'expected'),
    (
        ('test \\u2019', 'test '),
        ('My cat\\u2019s paws are so cute', 'My cats paws are so cute'),
    ),
)
def test_convert_unicode_escapes(test_string: str, expected: str):
    """``_convert_unicode_escapes`` maps each sample string to its expected form."""
    converted = FileNameFormatter._convert_unicode_escapes(test_string)
    assert converted == expected
2021-04-22 12:38:32 +12:00
@pytest.mark.parametrize(
    ('test_datetime', 'expected'),
    (
        (datetime(2020, 1, 1, 8, 0, 0), '2020-01-01T08:00:00'),
        (datetime(2020, 1, 1, 8, 0), '2020-01-01T08:00:00'),
        (datetime(2021, 4, 21, 8, 30, 21), '2021-04-21T08:30:21'),
    ),
)
def test_convert_timestamp(test_datetime: datetime, expected: str):
    """With the ``'ISO'`` time format, a timestamp is rendered as the expected date-time string."""
    formatter = FileNameFormatter('{POSTID}', '', 'ISO')
    timestamp = test_datetime.timestamp()
    converted = formatter._convert_timestamp(timestamp)
    assert converted == expected
@pytest.mark.parametrize(
    ('test_time_format', 'expected'),
    (
        ('ISO', '2021-05-02T13:33:00'),
        ('%Y_%m', '2021_05'),
        ('%Y-%m-%d', '2021-05-02'),
    ),
)
def test_time_string_formats(test_time_format: str, expected: str):
    """The same moment is rendered as expected for each time-format setting."""
    moment = datetime(2021, 5, 2, 13, 33)
    formatter = FileNameFormatter('{TITLE}', '', test_time_format)
    converted = formatter._convert_timestamp(moment.timestamp())
    assert converted == expected
2021-05-18 14:39:08 +12:00
def test_get_max_path_length():
    """``find_max_path_length`` must return one of the three accepted values."""
    accepted_limits = (4096, 260, 1024)
    max_length = FileNameFormatter.find_max_path_length()
    assert max_length in accepted_limits
2021-05-18 14:39:08 +12:00
2021-05-27 17:29:43 +12:00
def test_windows_max_path(tmp_path: Path):
    """With the platform patched to Windows and a 260 limit, the limited path must fit in 260."""
    windows_platform = unittest.mock.patch('platform.system', return_value='Windows')
    short_path_limit = unittest.mock.patch(
        'bdfr.file_name_formatter.FileNameFormatter.find_max_path_length',
        return_value=260,
    )
    # Both patches are active only inside this block.
    with windows_platform, short_path_limit:
        limited = FileNameFormatter._limit_file_name_length('test' * 100, '_1.png', tmp_path)
        assert len(str(limited)) <= 260
        assert len(limited.name) <= (260 - len(str(tmp_path)))