1
0
Fork 0
mirror of synced 2024-06-02 18:34:37 +12:00
bulk-downloader-for-reddit/tests/test_file_name_formatter.py

522 lines
18 KiB
Python
Raw Normal View History

2021-02-11 12:08:47 +13:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
2021-02-11 12:08:47 +13:00
2021-05-18 14:39:08 +12:00
import platform
import sys
2021-05-18 14:39:08 +12:00
import unittest.mock
2021-04-22 12:38:32 +12:00
from datetime import datetime
2021-02-11 12:08:47 +13:00
from pathlib import Path
2022-12-01 15:48:10 +13:00
from typing import Optional, Type, Union
from unittest.mock import MagicMock
2021-02-11 12:08:47 +13:00
import praw.models
import pytest
2021-04-12 19:58:32 +12:00
from bdfr.file_name_formatter import FileNameFormatter
from bdfr.resource import Resource
from bdfr.site_downloaders.base_downloader import BaseDownloader
from bdfr.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfr.site_downloaders.self_post import SelfPost
2021-02-11 12:08:47 +13:00
@pytest.fixture()
def submission() -> MagicMock:
    """Build a mocked submission carrying fixed, predictable attribute values.

    The mock's ``__class__`` is overridden with ``praw.models.Submission`` so
    that ``isinstance`` checks against that class accept the mock.
    """
    mock_submission = MagicMock()
    # Dotted keys configure attributes on child mocks (subreddit, author).
    mock_submission.configure_mock(
        **{
            "title": "name",
            "subreddit.display_name": "randomreddit",
            "author.name": "person",
            "id": "12345",
            "score": 1000,
            "link_flair_text": "test_flair",
            "created_utc": datetime(2021, 4, 21, 9, 30, 0).timestamp(),
        }
    )
    mock_submission.__class__ = praw.models.Submission
    return mock_submission
@pytest.fixture()
def test_formatter() -> FileNameFormatter:
    """Provide a formatter built with the ``{TITLE}`` file scheme, an empty folder scheme and ISO time format."""
    return FileNameFormatter("{TITLE}", "", "ISO")
def check_valid_windows_path(test_string: str):
    """Return True when the Windows formatting routine leaves ``test_string`` unchanged."""
    windows_formatted = FileNameFormatter._format_for_windows(test_string)
    return test_string == windows_formatted
2022-12-01 15:48:10 +13:00
def do_test_string_equality(result: Union[Path, str], expected: str) -> bool:
    """Report whether the string form of ``result`` ends with ``expected``.

    When ``platform.system()`` reports Windows, ``expected`` is first passed
    through the formatter's Windows conversion so both sides are comparable.
    """
    on_windows = platform.system() == "Windows"
    expected_suffix = FileNameFormatter._format_for_windows(expected) if on_windows else expected
    return str(result).endswith(expected_suffix)
2021-05-04 17:45:16 +12:00
def do_test_path_equality(result: Path, expected: str) -> bool:
    """Report whether the string form of ``result`` ends with the path described by ``expected``.

    ``expected`` is a ``/``-separated path string. When ``platform.system()``
    reports Windows, every component is passed through the formatter's Windows
    conversion before the expected path is rebuilt.
    """
    if platform.system() != "Windows":
        expected_path = Path(expected)
    else:
        converted_parts = [FileNameFormatter._format_for_windows(part) for part in expected.split("/")]
        expected_path = Path(*converted_parts)
    return str(result).endswith(str(expected_path))  # noqa: FURB123
2021-05-04 17:45:16 +12:00
2022-12-03 18:11:17 +13:00
@pytest.fixture(scope="session")
def reddit_submission(reddit_instance: praw.Reddit) -> praw.models.Submission:
    """Return the submission with id ``w22m5l`` from the given Reddit instance, shared for the whole session.

    NOTE(review): the ``reddit_instance`` fixture is not defined in this module; presumably it comes from a conftest.
    """
    submission_id = "w22m5l"
    return reddit_instance.submission(id=submission_id)
@pytest.mark.parametrize(
    ("test_format_string", "expected"),
    (
        ("{SUBREDDIT}", "randomreddit"),
        ("{REDDITOR}", "person"),
        ("{POSTID}", "12345"),
        ("{UPVOTES}", "1000"),
        ("{FLAIR}", "test_flair"),
        ("{DATE}", "2021-04-21T09:30:00"),
        ("{REDDITOR}_{TITLE}_{POSTID}", "person_name_12345"),
    ),
)
def test_format_name_mock(test_format_string: str, expected: str, submission: MagicMock):
    """Check that each placeholder is filled in from the mocked submission's attributes."""
    formatter = FileNameFormatter(test_format_string, "", "ISO")
    formatted_name = formatter._format_name(submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
2021-02-11 12:08:47 +13:00
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_string", "expected"),
    (
        ("", False),
        ("test", False),
        ("{POSTID}", True),
        ("POSTID", False),
        ("{POSTID}_test", True),
        ("test_{TITLE}", True),
        ("TITLE_POSTID", False),
    ),
)
def test_check_format_string_validity(test_string: str, expected: bool):
    """Check ``validate_string`` against strings with and without braced placeholders."""
    is_valid = FileNameFormatter.validate_string(test_string)
    assert is_valid == expected
2021-02-26 22:09:25 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    "restriction_scheme",
    (
        "windows",
        "linux",
        "bla",
        None,
    ),
)
@pytest.mark.parametrize(
    ("test_format_string", "expected"),
    (
        ("{SUBREDDIT}", "formula1"),
        ("{REDDITOR}", "Kirsty-Blue"),
        ("{POSTID}", "w22m5l"),
        ("{FLAIR}", "Social Media rall"),
        ("{SUBREDDIT}_{TITLE}", "formula1_George Russel acknowledges the Twitter trend about him"),
        ("{REDDITOR}_{TITLE}_{POSTID}", "Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l"),
    ),
)
def test_format_name_real(
    test_format_string: str,
    expected: str,
    reddit_submission: praw.models.Submission,
    restriction_scheme: Optional[str],
):
    """Format names from a live submission under every restriction scheme.

    For the ``windows`` scheme the formatted name must additionally survive the
    Windows formatting routine unchanged.
    """
    formatter = FileNameFormatter(test_format_string, "", "", restriction_scheme)
    formatted_name = formatter._format_name(reddit_submission, test_format_string)
    assert do_test_string_equality(formatted_name, expected)
    if restriction_scheme == "windows":
        assert check_valid_windows_path(formatted_name)
2021-02-11 12:08:47 +13:00
2021-02-26 22:09:25 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("format_string_directory", "format_string_file", "expected"),
    (
        (
            "{SUBREDDIT}",
            "{POSTID}",
            "test/formula1/w22m5l.png",
        ),
        (
            "{SUBREDDIT}",
            "{TITLE}_{POSTID}",
            "test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l.png",
        ),
        (
            "{SUBREDDIT}",
            "{REDDITOR}_{TITLE}_{POSTID}",
            "test/formula1/Kirsty-Blue_George Russel acknowledges the Twitter trend about him_w22m5l.png",
        ),
    ),
)
def test_format_full(
    format_string_directory: str, format_string_file: str, expected: str, reddit_submission: praw.models.Submission
):
    """Check the complete path (base directory, folder scheme, file name, extension) for a live submission."""
    resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO")
    formatted_path = formatter.format_path(resource, Path("test"))
    assert do_test_path_equality(formatted_path, expected)
2021-03-14 14:10:26 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("format_string_directory", "format_string_file"),
    (
        ("{SUBREDDIT}", "{POSTID}"),
        ("{SUBREDDIT}", "{UPVOTES}"),
        ("{SUBREDDIT}", "{UPVOTES}{POSTID}"),
    ),
)
def test_format_full_conform(
    format_string_directory: str, format_string_file: str, reddit_submission: praw.models.Submission
):
    """Smoke test: formatting a path with these schemes must not raise.

    There is no assertion on the produced path; the test passes as long as
    ``format_path`` returns normally.
    """
    resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO")
    formatter.format_path(resource, Path("test"))
2021-03-14 14:10:26 +13:00
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("format_string_directory", "format_string_file", "index", "expected"),
    (
        ("{SUBREDDIT}", "{POSTID}", None, "test/formula1/w22m5l.png"),
        ("{SUBREDDIT}", "{POSTID}", 1, "test/formula1/w22m5l_1.png"),
        ("{SUBREDDIT}", "{POSTID}", 2, "test/formula1/w22m5l_2.png"),
        (
            "{SUBREDDIT}",
            "{TITLE}_{POSTID}",
            2,
            "test/formula1/George Russel acknowledges the Twitter trend about him_w22m5l_2.png",
        ),
    ),
)
def test_format_full_with_index_suffix(
    format_string_directory: str,
    format_string_file: str,
    index: Optional[int],
    expected: str,
    reddit_submission: praw.models.Submission,
):
    """Check that a given index shows up as a ``_<index>`` suffix before the extension, and ``None`` adds nothing."""
    resource = Resource(reddit_submission, "i.reddit.com/blabla.png", lambda: None)
    formatter = FileNameFormatter(format_string_file, format_string_directory, "ISO")
    formatted_path = formatter.format_path(resource, Path("test"), index)
    assert do_test_path_equality(formatted_path, expected)
def test_format_multiple_resources():
    """Check that four resources sharing one title receive distinct, numbered file names.

    Each mocked resource has the same URL, extension and submission title, so
    the only thing that can tell the resulting names apart is the ``_1`` to
    ``_4`` suffix.
    """
    resources = []
    for _ in range(4):
        resource = MagicMock()
        resource.url = "https://example.com/test.png"
        resource.extension = ".png"
        resource.source_submission.title = "test"
        resource.source_submission.__class__ = praw.models.Submission
        resources.append(resource)
    formatter = FileNameFormatter("{TITLE}", "", "ISO")
    formatted = formatter.format_resource_paths(resources, Path("."))
    # The first element of every returned entry is the path; compare names only.
    names = {str(entry[0].name) for entry in formatted}
    assert names == {"test_1.png", "test_2.png", "test_3.png", "test_4.png"}
2021-03-13 14:13:36 +13:00
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_filename", "test_ending"),
    (
        ("A" * 300, ".png"),
        ("A" * 300, "_1.png"),
        ("a" * 300, "_1000.jpeg"),
        ("😍💕✨" * 100, "_1.png"),
    ),
)
def test_limit_filename_length(test_filename: str, test_ending: str, test_formatter: FileNameFormatter):
    """Check that over-long names are cut to at most 255 characters and 255 UTF-8 bytes.

    The full path must also fit within ``find_max_path_length()`` and be
    returned as a ``Path``.
    """
    limited = test_formatter.limit_file_name_length(test_filename, test_ending, Path("."))
    file_name = limited.name
    assert len(file_name) <= 255
    assert len(file_name.encode("utf-8")) <= 255
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
    assert isinstance(limited, Path)
2021-03-13 14:13:36 +13:00
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_filename", "test_ending", "expected_end"),
    (
        ("test_aaaaaa", "_1.png", "test_aaaaaa_1.png"),
        ("test_aataaa", "_1.png", "test_aataaa_1.png"),
        ("test_abcdef", "_1.png", "test_abcdef_1.png"),
        ("test_aaaaaa", ".png", "test_aaaaaa.png"),
        ("test", "_1.png", "test_1.png"),
        ("test_m1hqw6", "_1.png", "test_m1hqw6_1.png"),
        ("A" * 300 + "_bbbccc", ".png", "_bbbccc.png"),
        ("A" * 300 + "_bbbccc", "_1000.jpeg", "_bbbccc_1000.jpeg"),
        ("😍💕✨" * 100 + "_aaa1aa", "_1.png", "_aaa1aa_1.png"),
    ),
)
def test_preserve_id_append_when_shortening(
    test_filename: str, test_ending: str, expected_end: str, test_formatter: FileNameFormatter
):
    """Check that the trailing ``_<id>`` part and the ending survive when the name is shortened."""
    limited = test_formatter.limit_file_name_length(test_filename, test_ending, Path("."))
    file_name = limited.name
    assert len(file_name) <= 255
    assert len(file_name.encode("utf-8")) <= 255
    assert file_name.endswith(expected_end)
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
2022-12-03 18:11:17 +13:00
@pytest.mark.skipif(sys.platform == "win32", reason="Test broken on windows github")
def test_shorten_filename_real(submission: MagicMock, tmp_path: Path):
    """Format a path from a 500-character title and actually create it on disk.

    There is no explicit assertion: the test passes when the parent directory
    and the file can be created under ``tmp_path`` without raising.
    """
    submission.title = "A" * 500
    submission.author.name = "test"
    submission.subreddit.display_name = "test"
    submission.id = "BBBBBB"
    resource = Resource(submission, "www.example.com/empty", lambda: None, ".jpeg")
    formatter = FileNameFormatter("{REDDITOR}_{TITLE}_{POSTID}", "{SUBREDDIT}", "ISO")
    target = formatter.format_path(resource, tmp_path)
    target.parent.mkdir(parents=True)
    target.touch()
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_name", "test_ending"),
    (
        ("a", "b"),
        ("a", "_bbbbbb.jpg"),
        ("a" * 20, "_bbbbbb.jpg"),
        ("a" * 50, "_bbbbbb.jpg"),
        ("a" * 500, "_bbbbbb.jpg"),
    ),
)
def test_shorten_path(test_name: str, test_ending: str, tmp_path: Path, test_formatter: FileNameFormatter):
    """Check the length limits of a shortened name placed under a real temporary directory.

    The file name is measured in characters, UTF-8 bytes and cp1252 bytes; the
    whole path is measured against ``find_max_path_length()``.
    """
    limited = test_formatter.limit_file_name_length(test_name, test_ending, tmp_path)
    file_name = str(limited.name)
    assert len(file_name) <= 255
    assert len(file_name.encode("UTF-8")) <= 255
    assert len(file_name.encode("cp1252")) <= 255
    assert len(str(limited)) <= FileNameFormatter.find_max_path_length()
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_string", "expected"),
    (
        ("test", "test"),
        ("test😍", "test"),
        ("test.png", "test.png"),
        ("test*", "test"),
        ("test**", "test"),
        ("test?*", "test"),
        ("test_???.png", "test_.png"),
        ("test_???😍.png", "test_.png"),
    ),
)
def test_format_file_name_for_windows(test_string: str, expected: str):
    """Check that the Windows formatting drops ``*``, ``?`` and emoji characters from a name."""
    windows_name = FileNameFormatter._format_for_windows(test_string)
    assert windows_name == expected
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_string", "expected"),
    (
        ("test", "test"),
        ("test😍", "test"),
        ("😍", ""),
    ),
)
def test_strip_emojies(test_string: str, expected: str):
    """Check that ``_strip_emojis`` removes emoji and leaves plain text alone."""
    stripped = FileNameFormatter._strip_emojis(test_string)
    assert stripped == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("test_submission_id", "expected"),
    (
        (
            "mfuteh",
            {
                "title": "Why Do Interviewers Ask Linked List Questions?",
                "redditor": "mjgardner",
            },
        ),
    ),
)
def test_generate_dict_for_submission(test_submission_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check that the name dictionary built from a live submission contains the expected entries.

    Only the keys listed in ``expected`` are compared; any other keys in the
    generated dictionary are ignored.
    """
    test_submission = reddit_instance.submission(id=test_submission_id)
    formatter = FileNameFormatter("{TITLE}", "", "ISO")
    name_dict = formatter._generate_name_dict_from_submission(test_submission)
    # Compare as dictionaries so a failure shows which key differs instead of a bare "assert False".
    relevant_entries = {key: name_dict.get(key) for key in expected}
    assert relevant_entries == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("test_comment_id", "expected"),
    (
        (
            "gsq0yuw",
            {
                "title": "Why Do Interviewers Ask Linked List Questions?",
                "redditor": "Doctor-Dapper",
                "postid": "gsq0yuw",
                "flair": "",
            },
        ),
    ),
)
def test_generate_dict_for_comment(test_comment_id: str, expected: dict, reddit_instance: praw.Reddit):
    """Check that the name dictionary built from a live comment contains the expected entries.

    Only the keys listed in ``expected`` are compared; any other keys in the
    generated dictionary are ignored.
    """
    test_comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter("{TITLE}", "", "ISO")
    name_dict = formatter._generate_name_dict_from_comment(test_comment)
    # Compare as dictionaries so a failure shows which key differs instead of a bare "assert False".
    relevant_entries = {key: name_dict.get(key) for key in expected}
    assert relevant_entries == expected
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("test_file_scheme", "test_folder_scheme", "test_comment_id", "expected_name"),
    (
        ("{POSTID}", "", "gsoubde", "gsoubde.json"),
        ("{REDDITOR}_{POSTID}", "", "gsoubde", "DELETED_gsoubde.json"),
    ),
)
def test_format_archive_entry_comment(
    test_file_scheme: str,
    test_folder_scheme: str,
    test_comment_id: str,
    expected_name: str,
    tmp_path: Path,
    reddit_instance: praw.Reddit,
):
    """Check the ``.json`` file name produced for a resource that wraps a live comment."""
    comment = reddit_instance.comment(id=test_comment_id)
    formatter = FileNameFormatter(test_file_scheme, test_folder_scheme, "ISO")
    archive_entry = Resource(comment, "", lambda: None, ".json")
    formatted_path = formatter.format_path(archive_entry, tmp_path)
    assert do_test_string_equality(formatted_path, expected_name)
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_folder_scheme", "expected"),
    (
        ("{REDDITOR}/{SUBREDDIT}", "person/randomreddit"),
        ("{POSTID}/{SUBREDDIT}/{REDDITOR}", "12345/randomreddit/person"),
    ),
)
def test_multilevel_folder_scheme(
    test_folder_scheme: str,
    expected: str,
    tmp_path: Path,
    submission: MagicMock,
):
    """Check that a folder scheme containing ``/`` yields one directory level per component."""
    formatter = FileNameFormatter("{POSTID}", test_folder_scheme, "ISO")
    resource = MagicMock()
    resource.source_submission = submission
    resource.extension = ".png"
    relative_path = formatter.format_path(resource, tmp_path).relative_to(tmp_path)
    assert do_test_path_equality(relative_path.parent, expected)
    # ``parents`` of a relative path also counts the trailing "." entry, hence the extra one.
    expected_depth = len(expected.split("/")) + 1
    assert len(relative_path.parents) == expected_depth
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_name_string", "expected"),
    (
        ("test", "test"),
        ("😍", "😍"),
        ("test😍", "test😍"),
        ("test😍 ", "test😍 "),
        ("test😍 \\u2019", "test😍 "),
        ("Using that real good [1\\4]", "Using that real good [1\\4]"),
    ),
)
def test_preserve_emojis(test_name_string: str, expected: str, submission: MagicMock):
    """Check that emoji in a title are kept by ``_format_name`` while a literal ``\\u2019`` sequence is dropped."""
    submission.title = test_name_string
    formatter = FileNameFormatter("{TITLE}", "", "ISO")
    formatted_name = formatter._format_name(submission, "{TITLE}")
    assert do_test_string_equality(formatted_name, expected)
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_string", "expected"),
    (
        ("test \\u2019", "test "),
        ("My cat\\u2019s paws are so cute", "My cats paws are so cute"),
    ),
)
def test_convert_unicode_escapes(test_string: str, expected: str):
    """Check that a literal ``\\u2019`` escape sequence is removed from the text."""
    converted = FileNameFormatter._convert_unicode_escapes(test_string)
    assert converted == expected
2021-04-22 12:38:32 +12:00
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_datetime", "expected"),
    (
        (datetime(2020, 1, 1, 8, 0, 0), "2020-01-01T08:00:00"),
        (datetime(2020, 1, 1, 8, 0), "2020-01-01T08:00:00"),
        (datetime(2021, 4, 21, 8, 30, 21), "2021-04-21T08:30:21"),
    ),
)
def test_convert_timestamp(test_datetime: datetime, expected: str):
    """Check that a POSIX timestamp is rendered as an ISO string by a formatter using the ISO time format."""
    formatter = FileNameFormatter("{POSTID}", "", "ISO")
    rendered = formatter._convert_timestamp(test_datetime.timestamp())
    assert rendered == expected
2022-12-03 18:11:17 +13:00
@pytest.mark.parametrize(
    ("test_time_format", "expected"),
    (
        ("ISO", "2021-05-02T13:33:00"),
        ("%Y_%m", "2021_05"),
        ("%Y-%m-%d", "2021-05-02"),
    ),
)
def test_time_string_formats(test_time_format: str, expected: str):
    """Check that the formatter's time format accepts ``ISO`` as well as strftime-style patterns."""
    moment = datetime(2021, 5, 2, 13, 33)
    formatter = FileNameFormatter("{TITLE}", "", test_time_format)
    rendered = formatter._convert_timestamp(moment.timestamp())
    assert rendered == expected
2021-05-18 14:39:08 +12:00
def test_get_max_path_length():
    """Check that the detected maximum path length is one of the values this test accepts."""
    accepted_lengths = (4096, 260, 1024)
    max_length = FileNameFormatter.find_max_path_length()
    assert max_length in accepted_lengths
2021-05-18 14:39:08 +12:00
2021-05-27 17:29:43 +12:00
def test_windows_max_path(tmp_path: Path):
    """Check name shortening when the platform reports Windows and the path limit is patched to 260.

    ``limit_file_name_length`` is called unbound with a mock standing in for
    the instance; the mock only provides ``max_path``.
    """
    windows_platform = unittest.mock.patch("platform.system", return_value="Windows")
    short_path_limit = unittest.mock.patch(
        "bdfr.file_name_formatter.FileNameFormatter.find_max_path_length", return_value=260
    )
    with windows_platform, short_path_limit:
        fake_formatter = MagicMock()
        fake_formatter.max_path = 260
        limited = FileNameFormatter.limit_file_name_length(fake_formatter, "test" * 100, "_1.png", tmp_path)
        assert len(str(limited)) <= 260
        assert len(limited.name) <= (260 - len(str(tmp_path)))
@pytest.mark.online
@pytest.mark.reddit
@pytest.mark.parametrize(
    ("test_reddit_id", "test_downloader", "expected_names"),
    (
        ("gphmnr", YtdlpFallback, {"He has a lot to say today.mp4"}),
        ("d0oir2", YtdlpFallback, {"Crunk's finest moment. Welcome to the new subreddit!.mp4"}),
        ("jiecu", SelfPost, {"[deleted by user].txt"}),
    ),
)
def test_name_submission(
    test_reddit_id: str,
    test_downloader: Type[BaseDownloader],
    expected_names: set[str],
    reddit_instance: praw.Reddit,
):
    """Check the file names produced for resources found by a real downloader on a live submission.

    The downloader class is instantiated with the submission, its resources
    are formatted with the ``{TITLE}`` scheme, and the resulting set of file
    names must equal ``expected_names``.
    """
    test_submission = reddit_instance.submission(id=test_reddit_id)
    resources = test_downloader(test_submission).find_resources()
    formatter = FileNameFormatter("{TITLE}", "", "")
    formatted = formatter.format_resource_paths(resources, Path("."))
    # The first element of every returned entry is the path; compare names only.
    names = {entry[0].name for entry in formatted}
    assert names == expected_names